asimd.ll - OpenGrok cross reference for /frameworks/rs/driver/runtime/arch/asimd.ll

Lines Matching refs:x
; AArch64 Advanced SIMD (NEON) element-wise maximum intrinsics.
;   fmax = floating-point, smax = signed integer, umax = unsigned integer.
; Every declaration takes two vectors of one type and returns that same type,
; and is marked nounwind readnone.
; NOTE(review): no call to the v4i16 variants appears in the lines visible in
; this listing -- confirm whether they are used elsewhere before removing them.
8 declare <2 x float> @llvm.aarch64.neon.fmax.v2f32(<2 x float>, <2 x float>) nounwind readnone
9 declare <4 x float> @llvm.aarch64.neon.fmax.v4f32(<4 x float>, <4 x float>) nounwind readnone
10 declare <2 x i32> @llvm.aarch64.neon.smax.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
11 declare <4 x i32> @llvm.aarch64.neon.smax.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
12 declare <2 x i32> @llvm.aarch64.neon.umax.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
13 declare <4 x i32> @llvm.aarch64.neon.umax.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
14 declare <4 x i16> @llvm.aarch64.neon.smax.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
15 declare <4 x i16> @llvm.aarch64.neon.umax.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
; Matching element-wise minimum intrinsics (fmin / smin / umin), declared with
; the same vector shapes and attributes as the maximum set above.
17 declare <2 x float> @llvm.aarch64.neon.fmin.v2f32(<2 x float>, <2 x float>) nounwind readnone
18 declare <4 x float> @llvm.aarch64.neon.fmin.v4f32(<4 x float>, <4 x float>) nounwind readnone
19 declare <2 x i32> @llvm.aarch64.neon.smin.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
20 declare <4 x i32> @llvm.aarch64.neon.smin.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
21 declare <2 x i32> @llvm.aarch64.neon.umin.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
22 declare <4 x i32> @llvm.aarch64.neon.umin.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
23 declare <4 x i16> @llvm.aarch64.neon.smin.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
24 declare <4 x i16> @llvm.aarch64.neon.umin.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
; Saturating-shift intrinsics (SQSHL = signed saturating shift left,
; SQSHRUN = signed saturating shift right unsigned narrow, per the A64 mnemonics).
; As written, every declaration returns a vector whose elements are half the
; width of its two operand vectors.
; NOTE(review): the sqshl operand types do not match the overload suffix
; (e.g. .v8i8 declared with <8 x i16> operands). Upstream LLVM appears to define
; sqshl with both operands of the result type -- confirm against
; IntrinsicsAArch64.td and any call sites before relying on these.
26 declare <8 x i8>  @llvm.aarch64.neon.sqshl.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
27 declare <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
28 declare <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
; NOTE(review): upstream LLVM appears to define sqshrun as (wide vector, i32
; shift amount); here the second operand is declared as a vector -- confirm.
30 declare <8 x i8>  @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
31 declare <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
32 declare <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
; Reciprocal / reciprocal-square-root helper intrinsics for 2- and 4-lane float
; vectors, all marked nounwind readnone.
; frecpe and frsqrte take one vector operand (FRECPE / FRSQRTE are the A64
; reciprocal-estimate and reciprocal-square-root-estimate instructions).
; NOTE(review): no call to any of these eight intrinsics appears in the lines
; visible in this listing -- their users are presumably further down the file.
34 declare <2 x float> @llvm.aarch64.neon.frecpe.v2f32(<2 x float>) nounwind readnone
35 declare <4 x float> @llvm.aarch64.neon.frecpe.v4f32(<4 x float>) nounwind readnone
37 declare <2 x float> @llvm.aarch64.neon.frsqrte.v2f32(<2 x float>) nounwind readnone
38 declare <4 x float> @llvm.aarch64.neon.frsqrte.v4f32(<4 x float>) nounwind readnone
; frecps and frsqrts take two vector operands (FRECPS / FRSQRTS are the A64
; reciprocal-step and reciprocal-square-root-step instructions).
40 declare <2 x float> @llvm.aarch64.neon.frecps.v2f32(<2 x float>, <2 x float>) nounwind readnone
41 declare <4 x float> @llvm.aarch64.neon.frecps.v4f32(<4 x float>, <4 x float>) nounwind readnone
43 declare <2 x float> @llvm.aarch64.neon.frsqrts.v2f32(<2 x float>, <2 x float>) nounwind readnone
44 declare <4 x float> @llvm.aarch64.neon.frsqrts.v4f32(<4 x float>, <4 x float>) nounwind readnone
50 define internal <4 x float> @smear_4f(float %in) nounwind readnone alwaysinline {
51   %1 = insertelement <4 x float> undef, float %in, i32 0
52   %2 = insertelement <4 x float> %1, float %in, i32 1
53   %3 = insertelement <4 x float> %2, float %in, i32 2
54   %4 = insertelement <4 x float> %3, float %in, i32 3
55   ret <4 x float> %4
58 define internal <4 x i32> @smear_4i(i32 %in) nounwind readnone alwaysinline {
59   %1 = insertelement <4 x i32> undef, i32 %in, i32 0
60   %2 = insertelement <4 x i32> %1, i32 %in, i32 1
61   %3 = insertelement <4 x i32> %2, i32 %in, i32 2
62   %4 = insertelement <4 x i32> %3, i32 %in, i32 3
63   ret <4 x i32> %4
66 define internal <4 x i16> @smear_4s(i16 %in) nounwind readnone alwaysinline {
67   %1 = insertelement <4 x i16> undef, i16 %in, i32 0
68   %2 = insertelement <4 x i16> %1, i16 %in, i32 1
69   %3 = insertelement <4 x i16> %2, i16 %in, i32 2
70   %4 = insertelement <4 x i16> %3, i16 %in, i32 3
71   ret <4 x i16> %4
76 define internal <2 x float> @smear_2f(float %in) nounwind readnone alwaysinline {
77   %1 = insertelement <2 x float> undef, float %in, i32 0
78   %2 = insertelement <2 x float> %1, float %in, i32 1
79   ret <2 x float> %2
82 define internal <2 x i32> @smear_2i(i32 %in) nounwind readnone alwaysinline {
83   %1 = insertelement <2 x i32> undef, i32 %in, i32 0
84   %2 = insertelement <2 x i32> %1, i32 %in, i32 1
85   ret <2 x i32> %2
88 define internal <2 x i16> @smear_2s(i16 %in) nounwind readnone alwaysinline {
89   %1 = insertelement <2 x i16> undef, i16 %in, i32 0
90   %2 = insertelement <2 x i16> %1, i16 %in, i32 1
91   ret <2 x i16> %2
95 define internal <4 x i32> @smear_4i32(i32 %in) nounwind readnone alwaysinline {
96   %1 = insertelement <4 x i32> undef, i32 %in, i32 0
97   %2 = insertelement <4 x i32> %1, i32 %in, i32 1
98   %3 = insertelement <4 x i32> %2, i32 %in, i32 2
99   %4 = insertelement <4 x i32> %3, i32 %in, i32 3
100   ret <4 x i32> %4
108 define <4 x float> @_Z5clampDv4_fS_S_(<4 x float> %value, <4 x float> %low, <4 x float> %high) noun…
109 …%1 = tail call <4 x float> @llvm.aarch64.neon.fmin.v4f32(<4 x float> %value, <4 x float> %high) no…
110 …%2 = tail call <4 x float> @llvm.aarch64.neon.fmax.v4f32(<4 x float> %1, <4 x float> %low) nounwin…
111   ret <4 x float> %2
114 define <4 x float> @_Z5clampDv4_fff(<4 x float> %value, float %low, float %high) nounwind readonly {
115   %_high = tail call <4 x float> @smear_4f(float %high) nounwind readnone
116   %_low = tail call <4 x float> @smear_4f(float %low) nounwind readnone
117 …%out = tail call <4 x float> @_Z5clampDv4_fS_S_(<4 x float> %value, <4 x float> %_low, <4 x float>…
118   ret <4 x float> %out
121 define <3 x float> @_Z5clampDv3_fS_S_(<3 x float> %value, <3 x float> %low, <3 x float> %high) noun…
122 …%_value = shufflevector <3 x float> %value, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32…
123   %_low = shufflevector <3 x float> %low, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
124 …%_high = shufflevector <3 x float> %high, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
125 …%a = tail call <4 x float> @llvm.aarch64.neon.fmin.v4f32(<4 x float> %_value, <4 x float> %_high) …
126 …%b = tail call <4 x float> @llvm.aarch64.neon.fmax.v4f32(<4 x float> %a, <4 x float> %_low) nounwi…
127   %c = shufflevector <4 x float> %b, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
128   ret <3 x float> %c
131 define <3 x float> @_Z5clampDv3_fff(<3 x float> %value, float %low, float %high) nounwind readonly {
132 …%_value = shufflevector <3 x float> %value, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32…
133   %_high = tail call <4 x float> @smear_4f(float %high) nounwind readnone
134   %_low = tail call <4 x float> @smear_4f(float %low) nounwind readnone
135 …%a = tail call <4 x float> @llvm.aarch64.neon.fmin.v4f32(<4 x float> %_value, <4 x float> %_high) …
136 …%b = tail call <4 x float> @llvm.aarch64.neon.fmax.v4f32(<4 x float> %a, <4 x float> %_low) nounwi…
137   %c = shufflevector <4 x float> %b, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
138   ret <3 x float> %c
141 define <2 x float> @_Z5clampDv2_fS_S_(<2 x float> %value, <2 x float> %low, <2 x float> %high) noun…
142 …%1 = tail call <2 x float> @llvm.aarch64.neon.fmin.v2f32(<2 x float> %value, <2 x float> %high) no…
143 …%2 = tail call <2 x float> @llvm.aarch64.neon.fmax.v2f32(<2 x float> %1, <2 x float> %low) nounwin…
144   ret <2 x float> %2
147 define <2 x float> @_Z5clampDv2_fff(<2 x float> %value, float %low, float %high) nounwind readonly {
148   %_high = tail call <2 x float> @smear_2f(float %high) nounwind readnone
149   %_low = tail call <2 x float> @smear_2f(float %low) nounwind readnone
150 …%a = tail call <2 x float> @llvm.aarch64.neon.fmin.v2f32(<2 x float> %value, <2 x float> %_high) n…
151 …%b = tail call <2 x float> @llvm.aarch64.neon.fmax.v2f32(<2 x float> %a, <2 x float> %_low) nounwi…
152   ret <2 x float> %b
165 define <4 x i32> @_Z5clampDv4_iS_S_(<4 x i32> %value, <4 x i32> %low, <4 x i32> %high) nounwind rea…
166 …%1 = tail call <4 x i32> @llvm.aarch64.neon.smin.v4i32(<4 x i32> %value, <4 x i32> %high) nounwind…
167 …%2 = tail call <4 x i32> @llvm.aarch64.neon.smax.v4i32(<4 x i32> %1, <4 x i32> %low) nounwind read…
168   ret <4 x i32> %2
171 define <4 x i32> @_Z5clampDv4_iii(<4 x i32> %value, i32 %low, i32 %high) nounwind readonly {
172   %_high = tail call <4 x i32> @smear_4i(i32 %high) nounwind readnone
173   %_low = tail call <4 x i32> @smear_4i(i32 %low) nounwind readnone
174 …%1 = tail call <4 x i32> @llvm.aarch64.neon.smin.v4i32(<4 x i32> %value, <4 x i32> %_high) nounwin…
175 …%2 = tail call <4 x i32> @llvm.aarch64.neon.smax.v4i32(<4 x i32> %1, <4 x i32> %_low) nounwind rea…
176   ret <4 x i32> %2
179 define <3 x i32> @_Z5clampDv3_iS_S_(<3 x i32> %value, <3 x i32> %low, <3 x i32> %high) nounwind rea…
180   %_value = shufflevector <3 x i32> %value, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
181   %_low = shufflevector <3 x i32> %low, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
182   %_high = shufflevector <3 x i32> %high, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
183 …%a = tail call <4 x i32> @llvm.aarch64.neon.smin.v4i32(<4 x i32> %_value, <4 x i32> %_high) nounwi…
184 …%b = tail call <4 x i32> @llvm.aarch64.neon.smax.v4i32(<4 x i32> %a, <4 x i32> %_low) nounwind rea…
185   %c = shufflevector <4 x i32> %b, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
186   ret <3 x i32> %c
189 define <3 x i32> @_Z5clampDv3_iii(<3 x i32> %value, i32 %low, i32 %high) nounwind readonly {
190   %_value = shufflevector <3 x i32> %value, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
191   %_high = tail call <4 x i32> @smear_4i(i32 %high) nounwind readnone
192   %_low = tail call <4 x i32> @smear_4i(i32 %low) nounwind readnone
193 …%a = tail call <4 x i32> @llvm.aarch64.neon.smin.v4i32(<4 x i32> %_value, <4 x i32> %_high) nounwi…
194 …%b = tail call <4 x i32> @llvm.aarch64.neon.smax.v4i32(<4 x i32> %a, <4 x i32> %_low) nounwind rea…
195   %c = shufflevector <4 x i32> %b, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
196   ret <3 x i32> %c
199 define <2 x i32> @_Z5clampDv2_iS_S_(<2 x i32> %value, <2 x i32> %low, <2 x i32> %high) nounwind rea…
200 …%1 = tail call <2 x i32> @llvm.aarch64.neon.smin.v2i32(<2 x i32> %value, <2 x i32> %high) nounwind…
201 …%2 = tail call <2 x i32> @llvm.aarch64.neon.smax.v2i32(<2 x i32> %1, <2 x i32> %low) nounwind read…
202   ret <2 x i32> %2
205 define <2 x i32> @_Z5clampDv2_iii(<2 x i32> %value, i32 %low, i32 %high) nounwind readonly {
206   %_high = tail call <2 x i32> @smear_2i(i32 %high) nounwind readnone
207   %_low = tail call <2 x i32> @smear_2i(i32 %low) nounwind readnone
208 …%a = tail call <2 x i32> @llvm.aarch64.neon.smin.v2i32(<2 x i32> %value, <2 x i32> %_high) nounwin…
209 …%b = tail call <2 x i32> @llvm.aarch64.neon.smax.v2i32(<2 x i32> %a, <2 x i32> %_low) nounwind rea…
210   ret <2 x i32> %b
215 define <4 x i32> @_Z5clampDv4_jS_S_(<4 x i32> %value, <4 x i32> %low, <4 x i32> %high) nounwind rea…
216 …%1 = tail call <4 x i32> @llvm.aarch64.neon.umin.v4i32(<4 x i32> %value, <4 x i32> %high) nounwind…
217 …%2 = tail call <4 x i32> @llvm.aarch64.neon.umax.v4i32(<4 x i32> %1, <4 x i32> %low) nounwind read…
218   ret <4 x i32> %2
221 define <4 x i32> @_Z5clampDv4_jjj(<4 x i32> %value, i32 %low, i32 %high) nounwind readonly {
222   %_high = tail call <4 x i32> @smear_4i(i32 %high) nounwind readnone
223   %_low = tail call <4 x i32> @smear_4i(i32 %low) nounwind readnone
224 …%1 = tail call <4 x i32> @llvm.aarch64.neon.umin.v4i32(<4 x i32> %value, <4 x i32> %_high) nounwin…
225 …%2 = tail call <4 x i32> @llvm.aarch64.neon.umax.v4i32(<4 x i32> %1, <4 x i32> %_low) nounwind rea…
226   ret <4 x i32> %2
229 define <3 x i32> @_Z5clampDv3_jS_S_(<3 x i32> %value, <3 x i32> %low, <3 x i32> %high) nounwind rea…
230   %_value = shufflevector <3 x i32> %value, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
231   %_low = shufflevector <3 x i32> %low, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
232   %_high = shufflevector <3 x i32> %high, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
233 …%a = tail call <4 x i32> @llvm.aarch64.neon.umin.v4i32(<4 x i32> %_value, <4 x i32> %_high) nounwi…
234 …%b = tail call <4 x i32> @llvm.aarch64.neon.umax.v4i32(<4 x i32> %a, <4 x i32> %_low) nounwind rea…
235   %c = shufflevector <4 x i32> %b, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
236   ret <3 x i32> %c
239 define <3 x i32> @_Z5clampDv3_jjj(<3 x i32> %value, i32 %low, i32 %high) nounwind readonly {
240   %_value = shufflevector <3 x i32> %value, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
241   %_high = tail call <4 x i32> @smear_4i(i32 %high) nounwind readnone
242   %_low = tail call <4 x i32> @smear_4i(i32 %low) nounwind readnone
243 …%a = tail call <4 x i32> @llvm.aarch64.neon.umin.v4i32(<4 x i32> %_value, <4 x i32> %_high) nounwi…
244 …%b = tail call <4 x i32> @llvm.aarch64.neon.umax.v4i32(<4 x i32> %a, <4 x i32> %_low) nounwind rea…
245   %c = shufflevector <4 x i32> %b, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
246   ret <3 x i32> %c
249 define <2 x i32> @_Z5clampDv2_jS_S_(<2 x i32> %value, <2 x i32> %low, <2 x i32> %high) nounwind rea…
250 …%1 = tail call <2 x i32> @llvm.aarch64.neon.umin.v2i32(<2 x i32> %value, <2 x i32> %high) nounwind…
251 …%2 = tail call <2 x i32> @llvm.aarch64.neon.umax.v2i32(<2 x i32> %1, <2 x i32> %low) nounwind read…
252   ret <2 x i32> %2
255 define <2 x i32> @_Z5clampDv2_jjj(<2 x i32> %value, i32 %low, i32 %high) nounwind readonly {
256   %_high = tail call <2 x i32> @smear_2i(i32 %high) nounwind readnone
257   %_low = tail call <2 x i32> @smear_2i(i32 %low) nounwind readnone
258 …%a = tail call <2 x i32> @llvm.aarch64.neon.umin.v2i32(<2 x i32> %value, <2 x i32> %_high) nounwin…
259 …%b = tail call <2 x i32> @llvm.aarch64.neon.umax.v2i32(<2 x i32> %a, <2 x i32> %_low) nounwind rea…
260   ret <2 x i32> %b
268 define <4 x float> @_Z4fmaxDv4_fS_(<4 x float> %v1, <4 x float> %v2) nounwind readonly {
269 …%1 = tail call <4 x float> @llvm.aarch64.neon.fmax.v4f32(<4 x float> %v1, <4 x float> %v2) nounwin…
270   ret <4 x float> %1
273 define <4 x float> @_Z4fmaxDv4_ff(<4 x float> %v1, float %v2) nounwind readonly {
274   %1 = tail call <4 x float> @smear_4f(float %v2) nounwind readnone
275 …%2 = tail call <4 x float> @llvm.aarch64.neon.fmax.v4f32(<4 x float> %v1, <4 x float> %1) nounwind…
276   ret <4 x float> %2
279 define <3 x float> @_Z4fmaxDv3_fS_(<3 x float> %v1, <3 x float> %v2) nounwind readonly {
280   %1 = shufflevector <3 x float> %v1, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
281   %2 = shufflevector <3 x float> %v2, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
282 …%3 = tail call <4 x float> @llvm.aarch64.neon.fmax.v4f32(<4 x float> %1, <4 x float> %2) nounwind …
283   %4 = shufflevector <4 x float> %3, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
284   ret <3 x float> %4
287 define <3 x float> @_Z4fmaxDv3_ff(<3 x float> %v1, float %v2) nounwind readonly {
288   %1 = shufflevector <3 x float> %v1, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
289   %2 = tail call <4 x float> @smear_4f(float %v2) nounwind readnone
290 …%3 = tail call <4 x float> @llvm.aarch64.neon.fmax.v4f32(<4 x float> %1, <4 x float> %2) nounwind …
291   %c = shufflevector <4 x float> %3, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
292   ret <3 x float> %c
295 define <2 x float> @_Z4fmaxDv2_fS_(<2 x float> %v1, <2 x float> %v2) nounwind readonly {
296 …%1 = tail call <2 x float> @llvm.aarch64.neon.fmax.v2f32(<2 x float> %v1, <2 x float> %v2) nounwin…
297   ret <2 x float> %1
300 define <2 x float> @_Z4fmaxDv2_ff(<2 x float> %v1, float %v2) nounwind readonly {
301   %1 = tail call <2 x float> @smear_2f(float %v2) nounwind readnone
302 …%2 = tail call <2 x float> @llvm.aarch64.neon.fmax.v2f32(<2 x float> %v1, <2 x float> %1) nounwind…
303   ret <2 x float> %2
317 define <4 x float> @_Z4fminDv4_fS_(<4 x float> %v1, <4 x float> %v2) nounwind readonly {
318 …%1 = tail call <4 x float> @llvm.aarch64.neon.fmin.v4f32(<4 x float> %v1, <4 x float> %v2) nounwin…
319   ret <4 x float> %1
322 define <4 x float> @_Z4fminDv4_ff(<4 x float> %v1, float %v2) nounwind readonly {
323   %1 = tail call <4 x float> @smear_4f(float %v2) nounwind readnone
324 …%2 = tail call <4 x float> @llvm.aarch64.neon.fmin.v4f32(<4 x float> %v1, <4 x float> %1) nounwind…
325   ret <4 x float> %2
328 define <3 x float> @_Z4fminDv3_fS_(<3 x float> %v1, <3 x float> %v2) nounwind readonly {
329   %1 = shufflevector <3 x float> %v1, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
330   %2 = shufflevector <3 x float> %v2, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
331 …%3 = tail call <4 x float> @llvm.aarch64.neon.fmin.v4f32(<4 x float> %1, <4 x float> %2) nounwind …
332   %4 = shufflevector <4 x float> %3, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
333   ret <3 x float> %4
336 define <3 x float> @_Z4fminDv3_ff(<3 x float> %v1, float %v2) nounwind readonly {
337   %1 = shufflevector <3 x float> %v1, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
338   %2 = tail call <4 x float> @smear_4f(float %v2) nounwind readnone
339 …%3 = tail call <4 x float> @llvm.aarch64.neon.fmin.v4f32(<4 x float> %1, <4 x float> %2) nounwind …
340   %c = shufflevector <4 x float> %3, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
341   ret <3 x float> %c
344 define <2 x float> @_Z4fminDv2_fS_(<2 x float> %v1, <2 x float> %v2) nounwind readonly {
345 …%1 = tail call <2 x float> @llvm.aarch64.neon.fmin.v2f32(<2 x float> %v1, <2 x float> %v2) nounwin…
346   ret <2 x float> %1
349 define <2 x float> @_Z4fminDv2_ff(<2 x float> %v1, float %v2) nounwind readonly {
350   %1 = tail call <2 x float> @smear_2f(float %v2) nounwind readnone
351 …%2 = tail call <2 x float> @llvm.aarch64.neon.fmin.v2f32(<2 x float> %v1, <2 x float> %1) nounwind…
352   ret <2 x float> %2
372 define <2 x i8> @_Z3maxDv2_cS_(<2 x i8> %v1, <2 x i8> %v2) nounwind readnone {
373   %1 = sext <2 x i8> %v1 to <2 x i32>
374   %2 = sext <2 x i8> %v2 to <2 x i32>
375 …%3 = tail call <2 x i32> @llvm.aarch64.neon.smax.v2i32(<2 x i32> %1, <2 x i32> %2) nounwind readno…
376   %4 = trunc <2 x i32> %3 to <2 x i8>
377   ret <2 x i8> %4
380 define <3 x i8> @_Z3maxDv3_cS_(i32 %v1, i32 %v2) nounwind readnone {
381   %1 = bitcast i32 %v1 to <4 x i8>
382   %2 = bitcast i32 %v2 to <4 x i8>
383   %3 = sext <4 x i8> %1 to <4 x i32>
384   %4 = sext <4 x i8> %2 to <4 x i32>
385 …%5 = tail call <4 x i32> @llvm.aarch64.neon.smax.v4i32(<4 x i32> %3, <4 x i32> %4) nounwind readno…
386   %6 = shufflevector <4 x i32> %5, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
387   %7 = trunc <3 x i32> %6 to <3 x i8>
388   ret <3 x i8> %7
391 define <4 x i8> @_Z3maxDv4_cS_(<4 x i8> %v1, <4 x i8> %v2) nounwind readnone {
392   %1 = sext <4 x i8> %v1 to <4 x i32>
393   %2 = sext <4 x i8> %v2 to <4 x i32>
394 …%3 = tail call <4 x i32> @llvm.aarch64.neon.smax.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind readno…
395   %4 = trunc <4 x i32> %3 to <4 x i8>
396   ret <4 x i8> %4
405 define <2 x i16> @_Z3maxDv2_sS_(<2 x i16> %v1, <2 x i16> %v2) nounwind readnone {
406   %1 = sext <2 x i16> %v1 to <2 x i32>
407   %2 = sext <2 x i16> %v2 to <2 x i32>
408 …%3 = tail call <2 x i32> @llvm.aarch64.neon.smax.v2i32(<2 x i32> %1, <2 x i32> %2) nounwind readno…
409   %4 = trunc <2 x i32> %3 to <2 x i16>
410   ret <2 x i16> %4
413 define <3 x i16> @_Z3maxDv3_sS_(<3 x i16> %v1, <3 x i16> %v2) nounwind readnone {
414   %1 = sext <3 x i16> %v1 to <3 x i32>
415   %2 = sext <3 x i16> %v2 to <3 x i32>
416   %3 = shufflevector <3 x i32> %1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
417   %4 = shufflevector <3 x i32> %2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
418 …%5 = tail call <4 x i32> @llvm.aarch64.neon.smax.v4i32(<4 x i32> %3, <4 x i32> %4) nounwind readno…
419   %6 = shufflevector <4 x i32> %5, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
420   %7 = trunc <3 x i32> %6 to <3 x i16>
421   ret <3 x i16> %7
424 define <4 x i16> @_Z3maxDv4_sS_(<4 x i16> %v1, <4 x i16> %v2) nounwind readnone {
425   %1 = sext <4 x i16> %v1 to <4 x i32>
426   %2 = sext <4 x i16> %v2 to <4 x i32>
427 …%3 = tail call <4 x i32> @llvm.aarch64.neon.smax.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind readno…
428   %4 = trunc <4 x i32> %3 to <4 x i16>
429   ret <4 x i16> %4
438 define <2 x i32> @_Z3maxDv2_iS_(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone {
439 …%1 = tail call <2 x i32> @llvm.aarch64.neon.smax.v2i32(<2 x i32> %v1, <2 x i32> %v2) nounwind read…
440   ret <2 x i32> %1
443 define <3 x i32> @_Z3maxDv3_iS_(<3 x i32> %v1, <3 x i32> %v2) nounwind readnone {
444   %1 = shufflevector <3 x i32> %v1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
445   %2 = shufflevector <3 x i32> %v2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
446 …%3 = tail call <4 x i32   > @llvm.aarch64.neon.smax.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind rea…
447   %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
448   ret <3 x i32> %4
451 define <4 x i32> @_Z3maxDv4_iS_(<4 x i32> %v1, <4 x i32> %v2) nounwind readnone {
452 …%1 = tail call <4 x i32> @llvm.aarch64.neon.smax.v4i32(<4 x i32> %v1, <4 x i32> %v2) nounwind read…
453   ret <4 x i32> %1
470 define <2 x i8> @_Z3maxDv2_hS_(<2 x i8> %v1, <2 x i8> %v2) nounwind readnone {
471   %1 = zext <2 x i8> %v1 to <2 x i32>
472   %2 = zext <2 x i8> %v2 to <2 x i32>
473 …%3 = tail call <2 x i32> @llvm.aarch64.neon.umax.v2i32(<2 x i32> %1, <2 x i32> %2) nounwind readno…
474   %4 = trunc <2 x i32> %3 to <2 x i8>
475   ret <2 x i8> %4
478 define <3 x i8> @_Z3maxDv3_hS_(i32 %v1, i32 %v2) nounwind readnone {
479   %1 = bitcast i32 %v1 to <4 x i8>
480   %2 = bitcast i32 %v2 to <4 x i8>
481   %3 = zext <4 x i8> %1 to <4 x i32>
482   %4 = zext <4 x i8> %2 to <4 x i32>
483 …%5 = tail call <4 x i32> @llvm.aarch64.neon.umax.v4i32(<4 x i32> %3, <4 x i32> %4) nounwind readno…
484   %6 = shufflevector <4 x i32> %5, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
485   %7 = trunc <3 x i32> %6 to <3 x i8>
486   ret <3 x i8> %7
489 define <4 x i8> @_Z3maxDv4_hS_(<4 x i8> %v1, <4 x i8> %v2) nounwind readnone {
490   %1 = zext <4 x i8> %v1 to <4 x i32>
491   %2 = zext <4 x i8> %v2 to <4 x i32>
492 …%3 = tail call <4 x i32> @llvm.aarch64.neon.umax.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind readno…
493   %4 = trunc <4 x i32> %3 to <4 x i8>
494   ret <4 x i8> %4
503 define <2 x i16> @_Z3maxDv2_tS_(<2 x i16> %v1, <2 x i16> %v2) nounwind readnone {
504   %1 = zext <2 x i16> %v1 to <2 x i32>
505   %2 = zext <2 x i16> %v2 to <2 x i32>
506 …%3 = tail call <2 x i32> @llvm.aarch64.neon.umax.v2i32(<2 x i32> %1, <2 x i32> %2) nounwind readno…
507   %4 = trunc <2 x i32> %3 to <2 x i16>
508   ret <2 x i16> %4
511 define <3 x i16> @_Z3maxDv3_tS_(<3 x i16> %v1, <3 x i16> %v2) nounwind readnone {
512   %1 = zext <3 x i16> %v1 to <3 x i32>
513   %2 = zext <3 x i16> %v2 to <3 x i32>
514   %3 = shufflevector <3 x i32> %1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
515   %4 = shufflevector <3 x i32> %2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
516 …%5 = tail call <4 x i32> @llvm.aarch64.neon.umax.v4i32(<4 x i32> %3, <4 x i32> %4) nounwind readno…
517   %6 = shufflevector <4 x i32> %5, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
518   %7 = trunc <3 x i32> %6 to <3 x i16>
519   ret <3 x i16> %7
522 define <4 x i16> @_Z3maxDv4_tS_(<4 x i16> %v1, <4 x i16> %v2) nounwind readnone {
523   %1 = zext <4 x i16> %v1 to <4 x i32>
524   %2 = zext <4 x i16> %v2 to <4 x i32>
525 …%3 = tail call <4 x i32> @llvm.aarch64.neon.umax.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind readno…
526   %4 = trunc <4 x i32> %3 to <4 x i16>
527   ret <4 x i16> %4
536 define <2 x i32> @_Z3maxDv2_jS_(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone {
537 …%1 = tail call <2 x i32> @llvm.aarch64.neon.umax.v2i32(<2 x i32> %v1, <2 x i32> %v2) nounwind read…
538   ret <2 x i32> %1
541 define <3 x i32> @_Z3maxDv3_jS_(<3 x i32> %v1, <3 x i32> %v2) nounwind readnone {
542   %1 = shufflevector <3 x i32> %v1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
543   %2 = shufflevector <3 x i32> %v2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
544 …%3 = tail call <4 x i32   > @llvm.aarch64.neon.umax.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind rea…
545   %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
546   ret <3 x i32> %4
549 define <4 x i32> @_Z3maxDv4_jS_(<4 x i32> %v1, <4 x i32> %v2) nounwind readnone {
550 …%1 = tail call <4 x i32> @llvm.aarch64.neon.umax.v4i32(<4 x i32> %v1, <4 x i32> %v2) nounwind read…
551   ret <4 x i32> %1
562 define <2 x float> @_Z3maxDv2_fS_(<2 x float> %v1, <2 x float> %v2) nounwind readnone {
563   %1 = tail call <2 x float> @_Z4fmaxDv2_fS_(<2 x float> %v1, <2 x float> %v2)
564   ret <2 x float> %1
567 define <2 x float> @_Z3maxDv2_ff(<2 x float> %v1, float %v2) nounwind readnone {
568   %1 = tail call <2 x float> @_Z4fmaxDv2_ff(<2 x float> %v1, float %v2)
569   ret <2 x float> %1
572 define <3 x float> @_Z3maxDv3_fS_(<3 x float> %v1, <3 x float> %v2) nounwind readnone {
573   %1 = tail call <3 x float> @_Z4fmaxDv3_fS_(<3 x float> %v1, <3 x float> %v2)
574   ret <3 x float> %1
577 define <3 x float> @_Z3maxDv3_ff(<3 x float> %v1, float %v2) nounwind readnone {
578   %1 = tail call <3 x float> @_Z4fmaxDv3_ff(<3 x float> %v1, float %v2)
579   ret <3 x float> %1
582 define <4 x float> @_Z3maxDv4_fS_(<4 x float> %v1, <4 x float> %v2) nounwind readnone {
583   %1 = tail call <4 x float> @_Z4fmaxDv4_fS_(<4 x float> %v1, <4 x float> %v2)
584   ret <4 x float> %1
587 define <4 x float> @_Z3maxDv4_ff(<4 x float> %v1, float %v2) nounwind readnone {
588   %1 = tail call <4 x float> @_Z4fmaxDv4_ff(<4 x float> %v1, float %v2)
589   ret <4 x float> %1
603 define <2 x i8> @_Z3minDv2_cS_(<2 x i8> %v1, <2 x i8> %v2) nounwind readnone {
604   %1 = sext <2 x i8> %v1 to <2 x i32>
605   %2 = sext <2 x i8> %v2 to <2 x i32>
606 …%3 = tail call <2 x i32> @llvm.aarch64.neon.smin.v2i32(<2 x i32> %1, <2 x i32> %2) nounwind readno…
607   %4 = trunc <2 x i32> %3 to <2 x i8>
608   ret <2 x i8> %4
611 define <3 x i8> @_Z3minDv3_cS_(i32 %v1, i32 %v2) nounwind readnone {
612   %1 = bitcast i32 %v1 to <4 x i8>
613   %2 = bitcast i32 %v2 to <4 x i8>
614   %3 = sext <4 x i8> %1 to <4 x i32>
615   %4 = sext <4 x i8> %2 to <4 x i32>
616 …%5 = tail call <4 x i32> @llvm.aarch64.neon.smin.v4i32(<4 x i32> %3, <4 x i32> %4) nounwind readno…
617   %6 = shufflevector <4 x i32> %5, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
618   %7 = trunc <3 x i32> %6 to <3 x i8>
619   ret <3 x i8> %7
622 define <4 x i8> @_Z3minDv4_cS_(<4 x i8> %v1, <4 x i8> %v2) nounwind readnone {
623   %1 = sext <4 x i8> %v1 to <4 x i32>
624   %2 = sext <4 x i8> %v2 to <4 x i32>
625 …%3 = tail call <4 x i32> @llvm.aarch64.neon.smin.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind readno…
626   %4 = trunc <4 x i32> %3 to <4 x i8>
627   ret <4 x i8> %4
636 define <2 x i16> @_Z3minDv2_sS_(<2 x i16> %v1, <2 x i16> %v2) nounwind readnone {
637   %1 = sext <2 x i16> %v1 to <2 x i32>
638   %2 = sext <2 x i16> %v2 to <2 x i32>
639 …%3 = tail call <2 x i32> @llvm.aarch64.neon.smin.v2i32(<2 x i32> %1, <2 x i32> %2) nounwind readno…
640   %4 = trunc <2 x i32> %3 to <2 x i16>
641   ret <2 x i16> %4
644 define <3 x i16> @_Z3minDv3_sS_(<3 x i16> %v1, <3 x i16> %v2) nounwind readnone {
645   %1 = sext <3 x i16> %v1 to <3 x i32>
646   %2 = sext <3 x i16> %v2 to <3 x i32>
647   %3 = shufflevector <3 x i32> %1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
648   %4 = shufflevector <3 x i32> %2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
649 …%5 = tail call <4 x i32> @llvm.aarch64.neon.smin.v4i32(<4 x i32> %3, <4 x i32> %4) nounwind readno…
650   %6 = shufflevector <4 x i32> %5, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
651   %7 = trunc <3 x i32> %6 to <3 x i16>
652   ret <3 x i16> %7
655 define <4 x i16> @_Z3minDv4_sS_(<4 x i16> %v1, <4 x i16> %v2) nounwind readnone {
656   %1 = sext <4 x i16> %v1 to <4 x i32>
657   %2 = sext <4 x i16> %v2 to <4 x i32>
658 …%3 = tail call <4 x i32> @llvm.aarch64.neon.smin.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind readno…
659   %4 = trunc <4 x i32> %3 to <4 x i16>
660   ret <4 x i16> %4
669 define <2 x i32> @_Z3minDv2_iS_(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone {
670 …%1 = tail call <2 x i32> @llvm.aarch64.neon.smin.v2i32(<2 x i32> %v1, <2 x i32> %v2) nounwind read…
671   ret <2 x i32> %1
674 define <3 x i32> @_Z3minDv3_iS_(<3 x i32> %v1, <3 x i32> %v2) nounwind readnone {
675   %1 = shufflevector <3 x i32> %v1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
676   %2 = shufflevector <3 x i32> %v2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
677 …%3 = tail call <4 x i32   > @llvm.aarch64.neon.smin.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind rea…
678   %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
679   ret <3 x i32> %4
682 define <4 x i32> @_Z3minDv4_iS_(<4 x i32> %v1, <4 x i32> %v2) nounwind readnone {
683 …%1 = tail call <4 x i32> @llvm.aarch64.neon.smin.v4i32(<4 x i32> %v1, <4 x i32> %v2) nounwind read…
684   ret <4 x i32> %1
701 define <2 x i8> @_Z3minDv2_hS_(<2 x i8> %v1, <2 x i8> %v2) nounwind readnone {
702   %1 = zext <2 x i8> %v1 to <2 x i32>
703   %2 = zext <2 x i8> %v2 to <2 x i32>
704 …%3 = tail call <2 x i32> @llvm.aarch64.neon.umin.v2i32(<2 x i32> %1, <2 x i32> %2) nounwind readno…
705   %4 = trunc <2 x i32> %3 to <2 x i8>
706   ret <2 x i8> %4
709 define <3 x i8> @_Z3minDv3_hS_(i32 %v1, i32 %v2) nounwind readnone {
710   %1 = bitcast i32 %v1 to <4 x i8>
711   %2 = bitcast i32 %v2 to <4 x i8>
712   %3 = zext <4 x i8> %1 to <4 x i32>
713   %4 = zext <4 x i8> %2 to <4 x i32>
714 …%5 = tail call <4 x i32> @llvm.aarch64.neon.umin.v4i32(<4 x i32> %3, <4 x i32> %4) nounwind readno…
715   %6 = shufflevector <4 x i32> %5, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
716   %7 = trunc <3 x i32> %6 to <3 x i8>
717   ret <3 x i8> %7
720 define <4 x i8> @_Z3minDv4_hS_(<4 x i8> %v1, <4 x i8> %v2) nounwind readnone {
721   %1 = zext <4 x i8> %v1 to <4 x i32>
722   %2 = zext <4 x i8> %v2 to <4 x i32>
723 …%3 = tail call <4 x i32> @llvm.aarch64.neon.umin.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind readno…
724   %4 = trunc <4 x i32> %3 to <4 x i8>
725   ret <4 x i8> %4
734 define <2 x i16> @_Z3minDv2_tS_(<2 x i16> %v1, <2 x i16> %v2) nounwind readnone {
735   %1 = zext <2 x i16> %v1 to <2 x i32>
736   %2 = zext <2 x i16> %v2 to <2 x i32>
737 …%3 = tail call <2 x i32> @llvm.aarch64.neon.umin.v2i32(<2 x i32> %1, <2 x i32> %2) nounwind readno…
738   %4 = trunc <2 x i32> %3 to <2 x i16>
739   ret <2 x i16> %4
742 define <3 x i16> @_Z3minDv3_tS_(<3 x i16> %v1, <3 x i16> %v2) nounwind readnone {
743   %1 = zext <3 x i16> %v1 to <3 x i32>
744   %2 = zext <3 x i16> %v2 to <3 x i32>
745   %3 = shufflevector <3 x i32> %1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
746   %4 = shufflevector <3 x i32> %2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
747 …%5 = tail call <4 x i32> @llvm.aarch64.neon.umin.v4i32(<4 x i32> %3, <4 x i32> %4) nounwind readno…
748   %6 = shufflevector <4 x i32> %5, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
749   %7 = trunc <3 x i32> %6 to <3 x i16>
750   ret <3 x i16> %7
753 define <4 x i16> @_Z3minDv4_tS_(<4 x i16> %v1, <4 x i16> %v2) nounwind readnone {
754   %1 = zext <4 x i16> %v1 to <4 x i32>
755   %2 = zext <4 x i16> %v2 to <4 x i32>
756 …%3 = tail call <4 x i32> @llvm.aarch64.neon.umin.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind readno…
757   %4 = trunc <4 x i32> %3 to <4 x i16>
758   ret <4 x i16> %4
767 define <2 x i32> @_Z3minDv2_jS_(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone {
768 …%1 = tail call <2 x i32> @llvm.aarch64.neon.umin.v2i32(<2 x i32> %v1, <2 x i32> %v2) nounwind read…
769   ret <2 x i32> %1
; Unsigned min for two <3 x i32> vectors: operands are widened to 4 lanes,
; reduced with umin.v4i32, and the first three lanes of the result returned.
772 define <3 x i32> @_Z3minDv3_jS_(<3 x i32> %v1, <3 x i32> %v2) nounwind readnone {
; Mask index 3 refers to the undef second operand, so lane 3 is undefined
; padding that never reaches the returned value.
773   %1 = shufflevector <3 x i32> %v1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
774   %2 = shufflevector <3 x i32> %v2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
775 …%3 = tail call <4 x i32   > @llvm.aarch64.neon.umin.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind rea…
776   %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
777   ret <3 x i32> %4
; Unsigned min for two <4 x i32> vectors: a direct call to the NEON
; umin.v4i32 intrinsic.
780 define <4 x i32> @_Z3minDv4_jS_(<4 x i32> %v1, <4 x i32> %v2) nounwind readnone {
781 …%1 = tail call <4 x i32> @llvm.aarch64.neon.umin.v4i32(<4 x i32> %v1, <4 x i32> %v2) nounwind read…
782   ret <4 x i32> %1
; min for two <2 x float> vectors: thin wrapper that forwards both operands
; unchanged to @_Z4fminDv2_fS_ (defined outside this excerpt).
793 define <2 x float> @_Z3minDv2_fS_(<2 x float> %v1, <2 x float> %v2) nounwind readnone {
794   %1 = tail call <2 x float> @_Z4fminDv2_fS_(<2 x float> %v1, <2 x float> %v2)
795   ret <2 x float> %1
; min of a <2 x float> vector and a scalar float: thin wrapper that forwards
; both operands unchanged to @_Z4fminDv2_ff (defined outside this excerpt).
798 define <2 x float> @_Z3minDv2_ff(<2 x float> %v1, float %v2) nounwind readnone {
799   %1 = tail call <2 x float> @_Z4fminDv2_ff(<2 x float> %v1, float %v2)
800   ret <2 x float> %1
; min for two <3 x float> vectors: thin wrapper that forwards both operands
; unchanged to @_Z4fminDv3_fS_ (defined outside this excerpt).
803 define <3 x float> @_Z3minDv3_fS_(<3 x float> %v1, <3 x float> %v2) nounwind readnone {
804   %1 = tail call <3 x float> @_Z4fminDv3_fS_(<3 x float> %v1, <3 x float> %v2)
805   ret <3 x float> %1
; min of a <3 x float> vector and a scalar float: thin wrapper that forwards
; both operands unchanged to @_Z4fminDv3_ff (defined outside this excerpt).
808 define <3 x float> @_Z3minDv3_ff(<3 x float> %v1, float %v2) nounwind readnone {
809   %1 = tail call <3 x float> @_Z4fminDv3_ff(<3 x float> %v1, float %v2)
810   ret <3 x float> %1
; min for two <4 x float> vectors: thin wrapper that forwards both operands
; unchanged to @_Z4fminDv4_fS_ (defined outside this excerpt).
813 define <4 x float> @_Z3minDv4_fS_(<4 x float> %v1, <4 x float> %v2) nounwind readnone {
814   %1 = tail call <4 x float> @_Z4fminDv4_fS_(<4 x float> %v1, <4 x float> %v2)
815   ret <4 x float> %1
; min of a <4 x float> vector and a scalar float: thin wrapper that forwards
; both operands unchanged to @_Z4fminDv4_ff (defined outside this excerpt).
818 define <4 x float> @_Z3minDv4_ff(<4 x float> %v1, float %v2) nounwind readnone {
819   %1 = tail call <4 x float> @_Z4fminDv4_ff(<4 x float> %v1, float %v2)
820   ret <4 x float> %1
; Constants consumed by @_Z18rsYuvToRGBA_uchar4hhh.
; @yuv_U / @yuv_V: per-lane integer multipliers applied to the broadcast U and
; V terms before they are added to the broadcast Y term.
828 @yuv_U = internal constant <4 x i32> <i32 0, i32 -100, i32 516, i32 0>, align 16
829 @yuv_V = internal constant <4 x i32> <i32 409, i32 -208, i32 0, i32 0>, align 16
; @yuv_0 / @yuv_255: lower and upper clamp bounds for the accumulated value.
; The upper bound is 65535 rather than 255 because the clamp is applied before
; the final logical shift right by 8 (65535 >> 8 == 255).
830 @yuv_0 = internal constant <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
831 @yuv_255 = internal constant <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>, align 16
; Converts one (Y, U, V) byte triple into a <4 x i8> result using integer
; arithmetic: result = clamp(Y' + U'*yuv_U + V'*yuv_V, 0, 65535) >> 8 per lane.
; The lines that derive %_sy3, %_su2 and %_sv2 from %pY/%pU/%pV (listing lines
; 835-842) are not shown in this excerpt.
834 define <4 x i8> @_Z18rsYuvToRGBA_uchar4hhh(i8 %pY, i8 %pU, i8 %pV) nounwind readnone alwaysinline {
; Broadcast each scalar term into a <4 x i32> via @smear_4i32 (defined outside
; this excerpt).
843   %_y = tail call <4 x i32> @smear_4i32(i32 %_sy3) nounwind readnone
844   %_u = tail call <4 x i32> @smear_4i32(i32 %_su2) nounwind readnone
845   %_v = tail call <4 x i32> @smear_4i32(i32 %_sv2) nounwind readnone
; NOTE(review): these loads state align 8 although the globals are declared
; align 16; under-stating alignment is legal but conservative.
847   %mu = load <4 x i32>, <4 x i32>* @yuv_U, align 8
848   %mv = load <4 x i32>, <4 x i32>* @yuv_V, align 8
; Per-lane weighted sum: y + u*mu + v*mv.
849   %_u2 = mul <4 x i32> %_u, %mu
850   %_v2 = mul <4 x i32> %_v, %mv
851   %_y2 = add <4 x i32> %_y, %_u2
852   %_y3 = add <4 x i32> %_y2, %_v2
; Disabled alternative that used the sqshrun intrinsic instead of the explicit
; clamp-and-shift sequence below.
854 …; %r1 = tail call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> %_y3, <4 x i32> <i32 8, i32…
855 ;  %r2 = trunc <4 x i16> %r1 to <4 x i8>
856 ;  ret <4 x i8> %r2
; Clamp to [0, 65535] with signed max/min, then drop the low 8 bits and
; truncate each lane to a byte.
858   %c0 = load <4 x i32>, <4 x i32>* @yuv_0, align 8
859   %c255 = load <4 x i32>, <4 x i32>* @yuv_255, align 8
860 …%r1 = tail call <4 x i32> @llvm.aarch64.neon.smax.v4i32(<4 x i32> %_y3, <4 x i32> %c0) nounwind re…
861 …%r2 = tail call <4 x i32> @llvm.aarch64.neon.smin.v4i32(<4 x i32> %r1, <4 x i32> %c255) nounwind r…
862   %r3 = lshr <4 x i32> %r2, <i32 8, i32 8, i32 8, i32 8>
863   %r4 = trunc <4 x i32> %r3 to <4 x i8>
864   ret <4 x i8> %r4
; half_recip for <2 x float>: starts from the NEON frecpe reciprocal estimate
; and applies two refinement steps, each multiplying the current estimate by
; the frecps (reciprocal step) result for that estimate and %v.
871 define <2 x float> @_Z10half_recipDv2_f(<2 x float> %v) nounwind readnone {
; Initial estimate.
872   %1 = tail call <2 x float> @llvm.aarch64.neon.frecpe.v2f32(<2 x float> %v) nounwind readnone
; First refinement: %3 = estimate * step(estimate, v).
873 …%2 = tail call <2 x float> @llvm.aarch64.neon.frecps.v2f32(<2 x float> %1, <2 x float> %v) nounwin…
874   %3 = fmul <2 x float> %1, %2
; Second refinement, applied to the improved estimate %3.
875 …%4 = tail call <2 x float> @llvm.aarch64.neon.frecps.v2f32(<2 x float> %3, <2 x float> %v) nounwin…
876   %5 = fmul <2 x float> %4, %3
877   ret <2 x float> %5
; half_recip for <4 x float>: starts from the NEON frecpe reciprocal estimate
; and applies two refinement steps, each multiplying the current estimate by
; the frecps (reciprocal step) result for that estimate and %v.
880 define <4 x float> @_Z10half_recipDv4_f(<4 x float> %v) nounwind readnone {
; Initial estimate.
881   %1 = tail call <4 x float> @llvm.aarch64.neon.frecpe.v4f32(<4 x float> %v) nounwind readnone
; First refinement: %3 = estimate * step(estimate, v).
882 …%2 = tail call <4 x float> @llvm.aarch64.neon.frecps.v4f32(<4 x float> %1, <4 x float> %v) nounwin…
883   %3 = fmul <4 x float> %1, %2
; Second refinement, applied to the improved estimate %3.
884 …%4 = tail call <4 x float> @llvm.aarch64.neon.frecps.v4f32(<4 x float> %3, <4 x float> %v) nounwin…
885   %5 = fmul <4 x float> %4, %3
886   ret <4 x float> %5
; half_recip for <3 x float>: widens the input to 4 lanes, reuses the
; <4 x float> implementation, and returns lanes 0..2 of its result.
889 define <3 x float> @_Z10half_recipDv3_f(<3 x float> %v) nounwind readnone {
; Mask index 3 picks from the undef second operand, so lane 3 is undefined
; padding; its result is dropped by the narrowing shuffle below.
890   %1 = shufflevector <3 x float> %v, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
891   %2 = tail call <4 x float> @_Z10half_recipDv4_f(<4 x float> %1) nounwind readnone
892   %3 = shufflevector <4 x float> %2, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
893   ret <3 x float> %3
902   %1 = insertelement <2 x float> undef, float %v, i32 0
903   %2 = tail call <2 x float> @llvm.aarch64.neon.frsqrte.v2f32(<2 x float> %1) nounwind readnone
904   %3 = fmul <2 x float> %2, %2
905 …%4 = tail call <2 x float> @llvm.aarch64.neon.frsqrts.v2f32(<2 x float> %1, <2 x float> %3) nounwi…
906   %5 = fmul <2 x float> %2, %4
907   %6 = extractelement <2 x float> %5, i32 0
; half_rsqrt for <2 x float>: takes the NEON frsqrte reciprocal-square-root
; estimate and applies a single refinement step using frsqrts.
911 define <2 x float> @_Z10half_rsqrtDv2_f(<2 x float> %v) nounwind readnone {
; Initial estimate e.
912   %1 = tail call <2 x float> @llvm.aarch64.neon.frsqrte.v2f32(<2 x float> %v) nounwind readnone
; Refinement: result = e * frsqrts(v, e*e).
913   %2 = fmul <2 x float> %1, %1
914 …%3 = tail call <2 x float> @llvm.aarch64.neon.frsqrts.v2f32(<2 x float> %v, <2 x float> %2) nounwi…
915   %4 = fmul <2 x float> %1, %3
916   ret <2 x float> %4
; half_rsqrt for <3 x float>: widens the input to 4 lanes, computes the
; frsqrte estimate with one frsqrts refinement step on the 4-lane vector, and
; returns lanes 0..2.
919 define <3 x float> @_Z10half_rsqrtDv3_f(<3 x float> %v) nounwind readnone {
; Mask index 3 picks from the undef second operand; lane 3 is padding only.
920   %1 = shufflevector <3 x float> %v, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Initial estimate e.
921   %2 = tail call <4 x float> @llvm.aarch64.neon.frsqrte.v4f32(<4 x float> %1) nounwind readnone
; Refinement: result = e * frsqrts(v, e*e).
922   %3 = fmul <4 x float> %2, %2
923 …%4 = tail call <4 x float> @llvm.aarch64.neon.frsqrts.v4f32(<4 x float> %1, <4 x float> %3) nounwi…
924   %5 = fmul <4 x float> %2, %4
; Drop the padding lane.
925   %6 = shufflevector <4 x float> %5, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
926   ret <3 x float> %6
; half_rsqrt for <4 x float>: takes the NEON frsqrte reciprocal-square-root
; estimate and applies a single refinement step using frsqrts.
929 define <4 x float> @_Z10half_rsqrtDv4_f(<4 x float> %v) nounwind readnone {
; Initial estimate e.
930   %1 = tail call <4 x float> @llvm.aarch64.neon.frsqrte.v4f32(<4 x float> %v) nounwind readnone
; Refinement: result = e * frsqrts(v, e*e).
931   %2 = fmul <4 x float> %1, %1
932 …%3 = tail call <4 x float> @llvm.aarch64.neon.frsqrts.v4f32(<4 x float> %v, <4 x float> %2) nounwi…
933   %4 = fmul <4 x float> %1, %3
934   ret <4 x float> %4
; Matrix types used by the rsMatrixMultiply helpers: each wraps a flat float
; array of 16, 9 or 4 elements respectively.
941 %struct.rs_matrix4x4 = type { [16 x float] }
942 %struct.rs_matrix3x3 = type { [9 x float] }
943 %struct.rs_matrix2x2 = type { [4 x float] }
; Broadcasts the scalar %in into all four lanes of a <4 x float> by inserting
; it at indices 0 through 3 in turn. Internal and alwaysinline.
945 define internal <4 x float> @smear_f(float %in) nounwind readnone alwaysinline {
946   %1 = insertelement <4 x float> undef, float %in, i32 0
947   %2 = insertelement <4 x float> %1, float %in, i32 1
948   %3 = insertelement <4 x float> %2, float %in, i32 2
949   %4 = insertelement <4 x float> %3, float %in, i32 3
950   ret <4 x float> %4
; rsMatrixMultiply(rs_matrix3x3*, float3): broadcasts each input component,
; multiplies it by a 4-wide vector loaded from the matrix, sums the three
; products and returns lanes 0..2. The getelementptr lines defining %px, %py
; and %pz are not shown in this excerpt.
954 define <3 x float> @_Z16rsMatrixMultiplyPK12rs_matrix3x3Dv3_f(%struct.rs_matrix3x3* nocapture %m, <…
955   %x0 = extractelement <3 x float> %in, i32 0
956   %x = tail call <4 x float> @smear_f(float %x0) nounwind readnone
957   %y0 = extractelement <3 x float> %in, i32 1
958   %y = tail call <4 x float> @smear_f(float %y0) nounwind readnone
959   %z0 = extractelement <3 x float> %in, i32 2
960   %z = tail call <4 x float> @smear_f(float %z0) nounwind readnone
; Vector loads use align 4 (float alignment), not the natural 16-byte vector
; alignment.
963   %px2 = bitcast float* %px to <4 x float>*
964   %xm = load <4 x float>, <4 x float>* %px2, align 4
967   %py2 = bitcast float* %py to <4 x float>*
968   ; %ym = call <4 x float> @llvm.aarch64.neon.ld4.v4f32(i8* %py2, i32 4) nounwind
969   %ym = load <4 x float>, <4 x float>* %py2, align 4
972   %pz2 = bitcast float* %pz to <4 x float>*
973 ;  %zm2 = call <4 x float> @llvm.aarch64.neon.ld4.v4f32(i8* %pz2, i32 4) nounwind
974   %zm2 = load <4 x float>, <4 x float>* %pz2, align 4
; Shift the loaded lanes down by one: lanes 1,2,3 of %zm2 become lanes 0,1,2;
; mask index 4 selects from the undef operand, so lane 3 is undefined.
; NOTE(review): presumably %pz points one float before the wanted elements so
; the 4-wide load stays inside the 9-float matrix - confirm against the
; getelementptr line not shown here.
975   %zm = shufflevector <4 x float> %zm2, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
; Accumulate x*xm + y*ym + z*zm.
977   %a1 = fmul <4 x float> %x, %xm
978   %a2 = fmul <4 x float> %y, %ym
979   %a3 = fadd <4 x float> %a1, %a2
980   %a4 = fmul <4 x float> %z, %zm
981   %a5 = fadd <4 x float> %a4, %a3
; Only lanes 0..2 are returned; lane 3 is discarded.
982   %a6 = shufflevector <4 x float> %a5, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
983   ret <3 x float> %a6
; rsMatrixMultiply(rs_matrix3x3*, float2): broadcasts the two input
; components, multiplies each by a 4-wide vector loaded from the matrix, sums
; the products and returns lanes 0..2. The getelementptr lines defining %px
; and %py are not shown in this excerpt.
986 define <3 x float> @_Z16rsMatrixMultiplyPK12rs_matrix3x3Dv2_f(%struct.rs_matrix3x3* nocapture %m, <…
987   %x0 = extractelement <2 x float> %in, i32 0
988   %x = tail call <4 x float> @smear_f(float %x0) nounwind readnone
989   %y0 = extractelement <2 x float> %in, i32 1
990   %y = tail call <4 x float> @smear_f(float %y0) nounwind readnone
; Vector loads use align 4 (float alignment).
993   %px2 = bitcast float* %px to <4 x float>*
994   %xm = load <4 x float>, <4 x float>* %px2, align 4
996   %py2 = bitcast float* %py to <4 x float>*
997   %ym = load <4 x float>, <4 x float>* %py2, align 4
; Accumulate x*xm + y*ym; only lanes 0..2 are returned.
999   %a1 = fmul <4 x float> %x, %xm
1000   %a2 = fmul <4 x float> %y, %ym
1001   %a3 = fadd <4 x float> %a1, %a2
1002   %a4 = shufflevector <4 x float> %a3, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
1003   ret <3 x float> %a4
; rsMatrixMultiply(rs_matrix4x4*, float4): broadcasts each of the four input
; components, multiplies it by a 4-wide vector loaded from the matrix, and
; returns the sum of the four products. The getelementptr lines defining %px,
; %py, %pz and %pw are not shown in this excerpt.
1006 define <4 x float> @_Z16rsMatrixMultiplyPK12rs_matrix4x4Dv4_f(%struct.rs_matrix4x4* nocapture %m, <…
1007   %x0 = extractelement <4 x float> %in, i32 0
1008   %x = tail call <4 x float> @smear_f(float %x0) nounwind readnone
1009   %y0 = extractelement <4 x float> %in, i32 1
1010   %y = tail call <4 x float> @smear_f(float %y0) nounwind readnone
1011   %z0 = extractelement <4 x float> %in, i32 2
1012   %z = tail call <4 x float> @smear_f(float %z0) nounwind readnone
1013   %w0 = extractelement <4 x float> %in, i32 3
1014   %w = tail call <4 x float> @smear_f(float %w0) nounwind readnone
; Vector loads use align 4 (float alignment).
1017   %px2 = bitcast float* %px to <4 x float>*
1018   %xm = load <4 x float>, <4 x float>* %px2, align 4
1020   %py2 = bitcast float* %py to <4 x float>*
1021   %ym = load <4 x float>, <4 x float>* %py2, align 4
1023   %pz2 = bitcast float* %pz to <4 x float>*
1024   %zm = load <4 x float>, <4 x float>* %pz2, align 4
1026   %pw2 = bitcast float* %pw to <4 x float>*
1027   %wm = load <4 x float>, <4 x float>* %pw2, align 4
; Accumulate x*xm + y*ym + z*zm + w*wm.
1029   %a1 = fmul <4 x float> %x, %xm
1030   %a2 = fmul <4 x float> %y, %ym
1031   %a3 = fadd <4 x float> %a1, %a2
1032   %a4 = fmul <4 x float> %z, %zm
1033   %a5 = fadd <4 x float> %a3, %a4
1034   %a6 = fmul <4 x float> %w, %wm
1035   %a7 = fadd <4 x float> %a5, %a6
1036   ret <4 x float> %a7
; rsMatrixMultiply(rs_matrix4x4*, float3): like the float4 variant, but there
; is no w input component, so the %wm vector is added unscaled (equivalent to
; an implicit w of 1). The getelementptr lines defining %px, %py, %pz and %pw
; are not shown in this excerpt.
1039 define <4 x float> @_Z16rsMatrixMultiplyPK12rs_matrix4x4Dv3_f(%struct.rs_matrix4x4* nocapture %m, <…
1040   %x0 = extractelement <3 x float> %in, i32 0
1041   %x = tail call <4 x float> @smear_f(float %x0) nounwind readnone
1042   %y0 = extractelement <3 x float> %in, i32 1
1043   %y = tail call <4 x float> @smear_f(float %y0) nounwind readnone
1044   %z0 = extractelement <3 x float> %in, i32 2
1045   %z = tail call <4 x float> @smear_f(float %z0) nounwind readnone
; Vector loads use align 4 (float alignment).
1048   %px2 = bitcast float* %px to <4 x float>*
1049   %xm = load <4 x float>, <4 x float>* %px2, align 4
1051   %py2 = bitcast float* %py to <4 x float>*
1052   %ym = load <4 x float>, <4 x float>* %py2, align 4
1054   %pz2 = bitcast float* %pz to <4 x float>*
1055   %zm = load <4 x float>, <4 x float>* %pz2, align 4
1057   %pw2 = bitcast float* %pw to <4 x float>*
1058   %wm = load <4 x float>, <4 x float>* %pw2, align 4
; Accumulate wm + x*xm + y*ym + z*zm.
1060   %a1 = fmul <4 x float> %x, %xm
1061   %a2 = fadd <4 x float> %wm, %a1
1062   %a3 = fmul <4 x float> %y, %ym
1063   %a4 = fadd <4 x float> %a2, %a3
1064   %a5 = fmul <4 x float> %z, %zm
1065   %a6 = fadd <4 x float> %a4, %a5
1066   ret <4 x float> %a6
; rsMatrixMultiply(rs_matrix4x4*, float2): only x and y input components
; exist, so no %zm vector is loaded and the %wm vector is added unscaled
; (equivalent to an implicit z of 0 and w of 1). The getelementptr lines
; defining %px, %py and %pw are not shown in this excerpt.
1069 define <4 x float> @_Z16rsMatrixMultiplyPK12rs_matrix4x4Dv2_f(%struct.rs_matrix4x4* nocapture %m, <…
1070   %x0 = extractelement <2 x float> %in, i32 0
1071   %x = tail call <4 x float> @smear_f(float %x0) nounwind readnone
1072   %y0 = extractelement <2 x float> %in, i32 1
1073   %y = tail call <4 x float> @smear_f(float %y0) nounwind readnone
; Vector loads use align 4 (float alignment).
1076   %px2 = bitcast float* %px to <4 x float>*
1077   %xm = load <4 x float>, <4 x float>* %px2, align 4
1079   %py2 = bitcast float* %py to <4 x float>*
1080   %ym = load <4 x float>, <4 x float>* %py2, align 4
1082   %pw2 = bitcast float* %pw to <4 x float>*
1083   %wm = load <4 x float>, <4 x float>* %pw2, align 4
; Accumulate wm + x*xm + y*ym.
1085   %a1 = fmul <4 x float> %x, %xm
1086   %a2 = fadd <4 x float> %wm, %a1
1087   %a3 = fmul <4 x float> %y, %ym
1088   %a4 = fadd <4 x float> %a2, %a3
1089   ret <4 x float> %a4
; Splat float constants (255.0, 0.5 and 0.0 in every lane) loaded by
; @_Z17rsPackColorTo8888Dv4_f as its scale factor, rounding bias and clamp
; bounds.
1099 @fc_255.0 = internal constant <4 x float> <float 255.0, float 255.0, float 255.0, float 255.0>, ali…
1100 @fc_0.5 = internal constant <4 x float> <float 0.5, float 0.5, float 0.5, float 0.5>, align 16
1101 @fc_0 = internal constant <4 x float> <float 0.0, float 0.0, float 0.0, float 0.0>, align 16
; External conversion helpers between <4 x float> and <4 x i8>, defined
; elsewhere. Within this excerpt only @_Z14convert_uchar4Dv4_f is called (by
; @_Z17rsPackColorTo8888Dv4_f).
1103 declare <4 x i8> @_Z14convert_uchar4Dv4_f(<4 x float> %in) nounwind readnone
1104 declare <4 x float> @_Z14convert_float4Dv4_h(<4 x i8> %in) nounwind readnone
; rsPackColorTo8888(float4): scales each lane of %color by 255.0, adds 0.5,
; clamps the result via @_Z5clampDv4_fS_S_ (defined outside this excerpt)
; using the 0.0 and 255.0 constants, and converts it to <4 x i8> with
; @_Z14convert_uchar4Dv4_f.
1107 define <4 x i8> @_Z17rsPackColorTo8888Dv4_f(<4 x float> %color) nounwind readnone {
1108     %f255 = load <4 x float>, <4 x float>* @fc_255.0, align 16
1109     %f05 = load <4 x float>, <4 x float>* @fc_0.5, align 16
1110     %f0 = load <4 x float>, <4 x float>* @fc_0, align 16
; v2 = color * 255.0 + 0.5
1111     %v1 = fmul <4 x float> %f255, %color
1112     %v2 = fadd <4 x float> %f05, %v1
1113 …%v3 = tail call <4 x float> @_Z5clampDv4_fS_S_(<4 x float> %v2, <4 x float> %f0, <4 x float> %f255…
1114     %v4 = tail call <4 x i8> @_Z14convert_uchar4Dv4_f(<4 x float> %v3) nounwind readnone
1115     ret <4 x i8> %v4
; rsPackColorTo8888(float3): the argument arrives typed as <4 x i32> and is
; bitcast to <4 x float>; lane 3 is then overwritten with 1.0 before
; forwarding to the float4 implementation.
; NOTE(review): the <4 x i32> parameter type presumably reflects how float3
; arguments are passed by callers - confirm against the calling convention.
1119 define <4 x i8> @_Z17rsPackColorTo8888Dv3_f(<4 x i32> %color) nounwind readnone {
1120     %1 = bitcast <4 x i32> %color to <4 x float>
1121     %2 = insertelement <4 x float> %1, float 1.0, i32 3
1122     %3 = tail call <4 x i8> @_Z17rsPackColorTo8888Dv4_f(<4 x float> %2) nounwind readnone
1123     ret <4 x i8> %3
; rsPackColorTo8888(r, g, b): assembles <r, g, b, 1.0> as a <4 x float> and
; forwards it to the float4 implementation.
1127 define <4 x i8> @_Z17rsPackColorTo8888fff(float %r, float %g, float %b) nounwind readnone {
1128     %1 = insertelement <4 x float> undef, float %r, i32 0
1129     %2 = insertelement <4 x float> %1, float %g, i32 1
1130     %3 = insertelement <4 x float> %2, float %b, i32 2
; Lane 3 is fixed at 1.0 since this overload has no fourth argument.
1131     %4 = insertelement <4 x float> %3, float 1.0, i32 3
1132     %5 = tail call <4 x i8> @_Z17rsPackColorTo8888Dv4_f(<4 x float> %4) nounwind readnone
1133     ret <4 x i8> %5
; rsPackColorTo8888(r, g, b, a): assembles <r, g, b, a> as a <4 x float> and
; forwards it to the float4 implementation.
1137 define <4 x i8> @_Z17rsPackColorTo8888ffff(float %r, float %g, float %b, float %a) nounwind readnon…
1138     %1 = insertelement <4 x float> undef, float %r, i32 0
1139     %2 = insertelement <4 x float> %1, float %g, i32 1
1140     %3 = insertelement <4 x float> %2, float %b, i32 2
1141     %4 = insertelement <4 x float> %3, float %a, i32 3
1142     %5 = tail call <4 x i8> @_Z17rsPackColorTo8888Dv4_f(<4 x float> %4) nounwind readnone
1143     ret <4 x i8> %5