3 "// Generated by LLVM NVPTX Back-End\n"
// Subsample_Bicubic_uchar: PTX entry point in which each thread combines a
// 4x4 neighbourhood of texture samples with cubic weights and stores one
// 8-bit value.
//
// Parameters, as used by the code below:
//   param_0 (u64) - texture operand of every tex.2d fetch.
//   param_1 (u64) - output base address (converted with cvta.to.global).
//   param_2 (u32) - exclusive upper bound for the thread's x index and the
//                   divisor of param_5 (presumably the output width).
//   param_3 (u32) - exclusive upper bound for the thread's y index and the
//                   divisor of param_6 (presumably the output height).
//   param_4 (u32) - multiplied by y to form the output element index
//                   (presumably the output pitch in elements).
//   param_5 (u32) - numerator of the horizontal ratio (presumably source width).
//   param_6 (u32) - numerator of the vertical ratio (presumably source height).
//   param_7 (u32) - selects the output scale: 65535 when > 8, otherwise 255
//                   (presumably a bit depth).
//   param_8 (f32) - cubic coefficient; it is negated before use and replaced
//                   by 0 when it equals 999999.0.
10 " // .globl Subsample_Bicubic_uchar\n"
12 ".visible .entry Subsample_Bicubic_uchar(\n"
13 " .param .u64 Subsample_Bicubic_uchar_param_0,\n"
14 " .param .u64 Subsample_Bicubic_uchar_param_1,\n"
15 " .param .u32 Subsample_Bicubic_uchar_param_2,\n"
16 " .param .u32 Subsample_Bicubic_uchar_param_3,\n"
17 " .param .u32 Subsample_Bicubic_uchar_param_4,\n"
18 " .param .u32 Subsample_Bicubic_uchar_param_5,\n"
19 " .param .u32 Subsample_Bicubic_uchar_param_6,\n"
20 " .param .u32 Subsample_Bicubic_uchar_param_7,\n"
21 " .param .f32 Subsample_Bicubic_uchar_param_8\n"
// Virtual register declarations.
24 " .reg .pred %p<6>;\n"
25 " .reg .b16 %rs<2>;\n"
26 " .reg .f32 %f<123>;\n"
27 " .reg .b32 %r<81>;\n"
28 " .reg .b64 %rd<22>;\n"
// Only the two bounds are loaded before the bounds check.
30 " ld.param.u32 %r4, [Subsample_Bicubic_uchar_param_3];\n"
31 " ld.param.u32 %r3, [Subsample_Bicubic_uchar_param_2];\n"
// x (%r1) = ntid.x * ctaid.x + tid.x, y (%r2) = ntid.y * ctaid.y + tid.y.
32 " // begin inline asm\n"
33 " mov.u32 %r9, %ctaid.x;\n"
34 " // end inline asm\n"
35 " // begin inline asm\n"
36 " mov.u32 %r10, %ctaid.y;\n"
37 " // end inline asm\n"
38 " // begin inline asm\n"
39 " mov.u32 %r11, %ntid.x;\n"
40 " // end inline asm\n"
41 " // begin inline asm\n"
42 " mov.u32 %r12, %ntid.y;\n"
43 " // end inline asm\n"
44 " // begin inline asm\n"
45 " mov.u32 %r13, %tid.x;\n"
46 " // end inline asm\n"
47 " // begin inline asm\n"
48 " mov.u32 %r14, %tid.y;\n"
49 " // end inline asm\n"
50 " mad.lo.s32 %r1, %r11, %r9, %r13;\n"
51 " mad.lo.s32 %r2, %r12, %r10, %r14;\n"
// %p3 is true when x >= param_2 or y >= param_3.
52 " setp.ge.s32 %p1, %r2, %r4;\n"
53 " setp.ge.s32 %p2, %r1, %r3;\n"
54 " or.pred %p3, %p2, %p1;\n"
// Out-of-range threads must skip the fetches and the store. This guard was
// missing here although %p3 is computed above and the other
// Subsample_Bicubic_* kernels in this file branch on it at the same point.
55 " @%p3 bra LBB0_2;\n"
56 " ld.param.f32 %f1, [Subsample_Bicubic_uchar_param_8];\n"
57 " ld.param.u32 %r8, [Subsample_Bicubic_uchar_param_7];\n"
58 " ld.param.u32 %r7, [Subsample_Bicubic_uchar_param_6];\n"
59 " ld.param.u32 %r6, [Subsample_Bicubic_uchar_param_5];\n"
60 " ld.param.u32 %r5, [Subsample_Bicubic_uchar_param_4];\n"
61 " ld.param.u64 %rd4, [Subsample_Bicubic_uchar_param_0];\n"
62 " ld.param.u64 %rd3, [Subsample_Bicubic_uchar_param_1];\n"
63 " cvta.to.global.u64 %rd1, %rd3;\n"
// Sample position: %f42 = (param_5 / param_2) * (x + 0.5) - 0.5 and
// %f45 = (param_6 / param_3) * (y + 0.5) - 0.5.
64 " cvt.rn.f32.s32 %f34, %r6;\n"
65 " cvt.rn.f32.s32 %f35, %r3;\n"
66 " div.rn.f32 %f36, %f34, %f35;\n"
67 " cvt.rn.f32.s32 %f37, %r7;\n"
68 " cvt.rn.f32.s32 %f38, %r4;\n"
69 " div.rn.f32 %f39, %f37, %f38;\n"
70 " cvt.rn.f32.s32 %f40, %r1;\n"
71 " add.f32 %f41, %f40, 0f3F000000;\n"
72 " fma.rn.f32 %f42, %f36, %f41, 0fBF000000;\n"
73 " cvt.rn.f32.s32 %f43, %r2;\n"
74 " add.f32 %f44, %f43, 0f3F000000;\n"
75 " fma.rn.f32 %f45, %f39, %f44, 0fBF000000;\n"
// Split into floor (%f4, %f11) and fractional part (fx = %f46, fy = %f47).
76 " cvt.rmi.f32.f32 %f4, %f42;\n"
77 " cvt.rmi.f32.f32 %f11, %f45;\n"
78 " sub.f32 %f46, %f42, %f4;\n"
79 " sub.f32 %f47, %f45, %f11;\n"
// Output scale %f48: 65535.0 when param_7 > 8, otherwise 255.0.
80 " setp.gt.s32 %p4, %r8, 8;\n"
81 " selp.b32 %r79, 65535, 255, %p4;\n"
82 " cvt.rn.f32.s32 %f48, %r79;\n"
// A (%f50) = 0 when param_8 == 999999.0 (0f497423F0), otherwise -param_8.
83 " setp.eq.f32 %p5, %f1, 0f497423F0;\n"
84 " neg.f32 %f49, %f1;\n"
85 " selp.f32 %f50, 0f00000000, %f49, %p5;\n"
// Horizontal weights:
//   %f57 = ((A*t - 5A)*t + 8A)*t - 4A        with t = fx + 1
//   %f63 = ((A+2)*fx - (A+3))*fx*fx + 1
//   %f68 = ((A+2)*u  - (A+3))*u*u   + 1      with u = 1 - fx
//   %f71 = 1 - %f57 - %f63 - %f68
86 " add.f32 %f51, %f46, 0f3F800000;\n"
87 " mul.f32 %f52, %f50, 0fC0A00000;\n"
88 " fma.rn.f32 %f53, %f50, %f51, %f52;\n"
89 " mul.f32 %f54, %f50, 0f41000000;\n"
90 " fma.rn.f32 %f55, %f51, %f53, %f54;\n"
91 " mul.f32 %f56, %f50, 0fC0800000;\n"
92 " fma.rn.f32 %f57, %f51, %f55, %f56;\n"
93 " add.f32 %f58, %f50, 0f40000000;\n"
94 " add.f32 %f59, %f50, 0f40400000;\n"
95 " neg.f32 %f60, %f59;\n"
96 " fma.rn.f32 %f61, %f58, %f46, %f60;\n"
97 " mul.f32 %f62, %f46, %f61;\n"
98 " fma.rn.f32 %f63, %f46, %f62, 0f3F800000;\n"
99 " mov.f32 %f64, 0f3F800000;\n"
100 " sub.f32 %f65, %f64, %f46;\n"
101 " fma.rn.f32 %f66, %f58, %f65, %f60;\n"
102 " mul.f32 %f67, %f65, %f66;\n"
103 " fma.rn.f32 %f68, %f65, %f67, 0f3F800000;\n"
104 " sub.f32 %f69, %f64, %f57;\n"
105 " sub.f32 %f70, %f69, %f63;\n"
106 " sub.f32 %f71, %f70, %f68;\n"
// Vertical weights %f75, %f78, %f82, %f85: same formulas with fy.
107 " add.f32 %f72, %f47, 0f3F800000;\n"
108 " fma.rn.f32 %f73, %f50, %f72, %f52;\n"
109 " fma.rn.f32 %f74, %f72, %f73, %f54;\n"
110 " fma.rn.f32 %f75, %f72, %f74, %f56;\n"
111 " fma.rn.f32 %f76, %f58, %f47, %f60;\n"
112 " mul.f32 %f77, %f47, %f76;\n"
113 " fma.rn.f32 %f78, %f47, %f77, 0f3F800000;\n"
114 " sub.f32 %f79, %f64, %f47;\n"
115 " fma.rn.f32 %f80, %f58, %f79, %f60;\n"
116 " mul.f32 %f81, %f79, %f80;\n"
117 " fma.rn.f32 %f82, %f79, %f81, 0f3F800000;\n"
118 " sub.f32 %f83, %f64, %f75;\n"
119 " sub.f32 %f84, %f83, %f78;\n"
120 " sub.f32 %f85, %f84, %f82;\n"
// Row at floor(y) - 1: fetch columns floor(x) - 1, floor(x), floor(x) + 1 and
// floor(x) + 2, keep the first returned component of each fetch and blend the
// four values with the horizontal weights into %f93.
121 " add.f32 %f2, %f4, 0fBF800000;\n"
122 " add.f32 %f3, %f11, 0fBF800000;\n"
123 " // begin inline asm\n"
124 " tex.2d.v4.f32.f32 {%r15, %r16, %r17, %r18}, [%rd4, {%f2, %f3}];\n"
125 " // end inline asm\n"
126 " mov.b32 %f86, %r15;\n"
127 " // begin inline asm\n"
128 " tex.2d.v4.f32.f32 {%r19, %r20, %r21, %r22}, [%rd4, {%f4, %f3}];\n"
129 " // end inline asm\n"
130 " mov.b32 %f87, %r19;\n"
131 " add.f32 %f6, %f4, 0f3F800000;\n"
132 " // begin inline asm\n"
133 " tex.2d.v4.f32.f32 {%r23, %r24, %r25, %r26}, [%rd4, {%f6, %f3}];\n"
134 " // end inline asm\n"
135 " mov.b32 %f88, %r23;\n"
136 " add.f32 %f8, %f4, 0f40000000;\n"
137 " // begin inline asm\n"
138 " tex.2d.v4.f32.f32 {%r27, %r28, %r29, %r30}, [%rd4, {%f8, %f3}];\n"
139 " // end inline asm\n"
140 " mov.b32 %f89, %r27;\n"
141 " mul.f32 %f90, %f63, %f87;\n"
142 " fma.rn.f32 %f91, %f57, %f86, %f90;\n"
143 " fma.rn.f32 %f92, %f68, %f88, %f91;\n"
144 " fma.rn.f32 %f93, %f71, %f89, %f92;\n"
// Row at floor(y): same four columns, blended into %f101.
145 " // begin inline asm\n"
146 " tex.2d.v4.f32.f32 {%r31, %r32, %r33, %r34}, [%rd4, {%f2, %f11}];\n"
147 " // end inline asm\n"
148 " mov.b32 %f94, %r31;\n"
149 " // begin inline asm\n"
150 " tex.2d.v4.f32.f32 {%r35, %r36, %r37, %r38}, [%rd4, {%f4, %f11}];\n"
151 " // end inline asm\n"
152 " mov.b32 %f95, %r35;\n"
153 " // begin inline asm\n"
154 " tex.2d.v4.f32.f32 {%r39, %r40, %r41, %r42}, [%rd4, {%f6, %f11}];\n"
155 " // end inline asm\n"
156 " mov.b32 %f96, %r39;\n"
157 " // begin inline asm\n"
158 " tex.2d.v4.f32.f32 {%r43, %r44, %r45, %r46}, [%rd4, {%f8, %f11}];\n"
159 " // end inline asm\n"
160 " mov.b32 %f97, %r43;\n"
161 " mul.f32 %f98, %f63, %f95;\n"
162 " fma.rn.f32 %f99, %f57, %f94, %f98;\n"
163 " fma.rn.f32 %f100, %f68, %f96, %f99;\n"
164 " fma.rn.f32 %f101, %f71, %f97, %f100;\n"
// Row at floor(y) + 1, blended into %f109.
165 " add.f32 %f19, %f11, 0f3F800000;\n"
166 " // begin inline asm\n"
167 " tex.2d.v4.f32.f32 {%r47, %r48, %r49, %r50}, [%rd4, {%f2, %f19}];\n"
168 " // end inline asm\n"
169 " mov.b32 %f102, %r47;\n"
170 " // begin inline asm\n"
171 " tex.2d.v4.f32.f32 {%r51, %r52, %r53, %r54}, [%rd4, {%f4, %f19}];\n"
172 " // end inline asm\n"
173 " mov.b32 %f103, %r51;\n"
174 " // begin inline asm\n"
175 " tex.2d.v4.f32.f32 {%r55, %r56, %r57, %r58}, [%rd4, {%f6, %f19}];\n"
176 " // end inline asm\n"
177 " mov.b32 %f104, %r55;\n"
178 " // begin inline asm\n"
179 " tex.2d.v4.f32.f32 {%r59, %r60, %r61, %r62}, [%rd4, {%f8, %f19}];\n"
180 " // end inline asm\n"
181 " mov.b32 %f105, %r59;\n"
182 " mul.f32 %f106, %f63, %f103;\n"
183 " fma.rn.f32 %f107, %f57, %f102, %f106;\n"
184 " fma.rn.f32 %f108, %f68, %f104, %f107;\n"
185 " fma.rn.f32 %f109, %f71, %f105, %f108;\n"
// Row at floor(y) + 2, blended into %f117.
186 " add.f32 %f27, %f11, 0f40000000;\n"
187 " // begin inline asm\n"
188 " tex.2d.v4.f32.f32 {%r63, %r64, %r65, %r66}, [%rd4, {%f2, %f27}];\n"
189 " // end inline asm\n"
190 " mov.b32 %f110, %r63;\n"
191 " // begin inline asm\n"
192 " tex.2d.v4.f32.f32 {%r67, %r68, %r69, %r70}, [%rd4, {%f4, %f27}];\n"
193 " // end inline asm\n"
194 " mov.b32 %f111, %r67;\n"
195 " // begin inline asm\n"
196 " tex.2d.v4.f32.f32 {%r71, %r72, %r73, %r74}, [%rd4, {%f6, %f27}];\n"
197 " // end inline asm\n"
198 " mov.b32 %f112, %r71;\n"
199 " // begin inline asm\n"
200 " tex.2d.v4.f32.f32 {%r75, %r76, %r77, %r78}, [%rd4, {%f8, %f27}];\n"
201 " // end inline asm\n"
202 " mov.b32 %f113, %r75;\n"
203 " mul.f32 %f114, %f63, %f111;\n"
204 " fma.rn.f32 %f115, %f57, %f110, %f114;\n"
205 " fma.rn.f32 %f116, %f68, %f112, %f115;\n"
206 " fma.rn.f32 %f117, %f71, %f113, %f116;\n"
// Blend the four row results with the vertical weights.
207 " mul.f32 %f118, %f78, %f101;\n"
208 " fma.rn.f32 %f119, %f75, %f93, %f118;\n"
209 " fma.rn.f32 %f120, %f82, %f109, %f119;\n"
210 " fma.rn.f32 %f121, %f85, %f117, %f120;\n"
// Multiply by the output scale (presumably the fetched samples are normalized
// to [0, 1] - confirm against the texture setup), convert to an integer with
// round-toward-zero, and store one byte at param_1 + (y * param_4 + x).
211 " mul.f32 %f122, %f121, %f48;\n"
212 " cvt.rzi.u16.f32 %rs1, %f122;\n"
213 " mad.lo.s32 %r80, %r2, %r5, %r1;\n"
214 " cvt.s64.s32 %rd20, %r80;\n"
215 " add.s64 %rd21, %rd1, %rd20;\n"
216 " st.global.u8 [%rd21], %rs1;\n"
// Subsample_Bicubic_uchar2: PTX entry point in which each thread combines a
// 4x4 neighbourhood of texture samples with cubic weights, independently for
// the first two components of each sample, and stores two 8-bit values.
//
// Parameters, as used by the code below:
//   param_0 (u64) - texture operand of every tex.2d fetch.
//   param_1 (u64) - output base address (converted with cvta.to.global).
//   param_2 (u32) - exclusive upper bound for the thread's x index and the
//                   divisor of param_5 (presumably the output width).
//   param_3 (u32) - exclusive upper bound for the thread's y index and the
//                   divisor of param_6 (presumably the output height).
//   param_4 (u32) - multiplied by y to form the output element index
//                   (presumably the output pitch in two-byte elements).
//   param_5 (u32) - numerator of the horizontal ratio (presumably source width).
//   param_6 (u32) - numerator of the vertical ratio (presumably source height).
//   param_7 (u32) - selects the output scale: 65535 when > 8, otherwise 255
//                   (presumably a bit depth).
//   param_8 (f32) - cubic coefficient; it is negated before use and replaced
//                   by 0 when it equals 999999.0.
221 " // .globl Subsample_Bicubic_uchar2\n"
222 ".visible .entry Subsample_Bicubic_uchar2(\n"
223 " .param .u64 Subsample_Bicubic_uchar2_param_0,\n"
224 " .param .u64 Subsample_Bicubic_uchar2_param_1,\n"
225 " .param .u32 Subsample_Bicubic_uchar2_param_2,\n"
226 " .param .u32 Subsample_Bicubic_uchar2_param_3,\n"
227 " .param .u32 Subsample_Bicubic_uchar2_param_4,\n"
228 " .param .u32 Subsample_Bicubic_uchar2_param_5,\n"
229 " .param .u32 Subsample_Bicubic_uchar2_param_6,\n"
230 " .param .u32 Subsample_Bicubic_uchar2_param_7,\n"
231 " .param .f32 Subsample_Bicubic_uchar2_param_8\n"
// Virtual register declarations.
234 " .reg .pred %p<6>;\n"
235 " .reg .b16 %rs<3>;\n"
236 " .reg .f32 %f<160>;\n"
237 " .reg .b32 %r<81>;\n"
238 " .reg .b64 %rd<22>;\n"
// Only the two bounds are loaded before the bounds check.
240 " ld.param.u32 %r4, [Subsample_Bicubic_uchar2_param_3];\n"
241 " ld.param.u32 %r3, [Subsample_Bicubic_uchar2_param_2];\n"
// x (%r1) = ntid.x * ctaid.x + tid.x, y (%r2) = ntid.y * ctaid.y + tid.y.
242 " // begin inline asm\n"
243 " mov.u32 %r9, %ctaid.x;\n"
244 " // end inline asm\n"
245 " // begin inline asm\n"
246 " mov.u32 %r10, %ctaid.y;\n"
247 " // end inline asm\n"
248 " // begin inline asm\n"
249 " mov.u32 %r11, %ntid.x;\n"
250 " // end inline asm\n"
251 " // begin inline asm\n"
252 " mov.u32 %r12, %ntid.y;\n"
253 " // end inline asm\n"
254 " // begin inline asm\n"
255 " mov.u32 %r13, %tid.x;\n"
256 " // end inline asm\n"
257 " // begin inline asm\n"
258 " mov.u32 %r14, %tid.y;\n"
259 " // end inline asm\n"
260 " mad.lo.s32 %r1, %r11, %r9, %r13;\n"
261 " mad.lo.s32 %r2, %r12, %r10, %r14;\n"
// Threads with x >= param_2 or y >= param_3 branch to LBB1_2 and skip all of
// the work below.
262 " setp.ge.s32 %p1, %r2, %r4;\n"
263 " setp.ge.s32 %p2, %r1, %r3;\n"
264 " or.pred %p3, %p2, %p1;\n"
265 " @%p3 bra LBB1_2;\n"
266 " ld.param.f32 %f1, [Subsample_Bicubic_uchar2_param_8];\n"
267 " ld.param.u32 %r8, [Subsample_Bicubic_uchar2_param_7];\n"
268 " ld.param.u32 %r7, [Subsample_Bicubic_uchar2_param_6];\n"
269 " ld.param.u32 %r6, [Subsample_Bicubic_uchar2_param_5];\n"
270 " ld.param.u32 %r5, [Subsample_Bicubic_uchar2_param_4];\n"
271 " ld.param.u64 %rd4, [Subsample_Bicubic_uchar2_param_0];\n"
272 " ld.param.u64 %rd3, [Subsample_Bicubic_uchar2_param_1];\n"
273 " cvta.to.global.u64 %rd1, %rd3;\n"
// Sample position: %f42 = (param_5 / param_2) * (x + 0.5) - 0.5 and
// %f45 = (param_6 / param_3) * (y + 0.5) - 0.5.
274 " cvt.rn.f32.s32 %f34, %r6;\n"
275 " cvt.rn.f32.s32 %f35, %r3;\n"
276 " div.rn.f32 %f36, %f34, %f35;\n"
277 " cvt.rn.f32.s32 %f37, %r7;\n"
278 " cvt.rn.f32.s32 %f38, %r4;\n"
279 " div.rn.f32 %f39, %f37, %f38;\n"
280 " cvt.rn.f32.s32 %f40, %r1;\n"
281 " add.f32 %f41, %f40, 0f3F000000;\n"
282 " fma.rn.f32 %f42, %f36, %f41, 0fBF000000;\n"
283 " cvt.rn.f32.s32 %f43, %r2;\n"
284 " add.f32 %f44, %f43, 0f3F000000;\n"
285 " fma.rn.f32 %f45, %f39, %f44, 0fBF000000;\n"
// Split into floor (%f4, %f11) and fractional part (fx = %f46, fy = %f47).
286 " cvt.rmi.f32.f32 %f4, %f42;\n"
287 " cvt.rmi.f32.f32 %f11, %f45;\n"
288 " sub.f32 %f46, %f42, %f4;\n"
289 " sub.f32 %f47, %f45, %f11;\n"
// Output scale %f48: 65535.0 when param_7 > 8, otherwise 255.0.
290 " setp.gt.s32 %p4, %r8, 8;\n"
291 " selp.b32 %r79, 65535, 255, %p4;\n"
292 " cvt.rn.f32.s32 %f48, %r79;\n"
// A (%f50) = 0 when param_8 == 999999.0 (0f497423F0), otherwise -param_8.
293 " setp.eq.f32 %p5, %f1, 0f497423F0;\n"
294 " neg.f32 %f49, %f1;\n"
295 " selp.f32 %f50, 0f00000000, %f49, %p5;\n"
// Horizontal weights:
//   %f57 = ((A*t - 5A)*t + 8A)*t - 4A        with t = fx + 1
//   %f63 = ((A+2)*fx - (A+3))*fx*fx + 1
//   %f68 = ((A+2)*u  - (A+3))*u*u   + 1      with u = 1 - fx
//   %f71 = 1 - %f57 - %f63 - %f68
296 " add.f32 %f51, %f46, 0f3F800000;\n"
297 " mul.f32 %f52, %f50, 0fC0A00000;\n"
298 " fma.rn.f32 %f53, %f50, %f51, %f52;\n"
299 " mul.f32 %f54, %f50, 0f41000000;\n"
300 " fma.rn.f32 %f55, %f51, %f53, %f54;\n"
301 " mul.f32 %f56, %f50, 0fC0800000;\n"
302 " fma.rn.f32 %f57, %f51, %f55, %f56;\n"
303 " add.f32 %f58, %f50, 0f40000000;\n"
304 " add.f32 %f59, %f50, 0f40400000;\n"
305 " neg.f32 %f60, %f59;\n"
306 " fma.rn.f32 %f61, %f58, %f46, %f60;\n"
307 " mul.f32 %f62, %f46, %f61;\n"
308 " fma.rn.f32 %f63, %f46, %f62, 0f3F800000;\n"
309 " mov.f32 %f64, 0f3F800000;\n"
310 " sub.f32 %f65, %f64, %f46;\n"
311 " fma.rn.f32 %f66, %f58, %f65, %f60;\n"
312 " mul.f32 %f67, %f65, %f66;\n"
313 " fma.rn.f32 %f68, %f65, %f67, 0f3F800000;\n"
314 " sub.f32 %f69, %f64, %f57;\n"
315 " sub.f32 %f70, %f69, %f63;\n"
316 " sub.f32 %f71, %f70, %f68;\n"
// Vertical weights %f75, %f78, %f82, %f85: same formulas with fy.
317 " add.f32 %f72, %f47, 0f3F800000;\n"
318 " fma.rn.f32 %f73, %f50, %f72, %f52;\n"
319 " fma.rn.f32 %f74, %f72, %f73, %f54;\n"
320 " fma.rn.f32 %f75, %f72, %f74, %f56;\n"
321 " fma.rn.f32 %f76, %f58, %f47, %f60;\n"
322 " mul.f32 %f77, %f47, %f76;\n"
323 " fma.rn.f32 %f78, %f47, %f77, 0f3F800000;\n"
324 " sub.f32 %f79, %f64, %f47;\n"
325 " fma.rn.f32 %f80, %f58, %f79, %f60;\n"
326 " mul.f32 %f81, %f79, %f80;\n"
327 " fma.rn.f32 %f82, %f79, %f81, 0f3F800000;\n"
328 " sub.f32 %f83, %f64, %f75;\n"
329 " sub.f32 %f84, %f83, %f78;\n"
330 " sub.f32 %f85, %f84, %f82;\n"
// Row at floor(y) - 1: fetch columns floor(x) - 1, floor(x), floor(x) + 1 and
// floor(x) + 2. After each fetch the second returned component is moved first
// and the first component second. The horizontal blend yields %f100 (first
// component) and %f101 (second component).
331 " add.f32 %f2, %f4, 0fBF800000;\n"
332 " add.f32 %f3, %f11, 0fBF800000;\n"
333 " // begin inline asm\n"
334 " tex.2d.v4.f32.f32 {%r15, %r16, %r17, %r18}, [%rd4, {%f2, %f3}];\n"
335 " // end inline asm\n"
336 " mov.b32 %f86, %r16;\n"
337 " mov.b32 %f87, %r15;\n"
338 " // begin inline asm\n"
339 " tex.2d.v4.f32.f32 {%r19, %r20, %r21, %r22}, [%rd4, {%f4, %f3}];\n"
340 " // end inline asm\n"
341 " mov.b32 %f88, %r20;\n"
342 " mov.b32 %f89, %r19;\n"
343 " add.f32 %f6, %f4, 0f3F800000;\n"
344 " // begin inline asm\n"
345 " tex.2d.v4.f32.f32 {%r23, %r24, %r25, %r26}, [%rd4, {%f6, %f3}];\n"
346 " // end inline asm\n"
347 " mov.b32 %f90, %r24;\n"
348 " mov.b32 %f91, %r23;\n"
349 " add.f32 %f8, %f4, 0f40000000;\n"
350 " // begin inline asm\n"
351 " tex.2d.v4.f32.f32 {%r27, %r28, %r29, %r30}, [%rd4, {%f8, %f3}];\n"
352 " // end inline asm\n"
353 " mov.b32 %f92, %r28;\n"
354 " mov.b32 %f93, %r27;\n"
355 " mul.f32 %f94, %f63, %f89;\n"
356 " mul.f32 %f95, %f63, %f88;\n"
357 " fma.rn.f32 %f96, %f57, %f87, %f94;\n"
358 " fma.rn.f32 %f97, %f57, %f86, %f95;\n"
359 " fma.rn.f32 %f98, %f68, %f91, %f96;\n"
360 " fma.rn.f32 %f99, %f68, %f90, %f97;\n"
361 " fma.rn.f32 %f100, %f71, %f93, %f98;\n"
362 " fma.rn.f32 %f101, %f71, %f92, %f99;\n"
// Row at floor(y): same four columns, blended into %f116 / %f117.
363 " // begin inline asm\n"
364 " tex.2d.v4.f32.f32 {%r31, %r32, %r33, %r34}, [%rd4, {%f2, %f11}];\n"
365 " // end inline asm\n"
366 " mov.b32 %f102, %r32;\n"
367 " mov.b32 %f103, %r31;\n"
368 " // begin inline asm\n"
369 " tex.2d.v4.f32.f32 {%r35, %r36, %r37, %r38}, [%rd4, {%f4, %f11}];\n"
370 " // end inline asm\n"
371 " mov.b32 %f104, %r36;\n"
372 " mov.b32 %f105, %r35;\n"
373 " // begin inline asm\n"
374 " tex.2d.v4.f32.f32 {%r39, %r40, %r41, %r42}, [%rd4, {%f6, %f11}];\n"
375 " // end inline asm\n"
376 " mov.b32 %f106, %r40;\n"
377 " mov.b32 %f107, %r39;\n"
378 " // begin inline asm\n"
379 " tex.2d.v4.f32.f32 {%r43, %r44, %r45, %r46}, [%rd4, {%f8, %f11}];\n"
380 " // end inline asm\n"
381 " mov.b32 %f108, %r44;\n"
382 " mov.b32 %f109, %r43;\n"
383 " mul.f32 %f110, %f63, %f105;\n"
384 " mul.f32 %f111, %f63, %f104;\n"
385 " fma.rn.f32 %f112, %f57, %f103, %f110;\n"
386 " fma.rn.f32 %f113, %f57, %f102, %f111;\n"
387 " fma.rn.f32 %f114, %f68, %f107, %f112;\n"
388 " fma.rn.f32 %f115, %f68, %f106, %f113;\n"
389 " fma.rn.f32 %f116, %f71, %f109, %f114;\n"
390 " fma.rn.f32 %f117, %f71, %f108, %f115;\n"
// Row at floor(y) + 1, blended into %f132 / %f133.
391 " add.f32 %f19, %f11, 0f3F800000;\n"
392 " // begin inline asm\n"
393 " tex.2d.v4.f32.f32 {%r47, %r48, %r49, %r50}, [%rd4, {%f2, %f19}];\n"
394 " // end inline asm\n"
395 " mov.b32 %f118, %r48;\n"
396 " mov.b32 %f119, %r47;\n"
397 " // begin inline asm\n"
398 " tex.2d.v4.f32.f32 {%r51, %r52, %r53, %r54}, [%rd4, {%f4, %f19}];\n"
399 " // end inline asm\n"
400 " mov.b32 %f120, %r52;\n"
401 " mov.b32 %f121, %r51;\n"
402 " // begin inline asm\n"
403 " tex.2d.v4.f32.f32 {%r55, %r56, %r57, %r58}, [%rd4, {%f6, %f19}];\n"
404 " // end inline asm\n"
405 " mov.b32 %f122, %r56;\n"
406 " mov.b32 %f123, %r55;\n"
407 " // begin inline asm\n"
408 " tex.2d.v4.f32.f32 {%r59, %r60, %r61, %r62}, [%rd4, {%f8, %f19}];\n"
409 " // end inline asm\n"
410 " mov.b32 %f124, %r60;\n"
411 " mov.b32 %f125, %r59;\n"
412 " mul.f32 %f126, %f63, %f121;\n"
413 " mul.f32 %f127, %f63, %f120;\n"
414 " fma.rn.f32 %f128, %f57, %f119, %f126;\n"
415 " fma.rn.f32 %f129, %f57, %f118, %f127;\n"
416 " fma.rn.f32 %f130, %f68, %f123, %f128;\n"
417 " fma.rn.f32 %f131, %f68, %f122, %f129;\n"
418 " fma.rn.f32 %f132, %f71, %f125, %f130;\n"
419 " fma.rn.f32 %f133, %f71, %f124, %f131;\n"
// Row at floor(y) + 2, blended into %f148 / %f149.
420 " add.f32 %f27, %f11, 0f40000000;\n"
421 " // begin inline asm\n"
422 " tex.2d.v4.f32.f32 {%r63, %r64, %r65, %r66}, [%rd4, {%f2, %f27}];\n"
423 " // end inline asm\n"
424 " mov.b32 %f134, %r64;\n"
425 " mov.b32 %f135, %r63;\n"
426 " // begin inline asm\n"
427 " tex.2d.v4.f32.f32 {%r67, %r68, %r69, %r70}, [%rd4, {%f4, %f27}];\n"
428 " // end inline asm\n"
429 " mov.b32 %f136, %r68;\n"
430 " mov.b32 %f137, %r67;\n"
431 " // begin inline asm\n"
432 " tex.2d.v4.f32.f32 {%r71, %r72, %r73, %r74}, [%rd4, {%f6, %f27}];\n"
433 " // end inline asm\n"
434 " mov.b32 %f138, %r72;\n"
435 " mov.b32 %f139, %r71;\n"
436 " // begin inline asm\n"
437 " tex.2d.v4.f32.f32 {%r75, %r76, %r77, %r78}, [%rd4, {%f8, %f27}];\n"
438 " // end inline asm\n"
439 " mov.b32 %f140, %r76;\n"
440 " mov.b32 %f141, %r75;\n"
441 " mul.f32 %f142, %f63, %f137;\n"
442 " mul.f32 %f143, %f63, %f136;\n"
443 " fma.rn.f32 %f144, %f57, %f135, %f142;\n"
444 " fma.rn.f32 %f145, %f57, %f134, %f143;\n"
445 " fma.rn.f32 %f146, %f68, %f139, %f144;\n"
446 " fma.rn.f32 %f147, %f68, %f138, %f145;\n"
447 " fma.rn.f32 %f148, %f71, %f141, %f146;\n"
448 " fma.rn.f32 %f149, %f71, %f140, %f147;\n"
// Blend the four row results with the vertical weights, per component.
449 " mul.f32 %f150, %f78, %f116;\n"
450 " mul.f32 %f151, %f78, %f117;\n"
451 " fma.rn.f32 %f152, %f75, %f100, %f150;\n"
452 " fma.rn.f32 %f153, %f75, %f101, %f151;\n"
453 " fma.rn.f32 %f154, %f82, %f132, %f152;\n"
454 " fma.rn.f32 %f155, %f82, %f133, %f153;\n"
455 " fma.rn.f32 %f156, %f85, %f148, %f154;\n"
456 " fma.rn.f32 %f157, %f85, %f149, %f155;\n"
// Multiply by the output scale (presumably the fetched samples are normalized
// to [0, 1] - confirm against the texture setup), convert to integers with
// round-toward-zero, and store two bytes at param_1 + (y * param_4 + x) * 2.
457 " mul.f32 %f158, %f156, %f48;\n"
458 " mul.f32 %f159, %f157, %f48;\n"
459 " cvt.rzi.u16.f32 %rs1, %f158;\n"
460 " cvt.rzi.u16.f32 %rs2, %f159;\n"
461 " mad.lo.s32 %r80, %r2, %r5, %r1;\n"
462 " mul.wide.s32 %rd20, %r80, 2;\n"
463 " add.s64 %rd21, %rd1, %rd20;\n"
464 " st.global.v2.u8 [%rd21], {%rs1, %rs2};\n"
// Subsample_Bicubic_uchar4: PTX entry point in which each thread combines a
// 4x4 neighbourhood of texture samples with cubic weights, independently for
// all four components of each sample, and stores four 8-bit values.
//
// Parameters, as used by the code below:
//   param_0 (u64) - texture operand of every tex.2d fetch.
//   param_1 (u64) - output base address (converted with cvta.to.global).
//   param_2 (u32) - exclusive upper bound for the thread's x index and the
//                   divisor of param_5 (presumably the output width).
//   param_3 (u32) - exclusive upper bound for the thread's y index and the
//                   divisor of param_6 (presumably the output height).
//   param_4 (u32) - multiplied by y to form the output element index
//                   (presumably the output pitch in four-byte elements).
//   param_5 (u32) - numerator of the horizontal ratio (presumably source width).
//   param_6 (u32) - numerator of the vertical ratio (presumably source height).
//   param_7 (u32) - selects the output scale: 65535 when > 8, otherwise 255
//                   (presumably a bit depth).
//   param_8 (f32) - cubic coefficient; it is negated before use and replaced
//                   by 0 when it equals 999999.0.
469 " // .globl Subsample_Bicubic_uchar4\n"
470 ".visible .entry Subsample_Bicubic_uchar4(\n"
471 " .param .u64 Subsample_Bicubic_uchar4_param_0,\n"
472 " .param .u64 Subsample_Bicubic_uchar4_param_1,\n"
473 " .param .u32 Subsample_Bicubic_uchar4_param_2,\n"
474 " .param .u32 Subsample_Bicubic_uchar4_param_3,\n"
475 " .param .u32 Subsample_Bicubic_uchar4_param_4,\n"
476 " .param .u32 Subsample_Bicubic_uchar4_param_5,\n"
477 " .param .u32 Subsample_Bicubic_uchar4_param_6,\n"
478 " .param .u32 Subsample_Bicubic_uchar4_param_7,\n"
479 " .param .f32 Subsample_Bicubic_uchar4_param_8\n"
// Virtual register declarations.
482 " .reg .pred %p<6>;\n"
483 " .reg .b16 %rs<5>;\n"
484 " .reg .f32 %f<234>;\n"
485 " .reg .b32 %r<81>;\n"
486 " .reg .b64 %rd<22>;\n"
// Only the two bounds are loaded before the bounds check.
488 " ld.param.u32 %r4, [Subsample_Bicubic_uchar4_param_3];\n"
489 " ld.param.u32 %r3, [Subsample_Bicubic_uchar4_param_2];\n"
// x (%r1) = ntid.x * ctaid.x + tid.x, y (%r2) = ntid.y * ctaid.y + tid.y.
490 " // begin inline asm\n"
491 " mov.u32 %r9, %ctaid.x;\n"
492 " // end inline asm\n"
493 " // begin inline asm\n"
494 " mov.u32 %r10, %ctaid.y;\n"
495 " // end inline asm\n"
496 " // begin inline asm\n"
497 " mov.u32 %r11, %ntid.x;\n"
498 " // end inline asm\n"
499 " // begin inline asm\n"
500 " mov.u32 %r12, %ntid.y;\n"
501 " // end inline asm\n"
502 " // begin inline asm\n"
503 " mov.u32 %r13, %tid.x;\n"
504 " // end inline asm\n"
505 " // begin inline asm\n"
506 " mov.u32 %r14, %tid.y;\n"
507 " // end inline asm\n"
508 " mad.lo.s32 %r1, %r11, %r9, %r13;\n"
509 " mad.lo.s32 %r2, %r12, %r10, %r14;\n"
// Threads with x >= param_2 or y >= param_3 branch to LBB2_2 and skip all of
// the work below.
510 " setp.ge.s32 %p1, %r2, %r4;\n"
511 " setp.ge.s32 %p2, %r1, %r3;\n"
512 " or.pred %p3, %p2, %p1;\n"
513 " @%p3 bra LBB2_2;\n"
514 " ld.param.f32 %f1, [Subsample_Bicubic_uchar4_param_8];\n"
515 " ld.param.u32 %r8, [Subsample_Bicubic_uchar4_param_7];\n"
516 " ld.param.u32 %r7, [Subsample_Bicubic_uchar4_param_6];\n"
517 " ld.param.u32 %r6, [Subsample_Bicubic_uchar4_param_5];\n"
518 " ld.param.u32 %r5, [Subsample_Bicubic_uchar4_param_4];\n"
519 " ld.param.u64 %rd4, [Subsample_Bicubic_uchar4_param_0];\n"
520 " ld.param.u64 %rd3, [Subsample_Bicubic_uchar4_param_1];\n"
521 " cvta.to.global.u64 %rd1, %rd3;\n"
// Sample position: %f42 = (param_5 / param_2) * (x + 0.5) - 0.5 and
// %f45 = (param_6 / param_3) * (y + 0.5) - 0.5.
522 " cvt.rn.f32.s32 %f34, %r6;\n"
523 " cvt.rn.f32.s32 %f35, %r3;\n"
524 " div.rn.f32 %f36, %f34, %f35;\n"
525 " cvt.rn.f32.s32 %f37, %r7;\n"
526 " cvt.rn.f32.s32 %f38, %r4;\n"
527 " div.rn.f32 %f39, %f37, %f38;\n"
528 " cvt.rn.f32.s32 %f40, %r1;\n"
529 " add.f32 %f41, %f40, 0f3F000000;\n"
530 " fma.rn.f32 %f42, %f36, %f41, 0fBF000000;\n"
531 " cvt.rn.f32.s32 %f43, %r2;\n"
532 " add.f32 %f44, %f43, 0f3F000000;\n"
533 " fma.rn.f32 %f45, %f39, %f44, 0fBF000000;\n"
// Split into floor (%f4, %f11) and fractional part (fx = %f46, fy = %f47).
534 " cvt.rmi.f32.f32 %f4, %f42;\n"
535 " cvt.rmi.f32.f32 %f11, %f45;\n"
536 " sub.f32 %f46, %f42, %f4;\n"
537 " sub.f32 %f47, %f45, %f11;\n"
// Output scale %f48: 65535.0 when param_7 > 8, otherwise 255.0.
538 " setp.gt.s32 %p4, %r8, 8;\n"
539 " selp.b32 %r79, 65535, 255, %p4;\n"
540 " cvt.rn.f32.s32 %f48, %r79;\n"
// A (%f50) = 0 when param_8 == 999999.0 (0f497423F0), otherwise -param_8.
541 " setp.eq.f32 %p5, %f1, 0f497423F0;\n"
542 " neg.f32 %f49, %f1;\n"
543 " selp.f32 %f50, 0f00000000, %f49, %p5;\n"
// Horizontal weights:
//   %f57 = ((A*t - 5A)*t + 8A)*t - 4A        with t = fx + 1
//   %f63 = ((A+2)*fx - (A+3))*fx*fx + 1
//   %f68 = ((A+2)*u  - (A+3))*u*u   + 1      with u = 1 - fx
//   %f71 = 1 - %f57 - %f63 - %f68
544 " add.f32 %f51, %f46, 0f3F800000;\n"
545 " mul.f32 %f52, %f50, 0fC0A00000;\n"
546 " fma.rn.f32 %f53, %f50, %f51, %f52;\n"
547 " mul.f32 %f54, %f50, 0f41000000;\n"
548 " fma.rn.f32 %f55, %f51, %f53, %f54;\n"
549 " mul.f32 %f56, %f50, 0fC0800000;\n"
550 " fma.rn.f32 %f57, %f51, %f55, %f56;\n"
551 " add.f32 %f58, %f50, 0f40000000;\n"
552 " add.f32 %f59, %f50, 0f40400000;\n"
553 " neg.f32 %f60, %f59;\n"
554 " fma.rn.f32 %f61, %f58, %f46, %f60;\n"
555 " mul.f32 %f62, %f46, %f61;\n"
556 " fma.rn.f32 %f63, %f46, %f62, 0f3F800000;\n"
557 " mov.f32 %f64, 0f3F800000;\n"
558 " sub.f32 %f65, %f64, %f46;\n"
559 " fma.rn.f32 %f66, %f58, %f65, %f60;\n"
560 " mul.f32 %f67, %f65, %f66;\n"
561 " fma.rn.f32 %f68, %f65, %f67, 0f3F800000;\n"
562 " sub.f32 %f69, %f64, %f57;\n"
563 " sub.f32 %f70, %f69, %f63;\n"
564 " sub.f32 %f71, %f70, %f68;\n"
// Vertical weights %f75, %f78, %f82, %f85: same formulas with fy.
565 " add.f32 %f72, %f47, 0f3F800000;\n"
566 " fma.rn.f32 %f73, %f50, %f72, %f52;\n"
567 " fma.rn.f32 %f74, %f72, %f73, %f54;\n"
568 " fma.rn.f32 %f75, %f72, %f74, %f56;\n"
569 " fma.rn.f32 %f76, %f58, %f47, %f60;\n"
570 " mul.f32 %f77, %f47, %f76;\n"
571 " fma.rn.f32 %f78, %f47, %f77, 0f3F800000;\n"
572 " sub.f32 %f79, %f64, %f47;\n"
573 " fma.rn.f32 %f80, %f58, %f79, %f60;\n"
574 " mul.f32 %f81, %f79, %f80;\n"
575 " fma.rn.f32 %f82, %f79, %f81, 0f3F800000;\n"
576 " sub.f32 %f83, %f64, %f75;\n"
577 " sub.f32 %f84, %f83, %f78;\n"
578 " sub.f32 %f85, %f84, %f82;\n"
// Row at floor(y) - 1: fetch columns floor(x) - 1, floor(x), floor(x) + 1 and
// floor(x) + 2. After each fetch the four returned components are moved into
// %f registers in reverse order (fourth component first). The horizontal
// blend yields %f114..%f117 for components one to four.
579 " add.f32 %f2, %f4, 0fBF800000;\n"
580 " add.f32 %f3, %f11, 0fBF800000;\n"
581 " // begin inline asm\n"
582 " tex.2d.v4.f32.f32 {%r15, %r16, %r17, %r18}, [%rd4, {%f2, %f3}];\n"
583 " // end inline asm\n"
584 " mov.b32 %f86, %r18;\n"
585 " mov.b32 %f87, %r17;\n"
586 " mov.b32 %f88, %r16;\n"
587 " mov.b32 %f89, %r15;\n"
588 " // begin inline asm\n"
589 " tex.2d.v4.f32.f32 {%r19, %r20, %r21, %r22}, [%rd4, {%f4, %f3}];\n"
590 " // end inline asm\n"
591 " mov.b32 %f90, %r22;\n"
592 " mov.b32 %f91, %r21;\n"
593 " mov.b32 %f92, %r20;\n"
594 " mov.b32 %f93, %r19;\n"
595 " add.f32 %f6, %f4, 0f3F800000;\n"
596 " // begin inline asm\n"
597 " tex.2d.v4.f32.f32 {%r23, %r24, %r25, %r26}, [%rd4, {%f6, %f3}];\n"
598 " // end inline asm\n"
599 " mov.b32 %f94, %r26;\n"
600 " mov.b32 %f95, %r25;\n"
601 " mov.b32 %f96, %r24;\n"
602 " mov.b32 %f97, %r23;\n"
603 " add.f32 %f8, %f4, 0f40000000;\n"
604 " // begin inline asm\n"
605 " tex.2d.v4.f32.f32 {%r27, %r28, %r29, %r30}, [%rd4, {%f8, %f3}];\n"
606 " // end inline asm\n"
607 " mov.b32 %f98, %r30;\n"
608 " mov.b32 %f99, %r29;\n"
609 " mov.b32 %f100, %r28;\n"
610 " mov.b32 %f101, %r27;\n"
611 " mul.f32 %f102, %f63, %f93;\n"
612 " mul.f32 %f103, %f63, %f92;\n"
613 " mul.f32 %f104, %f63, %f91;\n"
614 " mul.f32 %f105, %f63, %f90;\n"
615 " fma.rn.f32 %f106, %f57, %f89, %f102;\n"
616 " fma.rn.f32 %f107, %f57, %f88, %f103;\n"
617 " fma.rn.f32 %f108, %f57, %f87, %f104;\n"
618 " fma.rn.f32 %f109, %f57, %f86, %f105;\n"
619 " fma.rn.f32 %f110, %f68, %f97, %f106;\n"
620 " fma.rn.f32 %f111, %f68, %f96, %f107;\n"
621 " fma.rn.f32 %f112, %f68, %f95, %f108;\n"
622 " fma.rn.f32 %f113, %f68, %f94, %f109;\n"
623 " fma.rn.f32 %f114, %f71, %f101, %f110;\n"
624 " fma.rn.f32 %f115, %f71, %f100, %f111;\n"
625 " fma.rn.f32 %f116, %f71, %f99, %f112;\n"
626 " fma.rn.f32 %f117, %f71, %f98, %f113;\n"
// Row at floor(y): same four columns, blended into %f146..%f149.
627 " // begin inline asm\n"
628 " tex.2d.v4.f32.f32 {%r31, %r32, %r33, %r34}, [%rd4, {%f2, %f11}];\n"
629 " // end inline asm\n"
630 " mov.b32 %f118, %r34;\n"
631 " mov.b32 %f119, %r33;\n"
632 " mov.b32 %f120, %r32;\n"
633 " mov.b32 %f121, %r31;\n"
634 " // begin inline asm\n"
635 " tex.2d.v4.f32.f32 {%r35, %r36, %r37, %r38}, [%rd4, {%f4, %f11}];\n"
636 " // end inline asm\n"
637 " mov.b32 %f122, %r38;\n"
638 " mov.b32 %f123, %r37;\n"
639 " mov.b32 %f124, %r36;\n"
640 " mov.b32 %f125, %r35;\n"
641 " // begin inline asm\n"
642 " tex.2d.v4.f32.f32 {%r39, %r40, %r41, %r42}, [%rd4, {%f6, %f11}];\n"
643 " // end inline asm\n"
644 " mov.b32 %f126, %r42;\n"
645 " mov.b32 %f127, %r41;\n"
646 " mov.b32 %f128, %r40;\n"
647 " mov.b32 %f129, %r39;\n"
648 " // begin inline asm\n"
649 " tex.2d.v4.f32.f32 {%r43, %r44, %r45, %r46}, [%rd4, {%f8, %f11}];\n"
650 " // end inline asm\n"
651 " mov.b32 %f130, %r46;\n"
652 " mov.b32 %f131, %r45;\n"
653 " mov.b32 %f132, %r44;\n"
654 " mov.b32 %f133, %r43;\n"
655 " mul.f32 %f134, %f63, %f125;\n"
656 " mul.f32 %f135, %f63, %f124;\n"
657 " mul.f32 %f136, %f63, %f123;\n"
658 " mul.f32 %f137, %f63, %f122;\n"
659 " fma.rn.f32 %f138, %f57, %f121, %f134;\n"
660 " fma.rn.f32 %f139, %f57, %f120, %f135;\n"
661 " fma.rn.f32 %f140, %f57, %f119, %f136;\n"
662 " fma.rn.f32 %f141, %f57, %f118, %f137;\n"
663 " fma.rn.f32 %f142, %f68, %f129, %f138;\n"
664 " fma.rn.f32 %f143, %f68, %f128, %f139;\n"
665 " fma.rn.f32 %f144, %f68, %f127, %f140;\n"
666 " fma.rn.f32 %f145, %f68, %f126, %f141;\n"
667 " fma.rn.f32 %f146, %f71, %f133, %f142;\n"
668 " fma.rn.f32 %f147, %f71, %f132, %f143;\n"
669 " fma.rn.f32 %f148, %f71, %f131, %f144;\n"
670 " fma.rn.f32 %f149, %f71, %f130, %f145;\n"
// Row at floor(y) + 1, blended into %f178..%f181.
671 " add.f32 %f19, %f11, 0f3F800000;\n"
672 " // begin inline asm\n"
673 " tex.2d.v4.f32.f32 {%r47, %r48, %r49, %r50}, [%rd4, {%f2, %f19}];\n"
674 " // end inline asm\n"
675 " mov.b32 %f150, %r50;\n"
676 " mov.b32 %f151, %r49;\n"
677 " mov.b32 %f152, %r48;\n"
678 " mov.b32 %f153, %r47;\n"
679 " // begin inline asm\n"
680 " tex.2d.v4.f32.f32 {%r51, %r52, %r53, %r54}, [%rd4, {%f4, %f19}];\n"
681 " // end inline asm\n"
682 " mov.b32 %f154, %r54;\n"
683 " mov.b32 %f155, %r53;\n"
684 " mov.b32 %f156, %r52;\n"
685 " mov.b32 %f157, %r51;\n"
686 " // begin inline asm\n"
687 " tex.2d.v4.f32.f32 {%r55, %r56, %r57, %r58}, [%rd4, {%f6, %f19}];\n"
688 " // end inline asm\n"
689 " mov.b32 %f158, %r58;\n"
690 " mov.b32 %f159, %r57;\n"
691 " mov.b32 %f160, %r56;\n"
692 " mov.b32 %f161, %r55;\n"
693 " // begin inline asm\n"
694 " tex.2d.v4.f32.f32 {%r59, %r60, %r61, %r62}, [%rd4, {%f8, %f19}];\n"
695 " // end inline asm\n"
696 " mov.b32 %f162, %r62;\n"
697 " mov.b32 %f163, %r61;\n"
698 " mov.b32 %f164, %r60;\n"
699 " mov.b32 %f165, %r59;\n"
700 " mul.f32 %f166, %f63, %f157;\n"
701 " mul.f32 %f167, %f63, %f156;\n"
702 " mul.f32 %f168, %f63, %f155;\n"
703 " mul.f32 %f169, %f63, %f154;\n"
704 " fma.rn.f32 %f170, %f57, %f153, %f166;\n"
705 " fma.rn.f32 %f171, %f57, %f152, %f167;\n"
706 " fma.rn.f32 %f172, %f57, %f151, %f168;\n"
707 " fma.rn.f32 %f173, %f57, %f150, %f169;\n"
708 " fma.rn.f32 %f174, %f68, %f161, %f170;\n"
709 " fma.rn.f32 %f175, %f68, %f160, %f171;\n"
710 " fma.rn.f32 %f176, %f68, %f159, %f172;\n"
711 " fma.rn.f32 %f177, %f68, %f158, %f173;\n"
712 " fma.rn.f32 %f178, %f71, %f165, %f174;\n"
713 " fma.rn.f32 %f179, %f71, %f164, %f175;\n"
714 " fma.rn.f32 %f180, %f71, %f163, %f176;\n"
715 " fma.rn.f32 %f181, %f71, %f162, %f177;\n"
// Row at floor(y) + 2, blended into %f210..%f213.
716 " add.f32 %f27, %f11, 0f40000000;\n"
717 " // begin inline asm\n"
718 " tex.2d.v4.f32.f32 {%r63, %r64, %r65, %r66}, [%rd4, {%f2, %f27}];\n"
719 " // end inline asm\n"
720 " mov.b32 %f182, %r66;\n"
721 " mov.b32 %f183, %r65;\n"
722 " mov.b32 %f184, %r64;\n"
723 " mov.b32 %f185, %r63;\n"
724 " // begin inline asm\n"
725 " tex.2d.v4.f32.f32 {%r67, %r68, %r69, %r70}, [%rd4, {%f4, %f27}];\n"
726 " // end inline asm\n"
727 " mov.b32 %f186, %r70;\n"
728 " mov.b32 %f187, %r69;\n"
729 " mov.b32 %f188, %r68;\n"
730 " mov.b32 %f189, %r67;\n"
731 " // begin inline asm\n"
732 " tex.2d.v4.f32.f32 {%r71, %r72, %r73, %r74}, [%rd4, {%f6, %f27}];\n"
733 " // end inline asm\n"
734 " mov.b32 %f190, %r74;\n"
735 " mov.b32 %f191, %r73;\n"
736 " mov.b32 %f192, %r72;\n"
737 " mov.b32 %f193, %r71;\n"
738 " // begin inline asm\n"
739 " tex.2d.v4.f32.f32 {%r75, %r76, %r77, %r78}, [%rd4, {%f8, %f27}];\n"
740 " // end inline asm\n"
741 " mov.b32 %f194, %r78;\n"
742 " mov.b32 %f195, %r77;\n"
743 " mov.b32 %f196, %r76;\n"
744 " mov.b32 %f197, %r75;\n"
745 " mul.f32 %f198, %f63, %f189;\n"
746 " mul.f32 %f199, %f63, %f188;\n"
747 " mul.f32 %f200, %f63, %f187;\n"
748 " mul.f32 %f201, %f63, %f186;\n"
749 " fma.rn.f32 %f202, %f57, %f185, %f198;\n"
750 " fma.rn.f32 %f203, %f57, %f184, %f199;\n"
751 " fma.rn.f32 %f204, %f57, %f183, %f200;\n"
752 " fma.rn.f32 %f205, %f57, %f182, %f201;\n"
753 " fma.rn.f32 %f206, %f68, %f193, %f202;\n"
754 " fma.rn.f32 %f207, %f68, %f192, %f203;\n"
755 " fma.rn.f32 %f208, %f68, %f191, %f204;\n"
756 " fma.rn.f32 %f209, %f68, %f190, %f205;\n"
757 " fma.rn.f32 %f210, %f71, %f197, %f206;\n"
758 " fma.rn.f32 %f211, %f71, %f196, %f207;\n"
759 " fma.rn.f32 %f212, %f71, %f195, %f208;\n"
760 " fma.rn.f32 %f213, %f71, %f194, %f209;\n"
// Blend the four row results with the vertical weights, per component.
761 " mul.f32 %f214, %f78, %f146;\n"
762 " mul.f32 %f215, %f78, %f147;\n"
763 " mul.f32 %f216, %f78, %f148;\n"
764 " mul.f32 %f217, %f78, %f149;\n"
765 " fma.rn.f32 %f218, %f75, %f114, %f214;\n"
766 " fma.rn.f32 %f219, %f75, %f115, %f215;\n"
767 " fma.rn.f32 %f220, %f75, %f116, %f216;\n"
768 " fma.rn.f32 %f221, %f75, %f117, %f217;\n"
769 " fma.rn.f32 %f222, %f82, %f178, %f218;\n"
770 " fma.rn.f32 %f223, %f82, %f179, %f219;\n"
771 " fma.rn.f32 %f224, %f82, %f180, %f220;\n"
772 " fma.rn.f32 %f225, %f82, %f181, %f221;\n"
773 " fma.rn.f32 %f226, %f85, %f210, %f222;\n"
774 " fma.rn.f32 %f227, %f85, %f211, %f223;\n"
775 " fma.rn.f32 %f228, %f85, %f212, %f224;\n"
776 " fma.rn.f32 %f229, %f85, %f213, %f225;\n"
// Multiply by the output scale (presumably the fetched samples are normalized
// to [0, 1] - confirm against the texture setup), convert to integers with
// round-toward-zero, and store four bytes at param_1 + (y * param_4 + x) * 4.
777 " mul.f32 %f230, %f226, %f48;\n"
778 " mul.f32 %f231, %f227, %f48;\n"
779 " mul.f32 %f232, %f228, %f48;\n"
780 " mul.f32 %f233, %f229, %f48;\n"
781 " cvt.rzi.u16.f32 %rs1, %f230;\n"
782 " cvt.rzi.u16.f32 %rs2, %f231;\n"
783 " cvt.rzi.u16.f32 %rs3, %f232;\n"
784 " cvt.rzi.u16.f32 %rs4, %f233;\n"
785 " mad.lo.s32 %r80, %r2, %r5, %r1;\n"
786 " mul.wide.s32 %rd20, %r80, 4;\n"
787 " add.s64 %rd21, %rd1, %rd20;\n"
788 " st.global.v4.u8 [%rd21], {%rs1, %rs2, %rs3, %rs4};\n"
// Subsample_Bicubic_ushort: compiler-generated PTX kernel in which each thread
// produces one 16-bit output element as a 4x4 weighted sum of texture fetches.
// Parameters, as used by the code below:
//   param_0 (u64): texture handle passed to every tex.2d fetch.
//   param_1 (u64): output pointer; converted to a global address for the store.
//   param_2 (u32): exclusive upper bound for the thread's x index; divisor of param_5.
//   param_3 (u32): exclusive upper bound for the thread's y index; divisor of param_6.
//   param_4 (u32): output row stride in elements (element index = y * param_4 + x).
//   param_5, param_6 (u32): numerators of the x / y scale ratios
//                           (presumably source width / height - confirm with the caller).
//   param_7 (u32): selects the output scale: 65535 when > 8, otherwise 255
//                  (presumably a bit depth - confirm with the caller).
//   param_8 (f32): filter coefficient; 999999.0 selects A = 0, otherwise A = -param_8.
793 " // .globl Subsample_Bicubic_ushort\n"
794 ".visible .entry Subsample_Bicubic_ushort(\n"
795 " .param .u64 Subsample_Bicubic_ushort_param_0,\n"
796 " .param .u64 Subsample_Bicubic_ushort_param_1,\n"
797 " .param .u32 Subsample_Bicubic_ushort_param_2,\n"
798 " .param .u32 Subsample_Bicubic_ushort_param_3,\n"
799 " .param .u32 Subsample_Bicubic_ushort_param_4,\n"
800 " .param .u32 Subsample_Bicubic_ushort_param_5,\n"
801 " .param .u32 Subsample_Bicubic_ushort_param_6,\n"
802 " .param .u32 Subsample_Bicubic_ushort_param_7,\n"
803 " .param .f32 Subsample_Bicubic_ushort_param_8\n"
806 " .reg .pred %p<6>;\n"
807 " .reg .b16 %rs<2>;\n"
808 " .reg .f32 %f<123>;\n"
809 " .reg .b32 %r<81>;\n"
810 " .reg .b64 %rd<22>;\n"
// Only the two bounds are loaded before the range check; the remaining
// parameters are loaded after it.
812 " ld.param.u32 %r4, [Subsample_Bicubic_ushort_param_3];\n"
813 " ld.param.u32 %r3, [Subsample_Bicubic_ushort_param_2];\n"
814 " // begin inline asm\n"
815 " mov.u32 %r9, %ctaid.x;\n"
816 " // end inline asm\n"
817 " // begin inline asm\n"
818 " mov.u32 %r10, %ctaid.y;\n"
819 " // end inline asm\n"
820 " // begin inline asm\n"
821 " mov.u32 %r11, %ntid.x;\n"
822 " // end inline asm\n"
823 " // begin inline asm\n"
824 " mov.u32 %r12, %ntid.y;\n"
825 " // end inline asm\n"
826 " // begin inline asm\n"
827 " mov.u32 %r13, %tid.x;\n"
828 " // end inline asm\n"
829 " // begin inline asm\n"
830 " mov.u32 %r14, %tid.y;\n"
831 " // end inline asm\n"
// x = ntid.x * ctaid.x + tid.x (%r1), y = ntid.y * ctaid.y + tid.y (%r2).
832 " mad.lo.s32 %r1, %r11, %r9, %r13;\n"
833 " mad.lo.s32 %r2, %r12, %r10, %r14;\n"
// Threads with y >= param_3 or x >= param_2 take the LBB3_2 branch instead of
// running the computation below.
834 " setp.ge.s32 %p1, %r2, %r4;\n"
835 " setp.ge.s32 %p2, %r1, %r3;\n"
836 " or.pred %p3, %p2, %p1;\n"
837 " @%p3 bra LBB3_2;\n"
838 " ld.param.f32 %f1, [Subsample_Bicubic_ushort_param_8];\n"
839 " ld.param.u32 %r8, [Subsample_Bicubic_ushort_param_7];\n"
840 " ld.param.u32 %r7, [Subsample_Bicubic_ushort_param_6];\n"
841 " ld.param.u32 %r6, [Subsample_Bicubic_ushort_param_5];\n"
842 " ld.param.u32 %r5, [Subsample_Bicubic_ushort_param_4];\n"
843 " ld.param.u64 %rd4, [Subsample_Bicubic_ushort_param_0];\n"
844 " ld.param.u64 %rd3, [Subsample_Bicubic_ushort_param_1];\n"
845 " cvta.to.global.u64 %rd1, %rd3;\n"
// Scale ratios: %f36 = param_5 / param_2, %f39 = param_6 / param_3.
846 " cvt.rn.f32.s32 %f34, %r6;\n"
847 " cvt.rn.f32.s32 %f35, %r3;\n"
848 " div.rn.f32 %f36, %f34, %f35;\n"
849 " cvt.rn.f32.s32 %f37, %r7;\n"
850 " cvt.rn.f32.s32 %f38, %r4;\n"
851 " div.rn.f32 %f39, %f37, %f38;\n"
// Sampling position: %f42 = %f36 * (x + 0.5) - 0.5, %f45 = %f39 * (y + 0.5) - 0.5.
852 " cvt.rn.f32.s32 %f40, %r1;\n"
853 " add.f32 %f41, %f40, 0f3F000000;\n"
854 " fma.rn.f32 %f42, %f36, %f41, 0fBF000000;\n"
855 " cvt.rn.f32.s32 %f43, %r2;\n"
856 " add.f32 %f44, %f43, 0f3F000000;\n"
857 " fma.rn.f32 %f45, %f39, %f44, 0fBF000000;\n"
// Split the position into its floor (sx = %f4, sy = %f11; cvt.rmi rounds toward
// minus infinity) and its fractional part (fx = %f46, fy = %f47).
858 " cvt.rmi.f32.f32 %f4, %f42;\n"
859 " cvt.rmi.f32.f32 %f11, %f45;\n"
860 " sub.f32 %f46, %f42, %f4;\n"
861 " sub.f32 %f47, %f45, %f11;\n"
// Output scale %f48: 65535.0 when param_7 > 8, otherwise 255.0.
862 " setp.gt.s32 %p4, %r8, 8;\n"
863 " selp.b32 %r79, 65535, 255, %p4;\n"
864 " cvt.rn.f32.s32 %f48, %r79;\n"
// Coefficient A (%f50): 0.0 when param_8 == 999999.0 (0f497423F0), else -param_8.
865 " setp.eq.f32 %p5, %f1, 0f497423F0;\n"
866 " neg.f32 %f49, %f1;\n"
867 " selp.f32 %f50, 0f00000000, %f49, %p5;\n"
// Horizontal weights, with t = fx + 1 and u = 1 - fx:
//   %f57 = ((A*t - 5A)*t + 8A)*t - 4A          (column sx - 1)
//   %f63 = ((A+2)*fx - (A+3))*fx*fx + 1        (column sx)
//   %f68 = ((A+2)*u  - (A+3))*u*u   + 1        (column sx + 1)
//   %f71 = 1 - %f57 - %f63 - %f68              (column sx + 2)
868 " add.f32 %f51, %f46, 0f3F800000;\n"
869 " mul.f32 %f52, %f50, 0fC0A00000;\n"
870 " fma.rn.f32 %f53, %f50, %f51, %f52;\n"
871 " mul.f32 %f54, %f50, 0f41000000;\n"
872 " fma.rn.f32 %f55, %f51, %f53, %f54;\n"
873 " mul.f32 %f56, %f50, 0fC0800000;\n"
874 " fma.rn.f32 %f57, %f51, %f55, %f56;\n"
875 " add.f32 %f58, %f50, 0f40000000;\n"
876 " add.f32 %f59, %f50, 0f40400000;\n"
877 " neg.f32 %f60, %f59;\n"
878 " fma.rn.f32 %f61, %f58, %f46, %f60;\n"
879 " mul.f32 %f62, %f46, %f61;\n"
880 " fma.rn.f32 %f63, %f46, %f62, 0f3F800000;\n"
881 " mov.f32 %f64, 0f3F800000;\n"
882 " sub.f32 %f65, %f64, %f46;\n"
883 " fma.rn.f32 %f66, %f58, %f65, %f60;\n"
884 " mul.f32 %f67, %f65, %f66;\n"
885 " fma.rn.f32 %f68, %f65, %f67, 0f3F800000;\n"
886 " sub.f32 %f69, %f64, %f57;\n"
887 " sub.f32 %f70, %f69, %f63;\n"
888 " sub.f32 %f71, %f70, %f68;\n"
// Vertical weights %f75, %f78, %f82, %f85: the same four formulas evaluated with fy.
889 " add.f32 %f72, %f47, 0f3F800000;\n"
890 " fma.rn.f32 %f73, %f50, %f72, %f52;\n"
891 " fma.rn.f32 %f74, %f72, %f73, %f54;\n"
892 " fma.rn.f32 %f75, %f72, %f74, %f56;\n"
893 " fma.rn.f32 %f76, %f58, %f47, %f60;\n"
894 " mul.f32 %f77, %f47, %f76;\n"
895 " fma.rn.f32 %f78, %f47, %f77, 0f3F800000;\n"
896 " sub.f32 %f79, %f64, %f47;\n"
897 " fma.rn.f32 %f80, %f58, %f79, %f60;\n"
898 " mul.f32 %f81, %f79, %f80;\n"
899 " fma.rn.f32 %f82, %f79, %f81, 0f3F800000;\n"
900 " sub.f32 %f83, %f64, %f75;\n"
901 " sub.f32 %f84, %f83, %f78;\n"
902 " sub.f32 %f85, %f84, %f82;\n"
// Row sy - 1 (%f3): fetch columns sx - 1, sx, sx + 1, sx + 2 (%f2, %f4, %f6, %f8).
// Only the first of the four returned components is used by this kernel.
903 " add.f32 %f2, %f4, 0fBF800000;\n"
904 " add.f32 %f3, %f11, 0fBF800000;\n"
905 " // begin inline asm\n"
906 " tex.2d.v4.f32.f32 {%r15, %r16, %r17, %r18}, [%rd4, {%f2, %f3}];\n"
907 " // end inline asm\n"
908 " mov.b32 %f86, %r15;\n"
909 " // begin inline asm\n"
910 " tex.2d.v4.f32.f32 {%r19, %r20, %r21, %r22}, [%rd4, {%f4, %f3}];\n"
911 " // end inline asm\n"
912 " mov.b32 %f87, %r19;\n"
913 " add.f32 %f6, %f4, 0f3F800000;\n"
914 " // begin inline asm\n"
915 " tex.2d.v4.f32.f32 {%r23, %r24, %r25, %r26}, [%rd4, {%f6, %f3}];\n"
916 " // end inline asm\n"
917 " mov.b32 %f88, %r23;\n"
918 " add.f32 %f8, %f4, 0f40000000;\n"
919 " // begin inline asm\n"
920 " tex.2d.v4.f32.f32 {%r27, %r28, %r29, %r30}, [%rd4, {%f8, %f3}];\n"
921 " // end inline asm\n"
922 " mov.b32 %f89, %r27;\n"
// %f93 = horizontally weighted sum of row sy - 1.
923 " mul.f32 %f90, %f63, %f87;\n"
924 " fma.rn.f32 %f91, %f57, %f86, %f90;\n"
925 " fma.rn.f32 %f92, %f68, %f88, %f91;\n"
926 " fma.rn.f32 %f93, %f71, %f89, %f92;\n"
// Row sy (%f11): same four columns, weighted sum in %f101.
927 " // begin inline asm\n"
928 " tex.2d.v4.f32.f32 {%r31, %r32, %r33, %r34}, [%rd4, {%f2, %f11}];\n"
929 " // end inline asm\n"
930 " mov.b32 %f94, %r31;\n"
931 " // begin inline asm\n"
932 " tex.2d.v4.f32.f32 {%r35, %r36, %r37, %r38}, [%rd4, {%f4, %f11}];\n"
933 " // end inline asm\n"
934 " mov.b32 %f95, %r35;\n"
935 " // begin inline asm\n"
936 " tex.2d.v4.f32.f32 {%r39, %r40, %r41, %r42}, [%rd4, {%f6, %f11}];\n"
937 " // end inline asm\n"
938 " mov.b32 %f96, %r39;\n"
939 " // begin inline asm\n"
940 " tex.2d.v4.f32.f32 {%r43, %r44, %r45, %r46}, [%rd4, {%f8, %f11}];\n"
941 " // end inline asm\n"
942 " mov.b32 %f97, %r43;\n"
943 " mul.f32 %f98, %f63, %f95;\n"
944 " fma.rn.f32 %f99, %f57, %f94, %f98;\n"
945 " fma.rn.f32 %f100, %f68, %f96, %f99;\n"
946 " fma.rn.f32 %f101, %f71, %f97, %f100;\n"
// Row sy + 1 (%f19): weighted sum in %f109.
947 " add.f32 %f19, %f11, 0f3F800000;\n"
948 " // begin inline asm\n"
949 " tex.2d.v4.f32.f32 {%r47, %r48, %r49, %r50}, [%rd4, {%f2, %f19}];\n"
950 " // end inline asm\n"
951 " mov.b32 %f102, %r47;\n"
952 " // begin inline asm\n"
953 " tex.2d.v4.f32.f32 {%r51, %r52, %r53, %r54}, [%rd4, {%f4, %f19}];\n"
954 " // end inline asm\n"
955 " mov.b32 %f103, %r51;\n"
956 " // begin inline asm\n"
957 " tex.2d.v4.f32.f32 {%r55, %r56, %r57, %r58}, [%rd4, {%f6, %f19}];\n"
958 " // end inline asm\n"
959 " mov.b32 %f104, %r55;\n"
960 " // begin inline asm\n"
961 " tex.2d.v4.f32.f32 {%r59, %r60, %r61, %r62}, [%rd4, {%f8, %f19}];\n"
962 " // end inline asm\n"
963 " mov.b32 %f105, %r59;\n"
964 " mul.f32 %f106, %f63, %f103;\n"
965 " fma.rn.f32 %f107, %f57, %f102, %f106;\n"
966 " fma.rn.f32 %f108, %f68, %f104, %f107;\n"
967 " fma.rn.f32 %f109, %f71, %f105, %f108;\n"
// Row sy + 2 (%f27): weighted sum in %f117.
968 " add.f32 %f27, %f11, 0f40000000;\n"
969 " // begin inline asm\n"
970 " tex.2d.v4.f32.f32 {%r63, %r64, %r65, %r66}, [%rd4, {%f2, %f27}];\n"
971 " // end inline asm\n"
972 " mov.b32 %f110, %r63;\n"
973 " // begin inline asm\n"
974 " tex.2d.v4.f32.f32 {%r67, %r68, %r69, %r70}, [%rd4, {%f4, %f27}];\n"
975 " // end inline asm\n"
976 " mov.b32 %f111, %r67;\n"
977 " // begin inline asm\n"
978 " tex.2d.v4.f32.f32 {%r71, %r72, %r73, %r74}, [%rd4, {%f6, %f27}];\n"
979 " // end inline asm\n"
980 " mov.b32 %f112, %r71;\n"
981 " // begin inline asm\n"
982 " tex.2d.v4.f32.f32 {%r75, %r76, %r77, %r78}, [%rd4, {%f8, %f27}];\n"
983 " // end inline asm\n"
984 " mov.b32 %f113, %r75;\n"
985 " mul.f32 %f114, %f63, %f111;\n"
986 " fma.rn.f32 %f115, %f57, %f110, %f114;\n"
987 " fma.rn.f32 %f116, %f68, %f112, %f115;\n"
988 " fma.rn.f32 %f117, %f71, %f113, %f116;\n"
// Combine the four row sums with the vertical weights:
// %f121 = %f75*%f93 + %f78*%f101 + %f82*%f109 + %f85*%f117.
989 " mul.f32 %f118, %f78, %f101;\n"
990 " fma.rn.f32 %f119, %f75, %f93, %f118;\n"
991 " fma.rn.f32 %f120, %f82, %f109, %f119;\n"
992 " fma.rn.f32 %f121, %f85, %f117, %f120;\n"
// Multiply by the output scale, convert to u16 rounding toward zero (cvt.rzi),
// and store at param_1 + (y * param_4 + x) * 2 bytes.
993 " mul.f32 %f122, %f121, %f48;\n"
994 " cvt.rzi.u16.f32 %rs1, %f122;\n"
995 " mad.lo.s32 %r80, %r2, %r5, %r1;\n"
996 " mul.wide.s32 %rd20, %r80, 2;\n"
997 " add.s64 %rd21, %rd1, %rd20;\n"
998 " st.global.u16 [%rd21], %rs1;\n"
// Subsample_Bicubic_ushort2: compiler-generated PTX kernel in which each thread
// produces one output element made of two 16-bit components, each a 4x4
// weighted sum of the matching component of sixteen texture fetches.
// Parameters, as used by the code below:
//   param_0 (u64): texture handle passed to every tex.2d fetch.
//   param_1 (u64): output pointer; converted to a global address for the store.
//   param_2 (u32): exclusive upper bound for the thread's x index; divisor of param_5.
//   param_3 (u32): exclusive upper bound for the thread's y index; divisor of param_6.
//   param_4 (u32): output row stride in elements (element index = y * param_4 + x).
//   param_5, param_6 (u32): numerators of the x / y scale ratios
//                           (presumably source width / height - confirm with the caller).
//   param_7 (u32): selects the output scale: 65535 when > 8, otherwise 255
//                  (presumably a bit depth - confirm with the caller).
//   param_8 (f32): filter coefficient; 999999.0 selects A = 0, otherwise A = -param_8.
1003 " // .globl Subsample_Bicubic_ushort2\n"
1004 ".visible .entry Subsample_Bicubic_ushort2(\n"
1005 " .param .u64 Subsample_Bicubic_ushort2_param_0,\n"
1006 " .param .u64 Subsample_Bicubic_ushort2_param_1,\n"
1007 " .param .u32 Subsample_Bicubic_ushort2_param_2,\n"
1008 " .param .u32 Subsample_Bicubic_ushort2_param_3,\n"
1009 " .param .u32 Subsample_Bicubic_ushort2_param_4,\n"
1010 " .param .u32 Subsample_Bicubic_ushort2_param_5,\n"
1011 " .param .u32 Subsample_Bicubic_ushort2_param_6,\n"
1012 " .param .u32 Subsample_Bicubic_ushort2_param_7,\n"
1013 " .param .f32 Subsample_Bicubic_ushort2_param_8\n"
1016 " .reg .pred %p<6>;\n"
1017 " .reg .b16 %rs<3>;\n"
1018 " .reg .f32 %f<160>;\n"
1019 " .reg .b32 %r<81>;\n"
1020 " .reg .b64 %rd<22>;\n"
// Only the two bounds are loaded before the range check; the remaining
// parameters are loaded after it.
1022 " ld.param.u32 %r4, [Subsample_Bicubic_ushort2_param_3];\n"
1023 " ld.param.u32 %r3, [Subsample_Bicubic_ushort2_param_2];\n"
1024 " // begin inline asm\n"
1025 " mov.u32 %r9, %ctaid.x;\n"
1026 " // end inline asm\n"
1027 " // begin inline asm\n"
1028 " mov.u32 %r10, %ctaid.y;\n"
1029 " // end inline asm\n"
1030 " // begin inline asm\n"
1031 " mov.u32 %r11, %ntid.x;\n"
1032 " // end inline asm\n"
1033 " // begin inline asm\n"
1034 " mov.u32 %r12, %ntid.y;\n"
1035 " // end inline asm\n"
1036 " // begin inline asm\n"
1037 " mov.u32 %r13, %tid.x;\n"
1038 " // end inline asm\n"
1039 " // begin inline asm\n"
1040 " mov.u32 %r14, %tid.y;\n"
1041 " // end inline asm\n"
// x = ntid.x * ctaid.x + tid.x (%r1), y = ntid.y * ctaid.y + tid.y (%r2).
1042 " mad.lo.s32 %r1, %r11, %r9, %r13;\n"
1043 " mad.lo.s32 %r2, %r12, %r10, %r14;\n"
// Threads with y >= param_3 or x >= param_2 take the LBB4_2 branch instead of
// running the computation below.
1044 " setp.ge.s32 %p1, %r2, %r4;\n"
1045 " setp.ge.s32 %p2, %r1, %r3;\n"
1046 " or.pred %p3, %p2, %p1;\n"
1047 " @%p3 bra LBB4_2;\n"
1048 " ld.param.f32 %f1, [Subsample_Bicubic_ushort2_param_8];\n"
1049 " ld.param.u32 %r8, [Subsample_Bicubic_ushort2_param_7];\n"
1050 " ld.param.u32 %r7, [Subsample_Bicubic_ushort2_param_6];\n"
1051 " ld.param.u32 %r6, [Subsample_Bicubic_ushort2_param_5];\n"
1052 " ld.param.u32 %r5, [Subsample_Bicubic_ushort2_param_4];\n"
1053 " ld.param.u64 %rd4, [Subsample_Bicubic_ushort2_param_0];\n"
1054 " ld.param.u64 %rd3, [Subsample_Bicubic_ushort2_param_1];\n"
1055 " cvta.to.global.u64 %rd1, %rd3;\n"
// Scale ratios: %f36 = param_5 / param_2, %f39 = param_6 / param_3.
1056 " cvt.rn.f32.s32 %f34, %r6;\n"
1057 " cvt.rn.f32.s32 %f35, %r3;\n"
1058 " div.rn.f32 %f36, %f34, %f35;\n"
1059 " cvt.rn.f32.s32 %f37, %r7;\n"
1060 " cvt.rn.f32.s32 %f38, %r4;\n"
1061 " div.rn.f32 %f39, %f37, %f38;\n"
// Sampling position: %f42 = %f36 * (x + 0.5) - 0.5, %f45 = %f39 * (y + 0.5) - 0.5.
1062 " cvt.rn.f32.s32 %f40, %r1;\n"
1063 " add.f32 %f41, %f40, 0f3F000000;\n"
1064 " fma.rn.f32 %f42, %f36, %f41, 0fBF000000;\n"
1065 " cvt.rn.f32.s32 %f43, %r2;\n"
1066 " add.f32 %f44, %f43, 0f3F000000;\n"
1067 " fma.rn.f32 %f45, %f39, %f44, 0fBF000000;\n"
// Split the position into its floor (sx = %f4, sy = %f11; cvt.rmi rounds toward
// minus infinity) and its fractional part (fx = %f46, fy = %f47).
1068 " cvt.rmi.f32.f32 %f4, %f42;\n"
1069 " cvt.rmi.f32.f32 %f11, %f45;\n"
1070 " sub.f32 %f46, %f42, %f4;\n"
1071 " sub.f32 %f47, %f45, %f11;\n"
// Output scale %f48: 65535.0 when param_7 > 8, otherwise 255.0.
1072 " setp.gt.s32 %p4, %r8, 8;\n"
1073 " selp.b32 %r79, 65535, 255, %p4;\n"
1074 " cvt.rn.f32.s32 %f48, %r79;\n"
// Coefficient A (%f50): 0.0 when param_8 == 999999.0 (0f497423F0), else -param_8.
1075 " setp.eq.f32 %p5, %f1, 0f497423F0;\n"
1076 " neg.f32 %f49, %f1;\n"
1077 " selp.f32 %f50, 0f00000000, %f49, %p5;\n"
// Horizontal weights, with t = fx + 1 and u = 1 - fx:
//   %f57 = ((A*t - 5A)*t + 8A)*t - 4A          (column sx - 1)
//   %f63 = ((A+2)*fx - (A+3))*fx*fx + 1        (column sx)
//   %f68 = ((A+2)*u  - (A+3))*u*u   + 1        (column sx + 1)
//   %f71 = 1 - %f57 - %f63 - %f68              (column sx + 2)
1078 " add.f32 %f51, %f46, 0f3F800000;\n"
1079 " mul.f32 %f52, %f50, 0fC0A00000;\n"
1080 " fma.rn.f32 %f53, %f50, %f51, %f52;\n"
1081 " mul.f32 %f54, %f50, 0f41000000;\n"
1082 " fma.rn.f32 %f55, %f51, %f53, %f54;\n"
1083 " mul.f32 %f56, %f50, 0fC0800000;\n"
1084 " fma.rn.f32 %f57, %f51, %f55, %f56;\n"
1085 " add.f32 %f58, %f50, 0f40000000;\n"
1086 " add.f32 %f59, %f50, 0f40400000;\n"
1087 " neg.f32 %f60, %f59;\n"
1088 " fma.rn.f32 %f61, %f58, %f46, %f60;\n"
1089 " mul.f32 %f62, %f46, %f61;\n"
1090 " fma.rn.f32 %f63, %f46, %f62, 0f3F800000;\n"
1091 " mov.f32 %f64, 0f3F800000;\n"
1092 " sub.f32 %f65, %f64, %f46;\n"
1093 " fma.rn.f32 %f66, %f58, %f65, %f60;\n"
1094 " mul.f32 %f67, %f65, %f66;\n"
1095 " fma.rn.f32 %f68, %f65, %f67, 0f3F800000;\n"
1096 " sub.f32 %f69, %f64, %f57;\n"
1097 " sub.f32 %f70, %f69, %f63;\n"
1098 " sub.f32 %f71, %f70, %f68;\n"
// Vertical weights %f75, %f78, %f82, %f85: the same four formulas evaluated with fy.
1099 " add.f32 %f72, %f47, 0f3F800000;\n"
1100 " fma.rn.f32 %f73, %f50, %f72, %f52;\n"
1101 " fma.rn.f32 %f74, %f72, %f73, %f54;\n"
1102 " fma.rn.f32 %f75, %f72, %f74, %f56;\n"
1103 " fma.rn.f32 %f76, %f58, %f47, %f60;\n"
1104 " mul.f32 %f77, %f47, %f76;\n"
1105 " fma.rn.f32 %f78, %f47, %f77, 0f3F800000;\n"
1106 " sub.f32 %f79, %f64, %f47;\n"
1107 " fma.rn.f32 %f80, %f58, %f79, %f60;\n"
1108 " mul.f32 %f81, %f79, %f80;\n"
1109 " fma.rn.f32 %f82, %f79, %f81, 0f3F800000;\n"
1110 " sub.f32 %f83, %f64, %f75;\n"
1111 " sub.f32 %f84, %f83, %f78;\n"
1112 " sub.f32 %f85, %f84, %f82;\n"
// Row sy - 1 (%f3): fetch columns sx - 1, sx, sx + 1, sx + 2 (%f2, %f4, %f6, %f8).
// The first two of the four returned components are used by this kernel.
1113 " add.f32 %f2, %f4, 0fBF800000;\n"
1114 " add.f32 %f3, %f11, 0fBF800000;\n"
1115 " // begin inline asm\n"
1116 " tex.2d.v4.f32.f32 {%r15, %r16, %r17, %r18}, [%rd4, {%f2, %f3}];\n"
1117 " // end inline asm\n"
1118 " mov.b32 %f86, %r16;\n"
1119 " mov.b32 %f87, %r15;\n"
1120 " // begin inline asm\n"
1121 " tex.2d.v4.f32.f32 {%r19, %r20, %r21, %r22}, [%rd4, {%f4, %f3}];\n"
1122 " // end inline asm\n"
1123 " mov.b32 %f88, %r20;\n"
1124 " mov.b32 %f89, %r19;\n"
1125 " add.f32 %f6, %f4, 0f3F800000;\n"
1126 " // begin inline asm\n"
1127 " tex.2d.v4.f32.f32 {%r23, %r24, %r25, %r26}, [%rd4, {%f6, %f3}];\n"
1128 " // end inline asm\n"
1129 " mov.b32 %f90, %r24;\n"
1130 " mov.b32 %f91, %r23;\n"
1131 " add.f32 %f8, %f4, 0f40000000;\n"
1132 " // begin inline asm\n"
1133 " tex.2d.v4.f32.f32 {%r27, %r28, %r29, %r30}, [%rd4, {%f8, %f3}];\n"
1134 " // end inline asm\n"
1135 " mov.b32 %f92, %r28;\n"
1136 " mov.b32 %f93, %r27;\n"
// Horizontally weighted sums of row sy - 1: %f100 (first component) and
// %f101 (second component).
1137 " mul.f32 %f94, %f63, %f89;\n"
1138 " mul.f32 %f95, %f63, %f88;\n"
1139 " fma.rn.f32 %f96, %f57, %f87, %f94;\n"
1140 " fma.rn.f32 %f97, %f57, %f86, %f95;\n"
1141 " fma.rn.f32 %f98, %f68, %f91, %f96;\n"
1142 " fma.rn.f32 %f99, %f68, %f90, %f97;\n"
1143 " fma.rn.f32 %f100, %f71, %f93, %f98;\n"
1144 " fma.rn.f32 %f101, %f71, %f92, %f99;\n"
// Row sy (%f11): weighted sums in %f116 / %f117.
1145 " // begin inline asm\n"
1146 " tex.2d.v4.f32.f32 {%r31, %r32, %r33, %r34}, [%rd4, {%f2, %f11}];\n"
1147 " // end inline asm\n"
1148 " mov.b32 %f102, %r32;\n"
1149 " mov.b32 %f103, %r31;\n"
1150 " // begin inline asm\n"
1151 " tex.2d.v4.f32.f32 {%r35, %r36, %r37, %r38}, [%rd4, {%f4, %f11}];\n"
1152 " // end inline asm\n"
1153 " mov.b32 %f104, %r36;\n"
1154 " mov.b32 %f105, %r35;\n"
1155 " // begin inline asm\n"
1156 " tex.2d.v4.f32.f32 {%r39, %r40, %r41, %r42}, [%rd4, {%f6, %f11}];\n"
1157 " // end inline asm\n"
1158 " mov.b32 %f106, %r40;\n"
1159 " mov.b32 %f107, %r39;\n"
1160 " // begin inline asm\n"
1161 " tex.2d.v4.f32.f32 {%r43, %r44, %r45, %r46}, [%rd4, {%f8, %f11}];\n"
1162 " // end inline asm\n"
1163 " mov.b32 %f108, %r44;\n"
1164 " mov.b32 %f109, %r43;\n"
1165 " mul.f32 %f110, %f63, %f105;\n"
1166 " mul.f32 %f111, %f63, %f104;\n"
1167 " fma.rn.f32 %f112, %f57, %f103, %f110;\n"
1168 " fma.rn.f32 %f113, %f57, %f102, %f111;\n"
1169 " fma.rn.f32 %f114, %f68, %f107, %f112;\n"
1170 " fma.rn.f32 %f115, %f68, %f106, %f113;\n"
1171 " fma.rn.f32 %f116, %f71, %f109, %f114;\n"
1172 " fma.rn.f32 %f117, %f71, %f108, %f115;\n"
// Row sy + 1 (%f19): weighted sums in %f132 / %f133.
1173 " add.f32 %f19, %f11, 0f3F800000;\n"
1174 " // begin inline asm\n"
1175 " tex.2d.v4.f32.f32 {%r47, %r48, %r49, %r50}, [%rd4, {%f2, %f19}];\n"
1176 " // end inline asm\n"
1177 " mov.b32 %f118, %r48;\n"
1178 " mov.b32 %f119, %r47;\n"
1179 " // begin inline asm\n"
1180 " tex.2d.v4.f32.f32 {%r51, %r52, %r53, %r54}, [%rd4, {%f4, %f19}];\n"
1181 " // end inline asm\n"
1182 " mov.b32 %f120, %r52;\n"
1183 " mov.b32 %f121, %r51;\n"
1184 " // begin inline asm\n"
1185 " tex.2d.v4.f32.f32 {%r55, %r56, %r57, %r58}, [%rd4, {%f6, %f19}];\n"
1186 " // end inline asm\n"
1187 " mov.b32 %f122, %r56;\n"
1188 " mov.b32 %f123, %r55;\n"
1189 " // begin inline asm\n"
1190 " tex.2d.v4.f32.f32 {%r59, %r60, %r61, %r62}, [%rd4, {%f8, %f19}];\n"
1191 " // end inline asm\n"
1192 " mov.b32 %f124, %r60;\n"
1193 " mov.b32 %f125, %r59;\n"
1194 " mul.f32 %f126, %f63, %f121;\n"
1195 " mul.f32 %f127, %f63, %f120;\n"
1196 " fma.rn.f32 %f128, %f57, %f119, %f126;\n"
1197 " fma.rn.f32 %f129, %f57, %f118, %f127;\n"
1198 " fma.rn.f32 %f130, %f68, %f123, %f128;\n"
1199 " fma.rn.f32 %f131, %f68, %f122, %f129;\n"
1200 " fma.rn.f32 %f132, %f71, %f125, %f130;\n"
1201 " fma.rn.f32 %f133, %f71, %f124, %f131;\n"
// Row sy + 2 (%f27): weighted sums in %f148 / %f149.
1202 " add.f32 %f27, %f11, 0f40000000;\n"
1203 " // begin inline asm\n"
1204 " tex.2d.v4.f32.f32 {%r63, %r64, %r65, %r66}, [%rd4, {%f2, %f27}];\n"
1205 " // end inline asm\n"
1206 " mov.b32 %f134, %r64;\n"
1207 " mov.b32 %f135, %r63;\n"
1208 " // begin inline asm\n"
1209 " tex.2d.v4.f32.f32 {%r67, %r68, %r69, %r70}, [%rd4, {%f4, %f27}];\n"
1210 " // end inline asm\n"
1211 " mov.b32 %f136, %r68;\n"
1212 " mov.b32 %f137, %r67;\n"
1213 " // begin inline asm\n"
1214 " tex.2d.v4.f32.f32 {%r71, %r72, %r73, %r74}, [%rd4, {%f6, %f27}];\n"
1215 " // end inline asm\n"
1216 " mov.b32 %f138, %r72;\n"
1217 " mov.b32 %f139, %r71;\n"
1218 " // begin inline asm\n"
1219 " tex.2d.v4.f32.f32 {%r75, %r76, %r77, %r78}, [%rd4, {%f8, %f27}];\n"
1220 " // end inline asm\n"
1221 " mov.b32 %f140, %r76;\n"
1222 " mov.b32 %f141, %r75;\n"
1223 " mul.f32 %f142, %f63, %f137;\n"
1224 " mul.f32 %f143, %f63, %f136;\n"
1225 " fma.rn.f32 %f144, %f57, %f135, %f142;\n"
1226 " fma.rn.f32 %f145, %f57, %f134, %f143;\n"
1227 " fma.rn.f32 %f146, %f68, %f139, %f144;\n"
1228 " fma.rn.f32 %f147, %f68, %f138, %f145;\n"
1229 " fma.rn.f32 %f148, %f71, %f141, %f146;\n"
1230 " fma.rn.f32 %f149, %f71, %f140, %f147;\n"
// Combine the four row sums of each component with the vertical weights
// (%f75, %f78, %f82, %f85 for rows sy - 1, sy, sy + 1, sy + 2).
1231 " mul.f32 %f150, %f78, %f116;\n"
1232 " mul.f32 %f151, %f78, %f117;\n"
1233 " fma.rn.f32 %f152, %f75, %f100, %f150;\n"
1234 " fma.rn.f32 %f153, %f75, %f101, %f151;\n"
1235 " fma.rn.f32 %f154, %f82, %f132, %f152;\n"
1236 " fma.rn.f32 %f155, %f82, %f133, %f153;\n"
1237 " fma.rn.f32 %f156, %f85, %f148, %f154;\n"
1238 " fma.rn.f32 %f157, %f85, %f149, %f155;\n"
// Multiply by the output scale, convert each component to u16 rounding toward
// zero (cvt.rzi), and store both at param_1 + (y * param_4 + x) * 4 bytes.
1239 " mul.f32 %f158, %f156, %f48;\n"
1240 " mul.f32 %f159, %f157, %f48;\n"
1241 " cvt.rzi.u16.f32 %rs1, %f158;\n"
1242 " cvt.rzi.u16.f32 %rs2, %f159;\n"
1243 " mad.lo.s32 %r80, %r2, %r5, %r1;\n"
1244 " mul.wide.s32 %rd20, %r80, 4;\n"
1245 " add.s64 %rd21, %rd1, %rd20;\n"
1246 " st.global.v2.u16 [%rd21], {%rs1, %rs2};\n"
// Subsample_Bicubic_ushort4: compiler-generated PTX kernel in which each thread
// produces one output element made of four 16-bit components, each a 4x4
// weighted sum of the matching component of sixteen texture fetches.
// Parameters, as used by the code below:
//   param_0 (u64): texture handle passed to every tex.2d fetch.
//   param_1 (u64): output pointer; converted to a global address for the store.
//   param_2 (u32): exclusive upper bound for the thread's x index; divisor of param_5.
//   param_3 (u32): exclusive upper bound for the thread's y index; divisor of param_6.
//   param_4 (u32): output row stride in elements (element index = y * param_4 + x).
//   param_5, param_6 (u32): numerators of the x / y scale ratios
//                           (presumably source width / height - confirm with the caller).
//   param_7 (u32): selects the output scale: 65535 when > 8, otherwise 255
//                  (presumably a bit depth - confirm with the caller).
//   param_8 (f32): filter coefficient; 999999.0 selects A = 0, otherwise A = -param_8.
1251 " // .globl Subsample_Bicubic_ushort4\n"
1252 ".visible .entry Subsample_Bicubic_ushort4(\n"
1253 " .param .u64 Subsample_Bicubic_ushort4_param_0,\n"
1254 " .param .u64 Subsample_Bicubic_ushort4_param_1,\n"
1255 " .param .u32 Subsample_Bicubic_ushort4_param_2,\n"
1256 " .param .u32 Subsample_Bicubic_ushort4_param_3,\n"
1257 " .param .u32 Subsample_Bicubic_ushort4_param_4,\n"
1258 " .param .u32 Subsample_Bicubic_ushort4_param_5,\n"
1259 " .param .u32 Subsample_Bicubic_ushort4_param_6,\n"
1260 " .param .u32 Subsample_Bicubic_ushort4_param_7,\n"
1261 " .param .f32 Subsample_Bicubic_ushort4_param_8\n"
1264 " .reg .pred %p<6>;\n"
1265 " .reg .b16 %rs<5>;\n"
1266 " .reg .f32 %f<234>;\n"
1267 " .reg .b32 %r<81>;\n"
1268 " .reg .b64 %rd<22>;\n"
// Only the two bounds are loaded before the range check; the remaining
// parameters are loaded after it.
1270 " ld.param.u32 %r4, [Subsample_Bicubic_ushort4_param_3];\n"
1271 " ld.param.u32 %r3, [Subsample_Bicubic_ushort4_param_2];\n"
1272 " // begin inline asm\n"
1273 " mov.u32 %r9, %ctaid.x;\n"
1274 " // end inline asm\n"
1275 " // begin inline asm\n"
1276 " mov.u32 %r10, %ctaid.y;\n"
1277 " // end inline asm\n"
1278 " // begin inline asm\n"
1279 " mov.u32 %r11, %ntid.x;\n"
1280 " // end inline asm\n"
1281 " // begin inline asm\n"
1282 " mov.u32 %r12, %ntid.y;\n"
1283 " // end inline asm\n"
1284 " // begin inline asm\n"
1285 " mov.u32 %r13, %tid.x;\n"
1286 " // end inline asm\n"
1287 " // begin inline asm\n"
1288 " mov.u32 %r14, %tid.y;\n"
1289 " // end inline asm\n"
// x = ntid.x * ctaid.x + tid.x (%r1), y = ntid.y * ctaid.y + tid.y (%r2).
1290 " mad.lo.s32 %r1, %r11, %r9, %r13;\n"
1291 " mad.lo.s32 %r2, %r12, %r10, %r14;\n"
// Threads with y >= param_3 or x >= param_2 take the LBB5_2 branch instead of
// running the computation below.
1292 " setp.ge.s32 %p1, %r2, %r4;\n"
1293 " setp.ge.s32 %p2, %r1, %r3;\n"
1294 " or.pred %p3, %p2, %p1;\n"
1295 " @%p3 bra LBB5_2;\n"
1296 " ld.param.f32 %f1, [Subsample_Bicubic_ushort4_param_8];\n"
1297 " ld.param.u32 %r8, [Subsample_Bicubic_ushort4_param_7];\n"
1298 " ld.param.u32 %r7, [Subsample_Bicubic_ushort4_param_6];\n"
1299 " ld.param.u32 %r6, [Subsample_Bicubic_ushort4_param_5];\n"
1300 " ld.param.u32 %r5, [Subsample_Bicubic_ushort4_param_4];\n"
1301 " ld.param.u64 %rd4, [Subsample_Bicubic_ushort4_param_0];\n"
1302 " ld.param.u64 %rd3, [Subsample_Bicubic_ushort4_param_1];\n"
1303 " cvta.to.global.u64 %rd1, %rd3;\n"
// Scale ratios: %f36 = param_5 / param_2, %f39 = param_6 / param_3.
1304 " cvt.rn.f32.s32 %f34, %r6;\n"
1305 " cvt.rn.f32.s32 %f35, %r3;\n"
1306 " div.rn.f32 %f36, %f34, %f35;\n"
1307 " cvt.rn.f32.s32 %f37, %r7;\n"
1308 " cvt.rn.f32.s32 %f38, %r4;\n"
1309 " div.rn.f32 %f39, %f37, %f38;\n"
// Sampling position: %f42 = %f36 * (x + 0.5) - 0.5, %f45 = %f39 * (y + 0.5) - 0.5.
1310 " cvt.rn.f32.s32 %f40, %r1;\n"
1311 " add.f32 %f41, %f40, 0f3F000000;\n"
1312 " fma.rn.f32 %f42, %f36, %f41, 0fBF000000;\n"
1313 " cvt.rn.f32.s32 %f43, %r2;\n"
1314 " add.f32 %f44, %f43, 0f3F000000;\n"
1315 " fma.rn.f32 %f45, %f39, %f44, 0fBF000000;\n"
// Split the position into its floor (sx = %f4, sy = %f11; cvt.rmi rounds toward
// minus infinity) and its fractional part (fx = %f46, fy = %f47).
1316 " cvt.rmi.f32.f32 %f4, %f42;\n"
1317 " cvt.rmi.f32.f32 %f11, %f45;\n"
1318 " sub.f32 %f46, %f42, %f4;\n"
1319 " sub.f32 %f47, %f45, %f11;\n"
// Output scale %f48: 65535.0 when param_7 > 8, otherwise 255.0.
1320 " setp.gt.s32 %p4, %r8, 8;\n"
1321 " selp.b32 %r79, 65535, 255, %p4;\n"
1322 " cvt.rn.f32.s32 %f48, %r79;\n"
// Coefficient A (%f50): 0.0 when param_8 == 999999.0 (0f497423F0), else -param_8.
1323 " setp.eq.f32 %p5, %f1, 0f497423F0;\n"
1324 " neg.f32 %f49, %f1;\n"
1325 " selp.f32 %f50, 0f00000000, %f49, %p5;\n"
// Horizontal weights, with t = fx + 1 and u = 1 - fx:
//   %f57 = ((A*t - 5A)*t + 8A)*t - 4A          (column sx - 1)
//   %f63 = ((A+2)*fx - (A+3))*fx*fx + 1        (column sx)
//   %f68 = ((A+2)*u  - (A+3))*u*u   + 1        (column sx + 1)
//   %f71 = 1 - %f57 - %f63 - %f68              (column sx + 2)
1326 " add.f32 %f51, %f46, 0f3F800000;\n"
1327 " mul.f32 %f52, %f50, 0fC0A00000;\n"
1328 " fma.rn.f32 %f53, %f50, %f51, %f52;\n"
1329 " mul.f32 %f54, %f50, 0f41000000;\n"
1330 " fma.rn.f32 %f55, %f51, %f53, %f54;\n"
1331 " mul.f32 %f56, %f50, 0fC0800000;\n"
1332 " fma.rn.f32 %f57, %f51, %f55, %f56;\n"
1333 " add.f32 %f58, %f50, 0f40000000;\n"
1334 " add.f32 %f59, %f50, 0f40400000;\n"
1335 " neg.f32 %f60, %f59;\n"
1336 " fma.rn.f32 %f61, %f58, %f46, %f60;\n"
1337 " mul.f32 %f62, %f46, %f61;\n"
1338 " fma.rn.f32 %f63, %f46, %f62, 0f3F800000;\n"
1339 " mov.f32 %f64, 0f3F800000;\n"
1340 " sub.f32 %f65, %f64, %f46;\n"
1341 " fma.rn.f32 %f66, %f58, %f65, %f60;\n"
1342 " mul.f32 %f67, %f65, %f66;\n"
1343 " fma.rn.f32 %f68, %f65, %f67, 0f3F800000;\n"
1344 " sub.f32 %f69, %f64, %f57;\n"
1345 " sub.f32 %f70, %f69, %f63;\n"
1346 " sub.f32 %f71, %f70, %f68;\n"
// Vertical weights %f75, %f78, %f82, %f85: the same four formulas evaluated with fy.
1347 " add.f32 %f72, %f47, 0f3F800000;\n"
1348 " fma.rn.f32 %f73, %f50, %f72, %f52;\n"
1349 " fma.rn.f32 %f74, %f72, %f73, %f54;\n"
1350 " fma.rn.f32 %f75, %f72, %f74, %f56;\n"
1351 " fma.rn.f32 %f76, %f58, %f47, %f60;\n"
1352 " mul.f32 %f77, %f47, %f76;\n"
1353 " fma.rn.f32 %f78, %f47, %f77, 0f3F800000;\n"
1354 " sub.f32 %f79, %f64, %f47;\n"
1355 " fma.rn.f32 %f80, %f58, %f79, %f60;\n"
1356 " mul.f32 %f81, %f79, %f80;\n"
1357 " fma.rn.f32 %f82, %f79, %f81, 0f3F800000;\n"
1358 " sub.f32 %f83, %f64, %f75;\n"
1359 " sub.f32 %f84, %f83, %f78;\n"
1360 " sub.f32 %f85, %f84, %f82;\n"
// Row sy - 1 (%f3): fetch columns sx - 1, sx, sx + 1, sx + 2 (%f2, %f4, %f6, %f8).
// All four returned components of every fetch are used by this kernel.
1361 " add.f32 %f2, %f4, 0fBF800000;\n"
1362 " add.f32 %f3, %f11, 0fBF800000;\n"
1363 " // begin inline asm\n"
1364 " tex.2d.v4.f32.f32 {%r15, %r16, %r17, %r18}, [%rd4, {%f2, %f3}];\n"
1365 " // end inline asm\n"
1366 " mov.b32 %f86, %r18;\n"
1367 " mov.b32 %f87, %r17;\n"
1368 " mov.b32 %f88, %r16;\n"
1369 " mov.b32 %f89, %r15;\n"
1370 " // begin inline asm\n"
1371 " tex.2d.v4.f32.f32 {%r19, %r20, %r21, %r22}, [%rd4, {%f4, %f3}];\n"
1372 " // end inline asm\n"
1373 " mov.b32 %f90, %r22;\n"
1374 " mov.b32 %f91, %r21;\n"
1375 " mov.b32 %f92, %r20;\n"
1376 " mov.b32 %f93, %r19;\n"
1377 " add.f32 %f6, %f4, 0f3F800000;\n"
1378 " // begin inline asm\n"
1379 " tex.2d.v4.f32.f32 {%r23, %r24, %r25, %r26}, [%rd4, {%f6, %f3}];\n"
1380 " // end inline asm\n"
1381 " mov.b32 %f94, %r26;\n"
1382 " mov.b32 %f95, %r25;\n"
1383 " mov.b32 %f96, %r24;\n"
1384 " mov.b32 %f97, %r23;\n"
1385 " add.f32 %f8, %f4, 0f40000000;\n"
1386 " // begin inline asm\n"
1387 " tex.2d.v4.f32.f32 {%r27, %r28, %r29, %r30}, [%rd4, {%f8, %f3}];\n"
1388 " // end inline asm\n"
1389 " mov.b32 %f98, %r30;\n"
1390 " mov.b32 %f99, %r29;\n"
1391 " mov.b32 %f100, %r28;\n"
1392 " mov.b32 %f101, %r27;\n"
// Horizontally weighted sums of row sy - 1, one per component:
// %f114 (first), %f115 (second), %f116 (third), %f117 (fourth).
1393 " mul.f32 %f102, %f63, %f93;\n"
1394 " mul.f32 %f103, %f63, %f92;\n"
1395 " mul.f32 %f104, %f63, %f91;\n"
1396 " mul.f32 %f105, %f63, %f90;\n"
1397 " fma.rn.f32 %f106, %f57, %f89, %f102;\n"
1398 " fma.rn.f32 %f107, %f57, %f88, %f103;\n"
1399 " fma.rn.f32 %f108, %f57, %f87, %f104;\n"
1400 " fma.rn.f32 %f109, %f57, %f86, %f105;\n"
1401 " fma.rn.f32 %f110, %f68, %f97, %f106;\n"
1402 " fma.rn.f32 %f111, %f68, %f96, %f107;\n"
1403 " fma.rn.f32 %f112, %f68, %f95, %f108;\n"
1404 " fma.rn.f32 %f113, %f68, %f94, %f109;\n"
1405 " fma.rn.f32 %f114, %f71, %f101, %f110;\n"
1406 " fma.rn.f32 %f115, %f71, %f100, %f111;\n"
1407 " fma.rn.f32 %f116, %f71, %f99, %f112;\n"
1408 " fma.rn.f32 %f117, %f71, %f98, %f113;\n"
// Row sy (%f11): weighted sums in %f146 .. %f149.
1409 " // begin inline asm\n"
1410 " tex.2d.v4.f32.f32 {%r31, %r32, %r33, %r34}, [%rd4, {%f2, %f11}];\n"
1411 " // end inline asm\n"
1412 " mov.b32 %f118, %r34;\n"
1413 " mov.b32 %f119, %r33;\n"
1414 " mov.b32 %f120, %r32;\n"
1415 " mov.b32 %f121, %r31;\n"
1416 " // begin inline asm\n"
1417 " tex.2d.v4.f32.f32 {%r35, %r36, %r37, %r38}, [%rd4, {%f4, %f11}];\n"
1418 " // end inline asm\n"
1419 " mov.b32 %f122, %r38;\n"
1420 " mov.b32 %f123, %r37;\n"
1421 " mov.b32 %f124, %r36;\n"
1422 " mov.b32 %f125, %r35;\n"
1423 " // begin inline asm\n"
1424 " tex.2d.v4.f32.f32 {%r39, %r40, %r41, %r42}, [%rd4, {%f6, %f11}];\n"
1425 " // end inline asm\n"
1426 " mov.b32 %f126, %r42;\n"
1427 " mov.b32 %f127, %r41;\n"
1428 " mov.b32 %f128, %r40;\n"
1429 " mov.b32 %f129, %r39;\n"
1430 " // begin inline asm\n"
1431 " tex.2d.v4.f32.f32 {%r43, %r44, %r45, %r46}, [%rd4, {%f8, %f11}];\n"
1432 " // end inline asm\n"
1433 " mov.b32 %f130, %r46;\n"
1434 " mov.b32 %f131, %r45;\n"
1435 " mov.b32 %f132, %r44;\n"
1436 " mov.b32 %f133, %r43;\n"
1437 " mul.f32 %f134, %f63, %f125;\n"
1438 " mul.f32 %f135, %f63, %f124;\n"
1439 " mul.f32 %f136, %f63, %f123;\n"
1440 " mul.f32 %f137, %f63, %f122;\n"
1441 " fma.rn.f32 %f138, %f57, %f121, %f134;\n"
1442 " fma.rn.f32 %f139, %f57, %f120, %f135;\n"
1443 " fma.rn.f32 %f140, %f57, %f119, %f136;\n"
1444 " fma.rn.f32 %f141, %f57, %f118, %f137;\n"
1445 " fma.rn.f32 %f142, %f68, %f129, %f138;\n"
1446 " fma.rn.f32 %f143, %f68, %f128, %f139;\n"
1447 " fma.rn.f32 %f144, %f68, %f127, %f140;\n"
1448 " fma.rn.f32 %f145, %f68, %f126, %f141;\n"
1449 " fma.rn.f32 %f146, %f71, %f133, %f142;\n"
1450 " fma.rn.f32 %f147, %f71, %f132, %f143;\n"
1451 " fma.rn.f32 %f148, %f71, %f131, %f144;\n"
1452 " fma.rn.f32 %f149, %f71, %f130, %f145;\n"
// Row sy + 1 (%f19): weighted sums in %f178 .. %f181.
1453 " add.f32 %f19, %f11, 0f3F800000;\n"
1454 " // begin inline asm\n"
1455 " tex.2d.v4.f32.f32 {%r47, %r48, %r49, %r50}, [%rd4, {%f2, %f19}];\n"
1456 " // end inline asm\n"
1457 " mov.b32 %f150, %r50;\n"
1458 " mov.b32 %f151, %r49;\n"
1459 " mov.b32 %f152, %r48;\n"
1460 " mov.b32 %f153, %r47;\n"
1461 " // begin inline asm\n"
1462 " tex.2d.v4.f32.f32 {%r51, %r52, %r53, %r54}, [%rd4, {%f4, %f19}];\n"
1463 " // end inline asm\n"
1464 " mov.b32 %f154, %r54;\n"
1465 " mov.b32 %f155, %r53;\n"
1466 " mov.b32 %f156, %r52;\n"
1467 " mov.b32 %f157, %r51;\n"
1468 " // begin inline asm\n"
1469 " tex.2d.v4.f32.f32 {%r55, %r56, %r57, %r58}, [%rd4, {%f6, %f19}];\n"
1470 " // end inline asm\n"
1471 " mov.b32 %f158, %r58;\n"
1472 " mov.b32 %f159, %r57;\n"
1473 " mov.b32 %f160, %r56;\n"
1474 " mov.b32 %f161, %r55;\n"
1475 " // begin inline asm\n"
1476 " tex.2d.v4.f32.f32 {%r59, %r60, %r61, %r62}, [%rd4, {%f8, %f19}];\n"
1477 " // end inline asm\n"
1478 " mov.b32 %f162, %r62;\n"
1479 " mov.b32 %f163, %r61;\n"
1480 " mov.b32 %f164, %r60;\n"
1481 " mov.b32 %f165, %r59;\n"
1482 " mul.f32 %f166, %f63, %f157;\n"
1483 " mul.f32 %f167, %f63, %f156;\n"
1484 " mul.f32 %f168, %f63, %f155;\n"
1485 " mul.f32 %f169, %f63, %f154;\n"
1486 " fma.rn.f32 %f170, %f57, %f153, %f166;\n"
1487 " fma.rn.f32 %f171, %f57, %f152, %f167;\n"
1488 " fma.rn.f32 %f172, %f57, %f151, %f168;\n"
1489 " fma.rn.f32 %f173, %f57, %f150, %f169;\n"
1490 " fma.rn.f32 %f174, %f68, %f161, %f170;\n"
1491 " fma.rn.f32 %f175, %f68, %f160, %f171;\n"
1492 " fma.rn.f32 %f176, %f68, %f159, %f172;\n"
1493 " fma.rn.f32 %f177, %f68, %f158, %f173;\n"
1494 " fma.rn.f32 %f178, %f71, %f165, %f174;\n"
1495 " fma.rn.f32 %f179, %f71, %f164, %f175;\n"
1496 " fma.rn.f32 %f180, %f71, %f163, %f176;\n"
1497 " fma.rn.f32 %f181, %f71, %f162, %f177;\n"
// Row sy + 2 (%f27): weighted sums in %f210 .. %f213.
1498 " add.f32 %f27, %f11, 0f40000000;\n"
1499 " // begin inline asm\n"
1500 " tex.2d.v4.f32.f32 {%r63, %r64, %r65, %r66}, [%rd4, {%f2, %f27}];\n"
1501 " // end inline asm\n"
1502 " mov.b32 %f182, %r66;\n"
1503 " mov.b32 %f183, %r65;\n"
1504 " mov.b32 %f184, %r64;\n"
1505 " mov.b32 %f185, %r63;\n"
1506 " // begin inline asm\n"
1507 " tex.2d.v4.f32.f32 {%r67, %r68, %r69, %r70}, [%rd4, {%f4, %f27}];\n"
1508 " // end inline asm\n"
1509 " mov.b32 %f186, %r70;\n"
1510 " mov.b32 %f187, %r69;\n"
1511 " mov.b32 %f188, %r68;\n"
1512 " mov.b32 %f189, %r67;\n"
1513 " // begin inline asm\n"
1514 " tex.2d.v4.f32.f32 {%r71, %r72, %r73, %r74}, [%rd4, {%f6, %f27}];\n"
1515 " // end inline asm\n"
1516 " mov.b32 %f190, %r74;\n"
1517 " mov.b32 %f191, %r73;\n"
1518 " mov.b32 %f192, %r72;\n"
1519 " mov.b32 %f193, %r71;\n"
1520 " // begin inline asm\n"
1521 " tex.2d.v4.f32.f32 {%r75, %r76, %r77, %r78}, [%rd4, {%f8, %f27}];\n"
1522 " // end inline asm\n"
1523 " mov.b32 %f194, %r78;\n"
1524 " mov.b32 %f195, %r77;\n"
1525 " mov.b32 %f196, %r76;\n"
1526 " mov.b32 %f197, %r75;\n"
1527 " mul.f32 %f198, %f63, %f189;\n"
1528 " mul.f32 %f199, %f63, %f188;\n"
1529 " mul.f32 %f200, %f63, %f187;\n"
1530 " mul.f32 %f201, %f63, %f186;\n"
1531 " fma.rn.f32 %f202, %f57, %f185, %f198;\n"
1532 " fma.rn.f32 %f203, %f57, %f184, %f199;\n"
1533 " fma.rn.f32 %f204, %f57, %f183, %f200;\n"
1534 " fma.rn.f32 %f205, %f57, %f182, %f201;\n"
1535 " fma.rn.f32 %f206, %f68, %f193, %f202;\n"
1536 " fma.rn.f32 %f207, %f68, %f192, %f203;\n"
1537 " fma.rn.f32 %f208, %f68, %f191, %f204;\n"
1538 " fma.rn.f32 %f209, %f68, %f190, %f205;\n"
1539 " fma.rn.f32 %f210, %f71, %f197, %f206;\n"
1540 " fma.rn.f32 %f211, %f71, %f196, %f207;\n"
1541 " fma.rn.f32 %f212, %f71, %f195, %f208;\n"
1542 " fma.rn.f32 %f213, %f71, %f194, %f209;\n"
// Combine the four row sums of each component with the vertical weights
// (%f75, %f78, %f82, %f85 for rows sy - 1, sy, sy + 1, sy + 2).
1543 " mul.f32 %f214, %f78, %f146;\n"
1544 " mul.f32 %f215, %f78, %f147;\n"
1545 " mul.f32 %f216, %f78, %f148;\n"
1546 " mul.f32 %f217, %f78, %f149;\n"
1547 " fma.rn.f32 %f218, %f75, %f114, %f214;\n"
1548 " fma.rn.f32 %f219, %f75, %f115, %f215;\n"
1549 " fma.rn.f32 %f220, %f75, %f116, %f216;\n"
1550 " fma.rn.f32 %f221, %f75, %f117, %f217;\n"
1551 " fma.rn.f32 %f222, %f82, %f178, %f218;\n"
1552 " fma.rn.f32 %f223, %f82, %f179, %f219;\n"
1553 " fma.rn.f32 %f224, %f82, %f180, %f220;\n"
1554 " fma.rn.f32 %f225, %f82, %f181, %f221;\n"
1555 " fma.rn.f32 %f226, %f85, %f210, %f222;\n"
1556 " fma.rn.f32 %f227, %f85, %f211, %f223;\n"
1557 " fma.rn.f32 %f228, %f85, %f212, %f224;\n"
1558 " fma.rn.f32 %f229, %f85, %f213, %f225;\n"
// Multiply by the output scale, convert each component to u16 rounding toward
// zero (cvt.rzi), and store all four at param_1 + (y * param_4 + x) * 8 bytes.
1559 " mul.f32 %f230, %f226, %f48;\n"
1560 " mul.f32 %f231, %f227, %f48;\n"
1561 " mul.f32 %f232, %f228, %f48;\n"
1562 " mul.f32 %f233, %f229, %f48;\n"
1563 " cvt.rzi.u16.f32 %rs1, %f230;\n"
1564 " cvt.rzi.u16.f32 %rs2, %f231;\n"
1565 " cvt.rzi.u16.f32 %rs3, %f232;\n"
1566 " cvt.rzi.u16.f32 %rs4, %f233;\n"
1567 " mad.lo.s32 %r80, %r2, %r5, %r1;\n"
1568 " mul.wide.s32 %rd20, %r80, 8;\n"
1569 " add.s64 %rd21, %rd1, %rd20;\n"
1570 " st.global.v4.u16 [%rd21], {%rs1, %rs2, %rs3, %rs4};\n"
1575 " // .globl Subsample_Lanczos_uchar\n"
1576 ".visible .entry Subsample_Lanczos_uchar(\n"
1577 " .param .u64 Subsample_Lanczos_uchar_param_0,\n"
1578 " .param .u64 Subsample_Lanczos_uchar_param_1,\n"
1579 " .param .u32 Subsample_Lanczos_uchar_param_2,\n"
1580 " .param .u32 Subsample_Lanczos_uchar_param_3,\n"
1581 " .param .u32 Subsample_Lanczos_uchar_param_4,\n"
1582 " .param .u32 Subsample_Lanczos_uchar_param_5,\n"
1583 " .param .u32 Subsample_Lanczos_uchar_param_6,\n"
1584 " .param .u32 Subsample_Lanczos_uchar_param_7,\n"
1585 " .param .f32 Subsample_Lanczos_uchar_param_8\n"
1588 " .reg .pred %p<13>;\n"
1589 " .reg .b16 %rs<2>;\n"
1590 " .reg .f32 %f<195>;\n"
1591 " .reg .b32 %r<81>;\n"
1592 " .reg .b64 %rd<22>;\n"
1594 " ld.param.u32 %r4, [Subsample_Lanczos_uchar_param_3];\n"
1595 " ld.param.u32 %r3, [Subsample_Lanczos_uchar_param_2];\n"
1596 " // begin inline asm\n"
1597 " mov.u32 %r9, %ctaid.x;\n"
1598 " // end inline asm\n"
1599 " // begin inline asm\n"
1600 " mov.u32 %r10, %ctaid.y;\n"
1601 " // end inline asm\n"
1602 " // begin inline asm\n"
1603 " mov.u32 %r11, %ntid.x;\n"
1604 " // end inline asm\n"
1605 " // begin inline asm\n"
1606 " mov.u32 %r12, %ntid.y;\n"
1607 " // end inline asm\n"
1608 " // begin inline asm\n"
1609 " mov.u32 %r13, %tid.x;\n"
1610 " // end inline asm\n"
1611 " // begin inline asm\n"
1612 " mov.u32 %r14, %tid.y;\n"
1613 " // end inline asm\n"
1614 " mad.lo.s32 %r1, %r11, %r9, %r13;\n"
1615 " mad.lo.s32 %r2, %r12, %r10, %r14;\n"
1616 " setp.ge.s32 %p1, %r2, %r4;\n"
1617 " setp.ge.s32 %p2, %r1, %r3;\n"
1618 " or.pred %p3, %p2, %p1;\n"
1619 " @%p3 bra LBB6_18;\n"
1620 " ld.param.u32 %r7, [Subsample_Lanczos_uchar_param_6];\n"
1621 " ld.param.u32 %r6, [Subsample_Lanczos_uchar_param_5];\n"
1622 " cvt.rn.f32.s32 %f50, %r6;\n"
1623 " cvt.rn.f32.s32 %f51, %r3;\n"
1624 " div.rn.f32 %f52, %f50, %f51;\n"
1625 " cvt.rn.f32.s32 %f53, %r7;\n"
1626 " cvt.rn.f32.s32 %f54, %r4;\n"
1627 " div.rn.f32 %f55, %f53, %f54;\n"
1628 " cvt.rn.f32.s32 %f56, %r1;\n"
1629 " add.f32 %f57, %f56, 0f3F000000;\n"
1630 " fma.rn.f32 %f58, %f52, %f57, 0fBF000000;\n"
1631 " cvt.rn.f32.s32 %f59, %r2;\n"
1632 " add.f32 %f60, %f59, 0f3F000000;\n"
1633 " cvt.rmi.f32.f32 %f113, %f58;\n"
1634 " sub.f32 %f62, %f58, %f113;\n"
1635 " add.f32 %f63, %f62, 0f3F800000;\n"
1636 " mul.f32 %f5, %f63, 0f40490FDB;\n"
1637 " mul.f32 %f6, %f62, 0f40490FDB;\n"
1638 " add.f32 %f64, %f62, 0fBF800000;\n"
1639 " setp.eq.f32 %p5, %f5, 0f00000000;\n"
1640 " mov.f32 %f194, 0f3F800000;\n"
1641 " mov.f32 %f187, %f194;\n"
1642 " @%p5 bra LBB6_3;\n"
1643 " mul.f32 %f66, %f5, %f5;\n"
1644 " mul.f32 %f9, %f66, 0f3F000000;\n"
1645 " mul.f32 %f10, %f5, 0f3F000000;\n"
1646 " sin.approx.f32 %f67, %f5;\n"
1647 " sin.approx.f32 %f68, %f10;\n"
1648 " mul.f32 %f69, %f67, %f68;\n"
1649 " div.rn.f32 %f187, %f69, %f9;\n"
1651 " fma.rn.f32 %f61, %f55, %f60, 0fBF000000;\n"
1652 " add.f32 %f65, %f62, 0fC0000000;\n"
1653 " mul.f32 %f7, %f64, 0f40490FDB;\n"
1654 " setp.eq.f32 %p6, %f6, 0f00000000;\n"
1655 " mov.f32 %f188, %f194;\n"
1656 " @%p6 bra LBB6_5;\n"
1657 " mul.f32 %f71, %f6, %f6;\n"
1658 " mul.f32 %f13, %f71, 0f3F000000;\n"
1659 " mul.f32 %f14, %f6, 0f3F000000;\n"
1660 " sin.approx.f32 %f72, %f6;\n"
1661 " sin.approx.f32 %f73, %f14;\n"
1662 " mul.f32 %f74, %f72, %f73;\n"
1663 " div.rn.f32 %f188, %f74, %f13;\n"
1665 " cvt.rmi.f32.f32 %f120, %f61;\n"
1666 " mul.f32 %f8, %f65, 0f40490FDB;\n"
1667 " setp.eq.f32 %p7, %f7, 0f00000000;\n"
1668 " mov.f32 %f189, %f194;\n"
1669 " @%p7 bra LBB6_7;\n"
1670 " mul.f32 %f76, %f7, %f7;\n"
1671 " mul.f32 %f17, %f76, 0f3F000000;\n"
1672 " mul.f32 %f18, %f7, 0f3F000000;\n"
1673 " sin.approx.f32 %f77, %f7;\n"
1674 " sin.approx.f32 %f78, %f18;\n"
1675 " mul.f32 %f79, %f77, %f78;\n"
1676 " div.rn.f32 %f189, %f79, %f17;\n"
1678 " sub.f32 %f3, %f61, %f120;\n"
1679 " setp.eq.f32 %p8, %f8, 0f00000000;\n"
1680 " mov.f32 %f190, %f194;\n"
1681 " @%p8 bra LBB6_9;\n"
1682 " mul.f32 %f81, %f8, %f8;\n"
1683 " mul.f32 %f21, %f81, 0f3F000000;\n"
1684 " mul.f32 %f22, %f8, 0f3F000000;\n"
1685 " sin.approx.f32 %f82, %f8;\n"
1686 " sin.approx.f32 %f83, %f22;\n"
1687 " mul.f32 %f84, %f82, %f83;\n"
1688 " div.rn.f32 %f190, %f84, %f21;\n"
1690 " ld.param.u32 %r8, [Subsample_Lanczos_uchar_param_7];\n"
1691 " add.f32 %f86, %f187, %f188;\n"
1692 " add.f32 %f89, %f3, 0f3F800000;\n"
1693 " mul.f32 %f29, %f89, 0f40490FDB;\n"
1694 " mul.f32 %f30, %f3, 0f40490FDB;\n"
1695 " add.f32 %f90, %f3, 0fBF800000;\n"
1696 " setp.eq.f32 %p9, %f29, 0f00000000;\n"
1697 " mov.f32 %f191, %f194;\n"
1698 " @%p9 bra LBB6_11;\n"
1699 " mul.f32 %f92, %f29, %f29;\n"
1700 " mul.f32 %f33, %f92, 0f3F000000;\n"
1701 " mul.f32 %f34, %f29, 0f3F000000;\n"
1702 " sin.approx.f32 %f93, %f29;\n"
1703 " sin.approx.f32 %f94, %f34;\n"
1704 " mul.f32 %f95, %f93, %f94;\n"
1705 " div.rn.f32 %f191, %f95, %f33;\n"
1707 " setp.gt.s32 %p4, %r8, 8;\n"
1708 " add.f32 %f87, %f86, %f189;\n"
1709 " add.f32 %f91, %f3, 0fC0000000;\n"
1710 " mul.f32 %f31, %f90, 0f40490FDB;\n"
1711 " setp.eq.f32 %p10, %f30, 0f00000000;\n"
1712 " mov.f32 %f192, %f194;\n"
1713 " @%p10 bra LBB6_13;\n"
1714 " mul.f32 %f97, %f30, %f30;\n"
1715 " mul.f32 %f37, %f97, 0f3F000000;\n"
1716 " mul.f32 %f38, %f30, 0f3F000000;\n"
1717 " sin.approx.f32 %f98, %f30;\n"
1718 " sin.approx.f32 %f99, %f38;\n"
1719 " mul.f32 %f100, %f98, %f99;\n"
1720 " div.rn.f32 %f192, %f100, %f37;\n"
1722 " ld.param.u64 %rd3, [Subsample_Lanczos_uchar_param_1];\n"
1723 " selp.b32 %r15, 65535, 255, %p4;\n"
1724 " add.f32 %f88, %f87, %f190;\n"
1725 " mul.f32 %f32, %f91, 0f40490FDB;\n"
1726 " setp.eq.f32 %p11, %f31, 0f00000000;\n"
1727 " mov.f32 %f193, %f194;\n"
1728 " @%p11 bra LBB6_15;\n"
1729 " mul.f32 %f102, %f31, %f31;\n"
1730 " mul.f32 %f41, %f102, 0f3F000000;\n"
1731 " mul.f32 %f42, %f31, 0f3F000000;\n"
1732 " sin.approx.f32 %f103, %f31;\n"
1733 " sin.approx.f32 %f104, %f42;\n"
1734 " mul.f32 %f105, %f103, %f104;\n"
1735 " div.rn.f32 %f193, %f105, %f41;\n"
1737 " ld.param.u32 %r5, [Subsample_Lanczos_uchar_param_4];\n"
1738 " ld.param.u64 %rd4, [Subsample_Lanczos_uchar_param_0];\n"
1739 " cvta.to.global.u64 %rd1, %rd3;\n"
1740 " cvt.rn.f32.s32 %f4, %r15;\n"
1741 " div.rn.f32 %f25, %f187, %f88;\n"
1742 " div.rn.f32 %f26, %f188, %f88;\n"
1743 " div.rn.f32 %f27, %f189, %f88;\n"
1744 " div.rn.f32 %f28, %f190, %f88;\n"
1745 " setp.eq.f32 %p12, %f32, 0f00000000;\n"
1746 " @%p12 bra LBB6_17;\n"
1747 " mul.f32 %f107, %f32, %f32;\n"
1748 " mul.f32 %f45, %f107, 0f3F000000;\n"
1749 " mul.f32 %f46, %f32, 0f3F000000;\n"
1750 " sin.approx.f32 %f108, %f32;\n"
1751 " sin.approx.f32 %f109, %f46;\n"
1752 " mul.f32 %f110, %f108, %f109;\n"
1753 " div.rn.f32 %f194, %f110, %f45;\n"
1755 " add.f32 %f143, %f191, %f192;\n"
1756 " add.f32 %f144, %f143, %f193;\n"
1757 " add.f32 %f145, %f144, %f194;\n"
1758 " div.rn.f32 %f146, %f191, %f145;\n"
1759 " div.rn.f32 %f147, %f192, %f145;\n"
1760 " div.rn.f32 %f148, %f193, %f145;\n"
1761 " div.rn.f32 %f149, %f194, %f145;\n"
1762 " add.f32 %f111, %f113, 0fBF800000;\n"
1763 " add.f32 %f112, %f120, 0fBF800000;\n"
1764 " // begin inline asm\n"
1765 " tex.2d.v4.f32.f32 {%r16, %r17, %r18, %r19}, [%rd4, {%f111, %f112}];\n"
1766 " // end inline asm\n"
1767 " mov.b32 %f150, %r16;\n"
1768 " // begin inline asm\n"
1769 " tex.2d.v4.f32.f32 {%r20, %r21, %r22, %r23}, [%rd4, {%f113, %f112}];\n"
1770 " // end inline asm\n"
1771 " mov.b32 %f151, %r20;\n"
1772 " add.f32 %f115, %f113, 0f3F800000;\n"
1773 " // begin inline asm\n"
1774 " tex.2d.v4.f32.f32 {%r24, %r25, %r26, %r27}, [%rd4, {%f115, %f112}];\n"
1775 " // end inline asm\n"
1776 " mov.b32 %f152, %r24;\n"
1777 " add.f32 %f117, %f113, 0f40000000;\n"
1778 " // begin inline asm\n"
1779 " tex.2d.v4.f32.f32 {%r28, %r29, %r30, %r31}, [%rd4, {%f117, %f112}];\n"
1780 " // end inline asm\n"
1781 " mov.b32 %f153, %r28;\n"
1782 " mul.f32 %f154, %f26, %f151;\n"
1783 " fma.rn.f32 %f155, %f25, %f150, %f154;\n"
1784 " fma.rn.f32 %f156, %f27, %f152, %f155;\n"
1785 " fma.rn.f32 %f157, %f28, %f153, %f156;\n"
1786 " // begin inline asm\n"
1787 " tex.2d.v4.f32.f32 {%r32, %r33, %r34, %r35}, [%rd4, {%f111, %f120}];\n"
1788 " // end inline asm\n"
1789 " mov.b32 %f158, %r32;\n"
1790 " // begin inline asm\n"
1791 " tex.2d.v4.f32.f32 {%r36, %r37, %r38, %r39}, [%rd4, {%f113, %f120}];\n"
1792 " // end inline asm\n"
1793 " mov.b32 %f159, %r36;\n"
1794 " // begin inline asm\n"
1795 " tex.2d.v4.f32.f32 {%r40, %r41, %r42, %r43}, [%rd4, {%f115, %f120}];\n"
1796 " // end inline asm\n"
1797 " mov.b32 %f160, %r40;\n"
1798 " // begin inline asm\n"
1799 " tex.2d.v4.f32.f32 {%r44, %r45, %r46, %r47}, [%rd4, {%f117, %f120}];\n"
1800 " // end inline asm\n"
1801 " mov.b32 %f161, %r44;\n"
1802 " mul.f32 %f162, %f26, %f159;\n"
1803 " fma.rn.f32 %f163, %f25, %f158, %f162;\n"
1804 " fma.rn.f32 %f164, %f27, %f160, %f163;\n"
1805 " fma.rn.f32 %f165, %f28, %f161, %f164;\n"
1806 " add.f32 %f128, %f120, 0f3F800000;\n"
1807 " // begin inline asm\n"
1808 " tex.2d.v4.f32.f32 {%r48, %r49, %r50, %r51}, [%rd4, {%f111, %f128}];\n"
1809 " // end inline asm\n"
1810 " mov.b32 %f166, %r48;\n"
1811 " // begin inline asm\n"
1812 " tex.2d.v4.f32.f32 {%r52, %r53, %r54, %r55}, [%rd4, {%f113, %f128}];\n"
1813 " // end inline asm\n"
1814 " mov.b32 %f167, %r52;\n"
1815 " // begin inline asm\n"
1816 " tex.2d.v4.f32.f32 {%r56, %r57, %r58, %r59}, [%rd4, {%f115, %f128}];\n"
1817 " // end inline asm\n"
1818 " mov.b32 %f168, %r56;\n"
1819 " // begin inline asm\n"
1820 " tex.2d.v4.f32.f32 {%r60, %r61, %r62, %r63}, [%rd4, {%f117, %f128}];\n"
1821 " // end inline asm\n"
1822 " mov.b32 %f169, %r60;\n"
1823 " mul.f32 %f170, %f26, %f167;\n"
1824 " fma.rn.f32 %f171, %f25, %f166, %f170;\n"
1825 " fma.rn.f32 %f172, %f27, %f168, %f171;\n"
1826 " fma.rn.f32 %f173, %f28, %f169, %f172;\n"
1827 " add.f32 %f136, %f120, 0f40000000;\n"
1828 " // begin inline asm\n"
1829 " tex.2d.v4.f32.f32 {%r64, %r65, %r66, %r67}, [%rd4, {%f111, %f136}];\n"
1830 " // end inline asm\n"
1831 " mov.b32 %f174, %r64;\n"
1832 " // begin inline asm\n"
1833 " tex.2d.v4.f32.f32 {%r68, %r69, %r70, %r71}, [%rd4, {%f113, %f136}];\n"
1834 " // end inline asm\n"
1835 " mov.b32 %f175, %r68;\n"
1836 " // begin inline asm\n"
1837 " tex.2d.v4.f32.f32 {%r72, %r73, %r74, %r75}, [%rd4, {%f115, %f136}];\n"
1838 " // end inline asm\n"
1839 " mov.b32 %f176, %r72;\n"
1840 " // begin inline asm\n"
1841 " tex.2d.v4.f32.f32 {%r76, %r77, %r78, %r79}, [%rd4, {%f117, %f136}];\n"
1842 " // end inline asm\n"
1843 " mov.b32 %f177, %r76;\n"
1844 " mul.f32 %f178, %f26, %f175;\n"
1845 " fma.rn.f32 %f179, %f25, %f174, %f178;\n"
1846 " fma.rn.f32 %f180, %f27, %f176, %f179;\n"
1847 " fma.rn.f32 %f181, %f28, %f177, %f180;\n"
1848 " mul.f32 %f182, %f147, %f165;\n"
1849 " fma.rn.f32 %f183, %f146, %f157, %f182;\n"
1850 " fma.rn.f32 %f184, %f148, %f173, %f183;\n"
1851 " fma.rn.f32 %f185, %f149, %f181, %f184;\n"
1852 " mul.f32 %f186, %f185, %f4;\n"
1853 " cvt.rzi.u16.f32 %rs1, %f186;\n"
1854 " mad.lo.s32 %r80, %r2, %r5, %r1;\n"
1855 " cvt.s64.s32 %rd20, %r80;\n"
1856 " add.s64 %rd21, %rd1, %rd20;\n"
1857 " st.global.u8 [%rd21], %rs1;\n"
// ---------------------------------------------------------------------------
// Subsample_Lanczos_uchar2 (PTX entry, stored as C string fragments)
//
// Each thread computes one output element: a normalized 4x4 weighted sum of
// texture fetches, evaluated for the first two components of every fetch,
// scaled and stored as a pair of 8-bit values.
//
// Parameters as they are used in the body below:
//   param_0 (u64)  texture handle passed to every tex.2d
//   param_1 (u64)  destination pointer (converted with cvta.to.global)
//   param_2 (u32)  upper bound for the thread x index, also x scale divisor
//   param_3 (u32)  upper bound for the thread y index, also y scale divisor
//   param_4 (u32)  destination row stride, counted in 2-byte elements
//   param_5 (u32)  x scale numerator (presumably source width - confirm)
//   param_6 (u32)  y scale numerator (presumably source height - confirm)
//   param_7 (u32)  output scale selector: 65535 when > 8, otherwise 255
//                  (presumably a bit depth - confirm against the caller)
//   param_8 (f32)  declared but never loaded by this kernel
//
// NOTE(review): the LBB7_* branch targets are not visible in this listing.
// Each weight block below is presumably skipped to the statement that follows
// its div.rn when the argument is exactly zero - confirm against the full PTX.
// ---------------------------------------------------------------------------
1862 " // .globl Subsample_Lanczos_uchar2\n"
1863 ".visible .entry Subsample_Lanczos_uchar2(\n"
1864 " .param .u64 Subsample_Lanczos_uchar2_param_0,\n"
1865 " .param .u64 Subsample_Lanczos_uchar2_param_1,\n"
1866 " .param .u32 Subsample_Lanczos_uchar2_param_2,\n"
1867 " .param .u32 Subsample_Lanczos_uchar2_param_3,\n"
1868 " .param .u32 Subsample_Lanczos_uchar2_param_4,\n"
1869 " .param .u32 Subsample_Lanczos_uchar2_param_5,\n"
1870 " .param .u32 Subsample_Lanczos_uchar2_param_6,\n"
1871 " .param .u32 Subsample_Lanczos_uchar2_param_7,\n"
1872 " .param .f32 Subsample_Lanczos_uchar2_param_8\n"
// Virtual register declarations for this entry.
1875 " .reg .pred %p<13>;\n"
1876 " .reg .b16 %rs<3>;\n"
1877 " .reg .f32 %f<232>;\n"
1878 " .reg .b32 %r<81>;\n"
1879 " .reg .b64 %rd<22>;\n"
// Load the bounds: %r4 = param_3, %r3 = param_2.
1881 " ld.param.u32 %r4, [Subsample_Lanczos_uchar2_param_3];\n"
1882 " ld.param.u32 %r3, [Subsample_Lanczos_uchar2_param_2];\n"
// Read the block index, block size and thread index special registers.
1883 " // begin inline asm\n"
1884 " mov.u32 %r9, %ctaid.x;\n"
1885 " // end inline asm\n"
1886 " // begin inline asm\n"
1887 " mov.u32 %r10, %ctaid.y;\n"
1888 " // end inline asm\n"
1889 " // begin inline asm\n"
1890 " mov.u32 %r11, %ntid.x;\n"
1891 " // end inline asm\n"
1892 " // begin inline asm\n"
1893 " mov.u32 %r12, %ntid.y;\n"
1894 " // end inline asm\n"
1895 " // begin inline asm\n"
1896 " mov.u32 %r13, %tid.x;\n"
1897 " // end inline asm\n"
1898 " // begin inline asm\n"
1899 " mov.u32 %r14, %tid.y;\n"
1900 " // end inline asm\n"
// Global indices: %r1 = ntid.x * ctaid.x + tid.x (x), %r2 = ntid.y * ctaid.y + tid.y (y).
1901 " mad.lo.s32 %r1, %r11, %r9, %r13;\n"
1902 " mad.lo.s32 %r2, %r12, %r10, %r14;\n"
// Branch to LBB7_18 when y >= param_3 or x >= param_2
// (target not visible here; presumably the kernel exit).
1903 " setp.ge.s32 %p1, %r2, %r4;\n"
1904 " setp.ge.s32 %p2, %r1, %r3;\n"
1905 " or.pred %p3, %p2, %p1;\n"
1906 " @%p3 bra LBB7_18;\n"
// Scale factors as floats: %f52 = param_5 / param_2, %f55 = param_6 / param_3.
1907 " ld.param.u32 %r7, [Subsample_Lanczos_uchar2_param_6];\n"
1908 " ld.param.u32 %r6, [Subsample_Lanczos_uchar2_param_5];\n"
1909 " cvt.rn.f32.s32 %f50, %r6;\n"
1910 " cvt.rn.f32.s32 %f51, %r3;\n"
1911 " div.rn.f32 %f52, %f50, %f51;\n"
1912 " cvt.rn.f32.s32 %f53, %r7;\n"
1913 " cvt.rn.f32.s32 %f54, %r4;\n"
1914 " div.rn.f32 %f55, %f53, %f54;\n"
// Sample x position: %f58 = %f52 * (x + 0.5) - 0.5. %f60 = y + 0.5 is kept for later.
1915 " cvt.rn.f32.s32 %f56, %r1;\n"
1916 " add.f32 %f57, %f56, 0f3F000000;\n"
1917 " fma.rn.f32 %f58, %f52, %f57, 0fBF000000;\n"
1918 " cvt.rn.f32.s32 %f59, %r2;\n"
1919 " add.f32 %f60, %f59, 0f3F000000;\n"
// %f113 = floor(%f58) (round toward minus infinity), %f62 = fractional part fx.
1920 " cvt.rmi.f32.f32 %f113, %f58;\n"
1921 " sub.f32 %f62, %f58, %f113;\n"
// Weight arguments, 0f40490FDB being pi: %f5 = pi * (fx + 1), %f6 = pi * fx; %f64 = fx - 1.
1922 " add.f32 %f63, %f62, 0f3F800000;\n"
1923 " mul.f32 %f5, %f63, 0f40490FDB;\n"
1924 " mul.f32 %f6, %f62, 0f40490FDB;\n"
1925 " add.f32 %f64, %f62, 0fBF800000;\n"
// %f231 holds 1.0; it is copied into each weight register before the zero test,
// so a weight keeps the value 1.0 whenever its block is skipped.
1926 " setp.eq.f32 %p5, %f5, 0f00000000;\n"
1927 " mov.f32 %f231, 0f3F800000;\n"
1928 " mov.f32 %f224, %f231;\n"
1929 " @%p5 bra LBB7_3;\n"
// x weight 0 (used with the column %f113 - 1), a = %f5:
// %f224 = sin(a) * sin(a / 2) / (a * a / 2), using the approximate sine.
1930 " mul.f32 %f9, %f5, 0f3F000000;\n"
1931 " mul.f32 %f66, %f5, %f5;\n"
1932 " mul.f32 %f10, %f66, 0f3F000000;\n"
1933 " sin.approx.f32 %f67, %f5;\n"
1934 " sin.approx.f32 %f68, %f9;\n"
1935 " mul.f32 %f69, %f67, %f68;\n"
1936 " div.rn.f32 %f224, %f69, %f10;\n"
// Sample y position: %f61 = %f55 * (y + 0.5) - 0.5. Also %f65 = fx - 2, %f7 = pi * (fx - 1).
1938 " fma.rn.f32 %f61, %f55, %f60, 0fBF000000;\n"
1939 " add.f32 %f65, %f62, 0fC0000000;\n"
1940 " mul.f32 %f7, %f64, 0f40490FDB;\n"
1941 " setp.eq.f32 %p6, %f6, 0f00000000;\n"
1942 " mov.f32 %f225, %f231;\n"
1943 " @%p6 bra LBB7_5;\n"
// x weight 1 (used with the column %f113), a = %f6, same formula.
1944 " mul.f32 %f13, %f6, 0f3F000000;\n"
1945 " mul.f32 %f71, %f6, %f6;\n"
1946 " mul.f32 %f14, %f71, 0f3F000000;\n"
1947 " sin.approx.f32 %f72, %f6;\n"
1948 " sin.approx.f32 %f73, %f13;\n"
1949 " mul.f32 %f74, %f72, %f73;\n"
1950 " div.rn.f32 %f225, %f74, %f14;\n"
// %f120 = floor(%f61); %f8 = pi * (fx - 2).
1952 " cvt.rmi.f32.f32 %f120, %f61;\n"
1953 " mul.f32 %f8, %f65, 0f40490FDB;\n"
1954 " setp.eq.f32 %p7, %f7, 0f00000000;\n"
1955 " mov.f32 %f226, %f231;\n"
1956 " @%p7 bra LBB7_7;\n"
// x weight 2 (used with the column %f113 + 1), a = %f7, same formula.
1957 " mul.f32 %f17, %f7, 0f3F000000;\n"
1958 " mul.f32 %f76, %f7, %f7;\n"
1959 " mul.f32 %f18, %f76, 0f3F000000;\n"
1960 " sin.approx.f32 %f77, %f7;\n"
1961 " sin.approx.f32 %f78, %f17;\n"
1962 " mul.f32 %f79, %f77, %f78;\n"
1963 " div.rn.f32 %f226, %f79, %f18;\n"
// %f3 = fractional part fy of the sample y position.
1965 " sub.f32 %f3, %f61, %f120;\n"
1966 " setp.eq.f32 %p8, %f8, 0f00000000;\n"
1967 " mov.f32 %f227, %f231;\n"
1968 " @%p8 bra LBB7_9;\n"
// x weight 3 (used with the column %f113 + 2), a = %f8, same formula.
1969 " mul.f32 %f21, %f8, 0f3F000000;\n"
1970 " mul.f32 %f81, %f8, %f8;\n"
1971 " mul.f32 %f22, %f81, 0f3F000000;\n"
1972 " sin.approx.f32 %f82, %f8;\n"
1973 " sin.approx.f32 %f83, %f21;\n"
1974 " mul.f32 %f84, %f82, %f83;\n"
1975 " div.rn.f32 %f227, %f84, %f22;\n"
// Start summing the x weights (%f86 = w0 + w1) and set up the y arguments:
// %f29 = pi * (fy + 1), %f30 = pi * fy, %f90 = fy - 1.
1977 " ld.param.u32 %r8, [Subsample_Lanczos_uchar2_param_7];\n"
1978 " add.f32 %f86, %f224, %f225;\n"
1979 " add.f32 %f89, %f3, 0f3F800000;\n"
1980 " mul.f32 %f29, %f89, 0f40490FDB;\n"
1981 " mul.f32 %f30, %f3, 0f40490FDB;\n"
1982 " add.f32 %f90, %f3, 0fBF800000;\n"
1983 " setp.eq.f32 %p9, %f29, 0f00000000;\n"
1984 " mov.f32 %f228, %f231;\n"
1985 " @%p9 bra LBB7_11;\n"
// y weight 0 (used with the row %f120 - 1), a = %f29, same formula.
1986 " mul.f32 %f33, %f29, 0f3F000000;\n"
1987 " mul.f32 %f92, %f29, %f29;\n"
1988 " mul.f32 %f34, %f92, 0f3F000000;\n"
1989 " sin.approx.f32 %f93, %f29;\n"
1990 " sin.approx.f32 %f94, %f33;\n"
1991 " mul.f32 %f95, %f93, %f94;\n"
1992 " div.rn.f32 %f228, %f95, %f34;\n"
// %p4 = (param_7 > 8); %f87 = w0 + w1 + w2; %f91 = fy - 2; %f31 = pi * (fy - 1).
1994 " setp.gt.s32 %p4, %r8, 8;\n"
1995 " add.f32 %f87, %f86, %f226;\n"
1996 " add.f32 %f91, %f3, 0fC0000000;\n"
1997 " mul.f32 %f31, %f90, 0f40490FDB;\n"
1998 " setp.eq.f32 %p10, %f30, 0f00000000;\n"
1999 " mov.f32 %f229, %f231;\n"
2000 " @%p10 bra LBB7_13;\n"
// y weight 1 (used with the row %f120), a = %f30, same formula.
2001 " mul.f32 %f37, %f30, 0f3F000000;\n"
2002 " mul.f32 %f97, %f30, %f30;\n"
2003 " mul.f32 %f38, %f97, 0f3F000000;\n"
2004 " sin.approx.f32 %f98, %f30;\n"
2005 " sin.approx.f32 %f99, %f37;\n"
2006 " mul.f32 %f100, %f98, %f99;\n"
2007 " div.rn.f32 %f229, %f100, %f38;\n"
// %r15 = output scale (65535 if %p4 else 255); %f88 = sum of the four x weights;
// %f32 = pi * (fy - 2).
2009 " ld.param.u64 %rd3, [Subsample_Lanczos_uchar2_param_1];\n"
2010 " selp.b32 %r15, 65535, 255, %p4;\n"
2011 " add.f32 %f88, %f87, %f227;\n"
2012 " mul.f32 %f32, %f91, 0f40490FDB;\n"
2013 " setp.eq.f32 %p11, %f31, 0f00000000;\n"
2014 " mov.f32 %f230, %f231;\n"
2015 " @%p11 bra LBB7_15;\n"
// y weight 2 (used with the row %f120 + 1), a = %f31, same formula.
2016 " mul.f32 %f41, %f31, 0f3F000000;\n"
2017 " mul.f32 %f102, %f31, %f31;\n"
2018 " mul.f32 %f42, %f102, 0f3F000000;\n"
2019 " sin.approx.f32 %f103, %f31;\n"
2020 " sin.approx.f32 %f104, %f41;\n"
2021 " mul.f32 %f105, %f103, %f104;\n"
2022 " div.rn.f32 %f230, %f105, %f42;\n"
// Load the stride and texture handle, convert the destination pointer to a
// global address, convert the output scale to float (%f4), and normalize the
// x weights: %f25..%f28 = w0..w3 divided by their sum %f88.
2024 " ld.param.u32 %r5, [Subsample_Lanczos_uchar2_param_4];\n"
2025 " ld.param.u64 %rd4, [Subsample_Lanczos_uchar2_param_0];\n"
2026 " cvta.to.global.u64 %rd1, %rd3;\n"
2027 " cvt.rn.f32.s32 %f4, %r15;\n"
2028 " div.rn.f32 %f25, %f224, %f88;\n"
2029 " div.rn.f32 %f26, %f225, %f88;\n"
2030 " div.rn.f32 %f27, %f226, %f88;\n"
2031 " div.rn.f32 %f28, %f227, %f88;\n"
2032 " setp.eq.f32 %p12, %f32, 0f00000000;\n"
2033 " @%p12 bra LBB7_17;\n"
// y weight 3 (used with the row %f120 + 2), a = %f32, same formula.
// It is written straight into %f231, which still holds 1.0 if this block is skipped.
2034 " mul.f32 %f45, %f32, 0f3F000000;\n"
2035 " mul.f32 %f107, %f32, %f32;\n"
2036 " mul.f32 %f46, %f107, 0f3F000000;\n"
2037 " sin.approx.f32 %f108, %f32;\n"
2038 " sin.approx.f32 %f109, %f45;\n"
2039 " mul.f32 %f110, %f108, %f109;\n"
2040 " div.rn.f32 %f231, %f110, %f46;\n"
// Normalize the y weights: %f146..%f149 = each y weight divided by their sum %f145.
2042 " add.f32 %f143, %f228, %f229;\n"
2043 " add.f32 %f144, %f143, %f230;\n"
2044 " add.f32 %f145, %f144, %f231;\n"
2045 " div.rn.f32 %f146, %f228, %f145;\n"
2046 " div.rn.f32 %f147, %f229, %f145;\n"
2047 " div.rn.f32 %f148, %f230, %f145;\n"
2048 " div.rn.f32 %f149, %f231, %f145;\n"
// Row 0 of the 4x4 window: y coordinate %f112 = %f120 - 1.
// Columns are %f111 = %f113 - 1, %f113, %f115 = %f113 + 1, %f117 = %f113 + 2.
// Only the first two components of each four-component fetch are used.
2049 " add.f32 %f111, %f113, 0fBF800000;\n"
2050 " add.f32 %f112, %f120, 0fBF800000;\n"
2051 " // begin inline asm\n"
2052 " tex.2d.v4.f32.f32 {%r16, %r17, %r18, %r19}, [%rd4, {%f111, %f112}];\n"
2053 " // end inline asm\n"
2054 " mov.b32 %f150, %r17;\n"
2055 " mov.b32 %f151, %r16;\n"
2056 " // begin inline asm\n"
2057 " tex.2d.v4.f32.f32 {%r20, %r21, %r22, %r23}, [%rd4, {%f113, %f112}];\n"
2058 " // end inline asm\n"
2059 " mov.b32 %f152, %r21;\n"
2060 " mov.b32 %f153, %r20;\n"
2061 " add.f32 %f115, %f113, 0f3F800000;\n"
2062 " // begin inline asm\n"
2063 " tex.2d.v4.f32.f32 {%r24, %r25, %r26, %r27}, [%rd4, {%f115, %f112}];\n"
2064 " // end inline asm\n"
2065 " mov.b32 %f154, %r25;\n"
2066 " mov.b32 %f155, %r24;\n"
2067 " add.f32 %f117, %f113, 0f40000000;\n"
2068 " // begin inline asm\n"
2069 " tex.2d.v4.f32.f32 {%r28, %r29, %r30, %r31}, [%rd4, {%f117, %f112}];\n"
2070 " // end inline asm\n"
2071 " mov.b32 %f156, %r29;\n"
2072 " mov.b32 %f157, %r28;\n"
// Horizontal sum for row 0: %f164 (first component) and %f165 (second component).
2073 " mul.f32 %f158, %f26, %f153;\n"
2074 " mul.f32 %f159, %f26, %f152;\n"
2075 " fma.rn.f32 %f160, %f25, %f151, %f158;\n"
2076 " fma.rn.f32 %f161, %f25, %f150, %f159;\n"
2077 " fma.rn.f32 %f162, %f27, %f155, %f160;\n"
2078 " fma.rn.f32 %f163, %f27, %f154, %f161;\n"
2079 " fma.rn.f32 %f164, %f28, %f157, %f162;\n"
2080 " fma.rn.f32 %f165, %f28, %f156, %f163;\n"
// Row 1 of the window: y coordinate %f120.
2081 " // begin inline asm\n"
2082 " tex.2d.v4.f32.f32 {%r32, %r33, %r34, %r35}, [%rd4, {%f111, %f120}];\n"
2083 " // end inline asm\n"
2084 " mov.b32 %f166, %r33;\n"
2085 " mov.b32 %f167, %r32;\n"
2086 " // begin inline asm\n"
2087 " tex.2d.v4.f32.f32 {%r36, %r37, %r38, %r39}, [%rd4, {%f113, %f120}];\n"
2088 " // end inline asm\n"
2089 " mov.b32 %f168, %r37;\n"
2090 " mov.b32 %f169, %r36;\n"
2091 " // begin inline asm\n"
2092 " tex.2d.v4.f32.f32 {%r40, %r41, %r42, %r43}, [%rd4, {%f115, %f120}];\n"
2093 " // end inline asm\n"
2094 " mov.b32 %f170, %r41;\n"
2095 " mov.b32 %f171, %r40;\n"
2096 " // begin inline asm\n"
2097 " tex.2d.v4.f32.f32 {%r44, %r45, %r46, %r47}, [%rd4, {%f117, %f120}];\n"
2098 " // end inline asm\n"
2099 " mov.b32 %f172, %r45;\n"
2100 " mov.b32 %f173, %r44;\n"
// Horizontal sum for row 1: %f180 (first component) and %f181 (second component).
2101 " mul.f32 %f174, %f26, %f169;\n"
2102 " mul.f32 %f175, %f26, %f168;\n"
2103 " fma.rn.f32 %f176, %f25, %f167, %f174;\n"
2104 " fma.rn.f32 %f177, %f25, %f166, %f175;\n"
2105 " fma.rn.f32 %f178, %f27, %f171, %f176;\n"
2106 " fma.rn.f32 %f179, %f27, %f170, %f177;\n"
2107 " fma.rn.f32 %f180, %f28, %f173, %f178;\n"
2108 " fma.rn.f32 %f181, %f28, %f172, %f179;\n"
// Row 2 of the window: y coordinate %f128 = %f120 + 1.
2109 " add.f32 %f128, %f120, 0f3F800000;\n"
2110 " // begin inline asm\n"
2111 " tex.2d.v4.f32.f32 {%r48, %r49, %r50, %r51}, [%rd4, {%f111, %f128}];\n"
2112 " // end inline asm\n"
2113 " mov.b32 %f182, %r49;\n"
2114 " mov.b32 %f183, %r48;\n"
2115 " // begin inline asm\n"
2116 " tex.2d.v4.f32.f32 {%r52, %r53, %r54, %r55}, [%rd4, {%f113, %f128}];\n"
2117 " // end inline asm\n"
2118 " mov.b32 %f184, %r53;\n"
2119 " mov.b32 %f185, %r52;\n"
2120 " // begin inline asm\n"
2121 " tex.2d.v4.f32.f32 {%r56, %r57, %r58, %r59}, [%rd4, {%f115, %f128}];\n"
2122 " // end inline asm\n"
2123 " mov.b32 %f186, %r57;\n"
2124 " mov.b32 %f187, %r56;\n"
2125 " // begin inline asm\n"
2126 " tex.2d.v4.f32.f32 {%r60, %r61, %r62, %r63}, [%rd4, {%f117, %f128}];\n"
2127 " // end inline asm\n"
2128 " mov.b32 %f188, %r61;\n"
2129 " mov.b32 %f189, %r60;\n"
// Horizontal sum for row 2: %f196 (first component) and %f197 (second component).
2130 " mul.f32 %f190, %f26, %f185;\n"
2131 " mul.f32 %f191, %f26, %f184;\n"
2132 " fma.rn.f32 %f192, %f25, %f183, %f190;\n"
2133 " fma.rn.f32 %f193, %f25, %f182, %f191;\n"
2134 " fma.rn.f32 %f194, %f27, %f187, %f192;\n"
2135 " fma.rn.f32 %f195, %f27, %f186, %f193;\n"
2136 " fma.rn.f32 %f196, %f28, %f189, %f194;\n"
2137 " fma.rn.f32 %f197, %f28, %f188, %f195;\n"
// Row 3 of the window: y coordinate %f136 = %f120 + 2.
2138 " add.f32 %f136, %f120, 0f40000000;\n"
2139 " // begin inline asm\n"
2140 " tex.2d.v4.f32.f32 {%r64, %r65, %r66, %r67}, [%rd4, {%f111, %f136}];\n"
2141 " // end inline asm\n"
2142 " mov.b32 %f198, %r65;\n"
2143 " mov.b32 %f199, %r64;\n"
2144 " // begin inline asm\n"
2145 " tex.2d.v4.f32.f32 {%r68, %r69, %r70, %r71}, [%rd4, {%f113, %f136}];\n"
2146 " // end inline asm\n"
2147 " mov.b32 %f200, %r69;\n"
2148 " mov.b32 %f201, %r68;\n"
2149 " // begin inline asm\n"
2150 " tex.2d.v4.f32.f32 {%r72, %r73, %r74, %r75}, [%rd4, {%f115, %f136}];\n"
2151 " // end inline asm\n"
2152 " mov.b32 %f202, %r73;\n"
2153 " mov.b32 %f203, %r72;\n"
2154 " // begin inline asm\n"
2155 " tex.2d.v4.f32.f32 {%r76, %r77, %r78, %r79}, [%rd4, {%f117, %f136}];\n"
2156 " // end inline asm\n"
2157 " mov.b32 %f204, %r77;\n"
2158 " mov.b32 %f205, %r76;\n"
// Horizontal sum for row 3: %f212 (first component) and %f213 (second component).
2159 " mul.f32 %f206, %f26, %f201;\n"
2160 " mul.f32 %f207, %f26, %f200;\n"
2161 " fma.rn.f32 %f208, %f25, %f199, %f206;\n"
2162 " fma.rn.f32 %f209, %f25, %f198, %f207;\n"
2163 " fma.rn.f32 %f210, %f27, %f203, %f208;\n"
2164 " fma.rn.f32 %f211, %f27, %f202, %f209;\n"
2165 " fma.rn.f32 %f212, %f28, %f205, %f210;\n"
2166 " fma.rn.f32 %f213, %f28, %f204, %f211;\n"
// Vertical pass: combine the four row sums with the normalized y weights
// %f146..%f149, giving %f220 (first component) and %f221 (second component).
2167 " mul.f32 %f214, %f147, %f180;\n"
2168 " mul.f32 %f215, %f147, %f181;\n"
2169 " fma.rn.f32 %f216, %f146, %f164, %f214;\n"
2170 " fma.rn.f32 %f217, %f146, %f165, %f215;\n"
2171 " fma.rn.f32 %f218, %f148, %f196, %f216;\n"
2172 " fma.rn.f32 %f219, %f148, %f197, %f217;\n"
2173 " fma.rn.f32 %f220, %f149, %f212, %f218;\n"
2174 " fma.rn.f32 %f221, %f149, %f213, %f219;\n"
// Multiply by the output scale %f4 and convert to u16, rounding toward zero.
2175 " mul.f32 %f222, %f220, %f4;\n"
2176 " mul.f32 %f223, %f221, %f4;\n"
2177 " cvt.rzi.u16.f32 %rs1, %f222;\n"
2178 " cvt.rzi.u16.f32 %rs2, %f223;\n"
// Destination address = %rd1 + 2 * (y * param_4 + x); store both results as a v2.u8 pair.
2179 " mad.lo.s32 %r80, %r2, %r5, %r1;\n"
2180 " mul.wide.s32 %rd20, %r80, 2;\n"
2181 " add.s64 %rd21, %rd1, %rd20;\n"
2182 " st.global.v2.u8 [%rd21], {%rs1, %rs2};\n"
2187 " // .globl Subsample_Lanczos_uchar4\n"
2188 ".visible .entry Subsample_Lanczos_uchar4(\n"
2189 " .param .u64 Subsample_Lanczos_uchar4_param_0,\n"
2190 " .param .u64 Subsample_Lanczos_uchar4_param_1,\n"
2191 " .param .u32 Subsample_Lanczos_uchar4_param_2,\n"
2192 " .param .u32 Subsample_Lanczos_uchar4_param_3,\n"
2193 " .param .u32 Subsample_Lanczos_uchar4_param_4,\n"
2194 " .param .u32 Subsample_Lanczos_uchar4_param_5,\n"
2195 " .param .u32 Subsample_Lanczos_uchar4_param_6,\n"
2196 " .param .u32 Subsample_Lanczos_uchar4_param_7,\n"
2197 " .param .f32 Subsample_Lanczos_uchar4_param_8\n"
2200 " .reg .pred %p<13>;\n"
2201 " .reg .b16 %rs<5>;\n"
2202 " .reg .f32 %f<306>;\n"
2203 " .reg .b32 %r<81>;\n"
2204 " .reg .b64 %rd<22>;\n"
2206 " ld.param.u32 %r4, [Subsample_Lanczos_uchar4_param_3];\n"
2207 " ld.param.u32 %r3, [Subsample_Lanczos_uchar4_param_2];\n"
2208 " // begin inline asm\n"
2209 " mov.u32 %r9, %ctaid.x;\n"
2210 " // end inline asm\n"
2211 " // begin inline asm\n"
2212 " mov.u32 %r10, %ctaid.y;\n"
2213 " // end inline asm\n"
2214 " // begin inline asm\n"
2215 " mov.u32 %r11, %ntid.x;\n"
2216 " // end inline asm\n"
2217 " // begin inline asm\n"
2218 " mov.u32 %r12, %ntid.y;\n"
2219 " // end inline asm\n"
2220 " // begin inline asm\n"
2221 " mov.u32 %r13, %tid.x;\n"
2222 " // end inline asm\n"
2223 " // begin inline asm\n"
2224 " mov.u32 %r14, %tid.y;\n"
2225 " // end inline asm\n"
2226 " mad.lo.s32 %r1, %r11, %r9, %r13;\n"
2227 " mad.lo.s32 %r2, %r12, %r10, %r14;\n"
2228 " setp.ge.s32 %p1, %r2, %r4;\n"
2229 " setp.ge.s32 %p2, %r1, %r3;\n"
2230 " or.pred %p3, %p2, %p1;\n"
2231 " @%p3 bra LBB8_18;\n"
2232 " ld.param.u32 %r7, [Subsample_Lanczos_uchar4_param_6];\n"
2233 " ld.param.u32 %r6, [Subsample_Lanczos_uchar4_param_5];\n"
2234 " cvt.rn.f32.s32 %f50, %r6;\n"
2235 " cvt.rn.f32.s32 %f51, %r3;\n"
2236 " div.rn.f32 %f52, %f50, %f51;\n"
2237 " cvt.rn.f32.s32 %f53, %r7;\n"
2238 " cvt.rn.f32.s32 %f54, %r4;\n"
2239 " div.rn.f32 %f55, %f53, %f54;\n"
2240 " cvt.rn.f32.s32 %f56, %r1;\n"
2241 " add.f32 %f57, %f56, 0f3F000000;\n"
2242 " fma.rn.f32 %f58, %f52, %f57, 0fBF000000;\n"
2243 " cvt.rn.f32.s32 %f59, %r2;\n"
2244 " add.f32 %f60, %f59, 0f3F000000;\n"
2245 " cvt.rmi.f32.f32 %f113, %f58;\n"
2246 " sub.f32 %f62, %f58, %f113;\n"
2247 " add.f32 %f63, %f62, 0f3F800000;\n"
2248 " mul.f32 %f5, %f63, 0f40490FDB;\n"
2249 " mul.f32 %f6, %f62, 0f40490FDB;\n"
2250 " add.f32 %f64, %f62, 0fBF800000;\n"
2251 " setp.eq.f32 %p5, %f5, 0f00000000;\n"
2252 " mov.f32 %f305, 0f3F800000;\n"
2253 " mov.f32 %f298, %f305;\n"
2254 " @%p5 bra LBB8_3;\n"
2255 " mul.f32 %f9, %f5, 0f3F000000;\n"
2256 " mul.f32 %f66, %f5, %f5;\n"
2257 " mul.f32 %f10, %f66, 0f3F000000;\n"
2258 " sin.approx.f32 %f67, %f5;\n"
2259 " sin.approx.f32 %f68, %f9;\n"
2260 " mul.f32 %f69, %f67, %f68;\n"
2261 " div.rn.f32 %f298, %f69, %f10;\n"
2263 " fma.rn.f32 %f61, %f55, %f60, 0fBF000000;\n"
2264 " add.f32 %f65, %f62, 0fC0000000;\n"
2265 " mul.f32 %f7, %f64, 0f40490FDB;\n"
2266 " setp.eq.f32 %p6, %f6, 0f00000000;\n"
2267 " mov.f32 %f299, %f305;\n"
2268 " @%p6 bra LBB8_5;\n"
2269 " mul.f32 %f13, %f6, 0f3F000000;\n"
2270 " mul.f32 %f71, %f6, %f6;\n"
2271 " mul.f32 %f14, %f71, 0f3F000000;\n"
2272 " sin.approx.f32 %f72, %f6;\n"
2273 " sin.approx.f32 %f73, %f13;\n"
2274 " mul.f32 %f74, %f72, %f73;\n"
2275 " div.rn.f32 %f299, %f74, %f14;\n"
2277 " cvt.rmi.f32.f32 %f120, %f61;\n"
2278 " mul.f32 %f8, %f65, 0f40490FDB;\n"
2279 " setp.eq.f32 %p7, %f7, 0f00000000;\n"
2280 " mov.f32 %f300, %f305;\n"
2281 " @%p7 bra LBB8_7;\n"
2282 " mul.f32 %f17, %f7, 0f3F000000;\n"
2283 " mul.f32 %f76, %f7, %f7;\n"
2284 " mul.f32 %f18, %f76, 0f3F000000;\n"
2285 " sin.approx.f32 %f77, %f7;\n"
2286 " sin.approx.f32 %f78, %f17;\n"
2287 " mul.f32 %f79, %f77, %f78;\n"
2288 " div.rn.f32 %f300, %f79, %f18;\n"
2290 " sub.f32 %f3, %f61, %f120;\n"
2291 " setp.eq.f32 %p8, %f8, 0f00000000;\n"
2292 " mov.f32 %f301, %f305;\n"
2293 " @%p8 bra LBB8_9;\n"
2294 " mul.f32 %f21, %f8, 0f3F000000;\n"
2295 " mul.f32 %f81, %f8, %f8;\n"
2296 " mul.f32 %f22, %f81, 0f3F000000;\n"
2297 " sin.approx.f32 %f82, %f8;\n"
2298 " sin.approx.f32 %f83, %f21;\n"
2299 " mul.f32 %f84, %f82, %f83;\n"
2300 " div.rn.f32 %f301, %f84, %f22;\n"
2302 " ld.param.u32 %r8, [Subsample_Lanczos_uchar4_param_7];\n"
2303 " add.f32 %f86, %f298, %f299;\n"
2304 " add.f32 %f89, %f3, 0f3F800000;\n"
2305 " mul.f32 %f29, %f89, 0f40490FDB;\n"
2306 " mul.f32 %f30, %f3, 0f40490FDB;\n"
2307 " add.f32 %f90, %f3, 0fBF800000;\n"
2308 " setp.eq.f32 %p9, %f29, 0f00000000;\n"
2309 " mov.f32 %f302, %f305;\n"
2310 " @%p9 bra LBB8_11;\n"
2311 " mul.f32 %f33, %f29, 0f3F000000;\n"
2312 " mul.f32 %f92, %f29, %f29;\n"
2313 " mul.f32 %f34, %f92, 0f3F000000;\n"
2314 " sin.approx.f32 %f93, %f29;\n"
2315 " sin.approx.f32 %f94, %f33;\n"
2316 " mul.f32 %f95, %f93, %f94;\n"
2317 " div.rn.f32 %f302, %f95, %f34;\n"
2319 " setp.gt.s32 %p4, %r8, 8;\n"
2320 " add.f32 %f87, %f86, %f300;\n"
2321 " add.f32 %f91, %f3, 0fC0000000;\n"
2322 " mul.f32 %f31, %f90, 0f40490FDB;\n"
2323 " setp.eq.f32 %p10, %f30, 0f00000000;\n"
2324 " mov.f32 %f303, %f305;\n"
2325 " @%p10 bra LBB8_13;\n"
2326 " mul.f32 %f37, %f30, 0f3F000000;\n"
2327 " mul.f32 %f97, %f30, %f30;\n"
2328 " mul.f32 %f38, %f97, 0f3F000000;\n"
2329 " sin.approx.f32 %f98, %f30;\n"
2330 " sin.approx.f32 %f99, %f37;\n"
2331 " mul.f32 %f100, %f98, %f99;\n"
2332 " div.rn.f32 %f303, %f100, %f38;\n"
2334 " ld.param.u64 %rd3, [Subsample_Lanczos_uchar4_param_1];\n"
2335 " selp.b32 %r15, 65535, 255, %p4;\n"
2336 " add.f32 %f88, %f87, %f301;\n"
2337 " mul.f32 %f32, %f91, 0f40490FDB;\n"
2338 " setp.eq.f32 %p11, %f31, 0f00000000;\n"
2339 " mov.f32 %f304, %f305;\n"
2340 " @%p11 bra LBB8_15;\n"
2341 " mul.f32 %f41, %f31, 0f3F000000;\n"
2342 " mul.f32 %f102, %f31, %f31;\n"
2343 " mul.f32 %f42, %f102, 0f3F000000;\n"
2344 " sin.approx.f32 %f103, %f31;\n"
2345 " sin.approx.f32 %f104, %f41;\n"
2346 " mul.f32 %f105, %f103, %f104;\n"
2347 " div.rn.f32 %f304, %f105, %f42;\n"
2349 " ld.param.u32 %r5, [Subsample_Lanczos_uchar4_param_4];\n"
2350 " ld.param.u64 %rd4, [Subsample_Lanczos_uchar4_param_0];\n"
2351 " cvta.to.global.u64 %rd1, %rd3;\n"
2352 " cvt.rn.f32.s32 %f4, %r15;\n"
2353 " div.rn.f32 %f25, %f298, %f88;\n"
2354 " div.rn.f32 %f26, %f299, %f88;\n"
2355 " div.rn.f32 %f27, %f300, %f88;\n"
2356 " div.rn.f32 %f28, %f301, %f88;\n"
2357 " setp.eq.f32 %p12, %f32, 0f00000000;\n"
2358 " @%p12 bra LBB8_17;\n"
2359 " mul.f32 %f45, %f32, 0f3F000000;\n"
2360 " mul.f32 %f107, %f32, %f32;\n"
2361 " mul.f32 %f46, %f107, 0f3F000000;\n"
2362 " sin.approx.f32 %f108, %f32;\n"
2363 " sin.approx.f32 %f109, %f45;\n"
2364 " mul.f32 %f110, %f108, %f109;\n"
2365 " div.rn.f32 %f305, %f110, %f46;\n"
2367 " add.f32 %f143, %f302, %f303;\n"
2368 " add.f32 %f144, %f143, %f304;\n"
2369 " add.f32 %f145, %f144, %f305;\n"
2370 " div.rn.f32 %f146, %f302, %f145;\n"
2371 " div.rn.f32 %f147, %f303, %f145;\n"
2372 " div.rn.f32 %f148, %f304, %f145;\n"
2373 " div.rn.f32 %f149, %f305, %f145;\n"
2374 " add.f32 %f111, %f113, 0fBF800000;\n"
2375 " add.f32 %f112, %f120, 0fBF800000;\n"
2376 " // begin inline asm\n"
2377 " tex.2d.v4.f32.f32 {%r16, %r17, %r18, %r19}, [%rd4, {%f111, %f112}];\n"
2378 " // end inline asm\n"
2379 " mov.b32 %f150, %r19;\n"
2380 " mov.b32 %f151, %r18;\n"
2381 " mov.b32 %f152, %r17;\n"
2382 " mov.b32 %f153, %r16;\n"
2383 " // begin inline asm\n"
2384 " tex.2d.v4.f32.f32 {%r20, %r21, %r22, %r23}, [%rd4, {%f113, %f112}];\n"
2385 " // end inline asm\n"
2386 " mov.b32 %f154, %r23;\n"
2387 " mov.b32 %f155, %r22;\n"
2388 " mov.b32 %f156, %r21;\n"
2389 " mov.b32 %f157, %r20;\n"
2390 " add.f32 %f115, %f113, 0f3F800000;\n"
2391 " // begin inline asm\n"
2392 " tex.2d.v4.f32.f32 {%r24, %r25, %r26, %r27}, [%rd4, {%f115, %f112}];\n"
2393 " // end inline asm\n"
2394 " mov.b32 %f158, %r27;\n"
2395 " mov.b32 %f159, %r26;\n"
2396 " mov.b32 %f160, %r25;\n"
2397 " mov.b32 %f161, %r24;\n"
2398 " add.f32 %f117, %f113, 0f40000000;\n"
2399 " // begin inline asm\n"
2400 " tex.2d.v4.f32.f32 {%r28, %r29, %r30, %r31}, [%rd4, {%f117, %f112}];\n"
2401 " // end inline asm\n"
2402 " mov.b32 %f162, %r31;\n"
2403 " mov.b32 %f163, %r30;\n"
2404 " mov.b32 %f164, %r29;\n"
2405 " mov.b32 %f165, %r28;\n"
2406 " mul.f32 %f166, %f26, %f157;\n"
2407 " mul.f32 %f167, %f26, %f156;\n"
2408 " mul.f32 %f168, %f26, %f155;\n"
2409 " mul.f32 %f169, %f26, %f154;\n"
2410 " fma.rn.f32 %f170, %f25, %f153, %f166;\n"
2411 " fma.rn.f32 %f171, %f25, %f152, %f167;\n"
2412 " fma.rn.f32 %f172, %f25, %f151, %f168;\n"
2413 " fma.rn.f32 %f173, %f25, %f150, %f169;\n"
2414 " fma.rn.f32 %f174, %f27, %f161, %f170;\n"
2415 " fma.rn.f32 %f175, %f27, %f160, %f171;\n"
2416 " fma.rn.f32 %f176, %f27, %f159, %f172;\n"
2417 " fma.rn.f32 %f177, %f27, %f158, %f173;\n"
2418 " fma.rn.f32 %f178, %f28, %f165, %f174;\n"
2419 " fma.rn.f32 %f179, %f28, %f164, %f175;\n"
2420 " fma.rn.f32 %f180, %f28, %f163, %f176;\n"
2421 " fma.rn.f32 %f181, %f28, %f162, %f177;\n"
2422 " // begin inline asm\n"
2423 " tex.2d.v4.f32.f32 {%r32, %r33, %r34, %r35}, [%rd4, {%f111, %f120}];\n"
2424 " // end inline asm\n"
2425 " mov.b32 %f182, %r35;\n"
2426 " mov.b32 %f183, %r34;\n"
2427 " mov.b32 %f184, %r33;\n"
2428 " mov.b32 %f185, %r32;\n"
2429 " // begin inline asm\n"
2430 " tex.2d.v4.f32.f32 {%r36, %r37, %r38, %r39}, [%rd4, {%f113, %f120}];\n"
2431 " // end inline asm\n"
2432 " mov.b32 %f186, %r39;\n"
2433 " mov.b32 %f187, %r38;\n"
2434 " mov.b32 %f188, %r37;\n"
2435 " mov.b32 %f189, %r36;\n"
2436 " // begin inline asm\n"
2437 " tex.2d.v4.f32.f32 {%r40, %r41, %r42, %r43}, [%rd4, {%f115, %f120}];\n"
2438 " // end inline asm\n"
2439 " mov.b32 %f190, %r43;\n"
2440 " mov.b32 %f191, %r42;\n"
2441 " mov.b32 %f192, %r41;\n"
2442 " mov.b32 %f193, %r40;\n"
2443 " // begin inline asm\n"
2444 " tex.2d.v4.f32.f32 {%r44, %r45, %r46, %r47}, [%rd4, {%f117, %f120}];\n"
2445 " // end inline asm\n"
2446 " mov.b32 %f194, %r47;\n"
2447 " mov.b32 %f195, %r46;\n"
2448 " mov.b32 %f196, %r45;\n"
2449 " mov.b32 %f197, %r44;\n"
2450 " mul.f32 %f198, %f26, %f189;\n"
2451 " mul.f32 %f199, %f26, %f188;\n"
2452 " mul.f32 %f200, %f26, %f187;\n"
2453 " mul.f32 %f201, %f26, %f186;\n"
2454 " fma.rn.f32 %f202, %f25, %f185, %f198;\n"
2455 " fma.rn.f32 %f203, %f25, %f184, %f199;\n"
2456 " fma.rn.f32 %f204, %f25, %f183, %f200;\n"
2457 " fma.rn.f32 %f205, %f25, %f182, %f201;\n"
2458 " fma.rn.f32 %f206, %f27, %f193, %f202;\n"
2459 " fma.rn.f32 %f207, %f27, %f192, %f203;\n"
2460 " fma.rn.f32 %f208, %f27, %f191, %f204;\n"
2461 " fma.rn.f32 %f209, %f27, %f190, %f205;\n"
2462 " fma.rn.f32 %f210, %f28, %f197, %f206;\n"
2463 " fma.rn.f32 %f211, %f28, %f196, %f207;\n"
2464 " fma.rn.f32 %f212, %f28, %f195, %f208;\n"
2465 " fma.rn.f32 %f213, %f28, %f194, %f209;\n"
2466 " add.f32 %f128, %f120, 0f3F800000;\n"
2467 " // begin inline asm\n"
2468 " tex.2d.v4.f32.f32 {%r48, %r49, %r50, %r51}, [%rd4, {%f111, %f128}];\n"
2469 " // end inline asm\n"
2470 " mov.b32 %f214, %r51;\n"
2471 " mov.b32 %f215, %r50;\n"
2472 " mov.b32 %f216, %r49;\n"
2473 " mov.b32 %f217, %r48;\n"
2474 " // begin inline asm\n"
2475 " tex.2d.v4.f32.f32 {%r52, %r53, %r54, %r55}, [%rd4, {%f113, %f128}];\n"
2476 " // end inline asm\n"
2477 " mov.b32 %f218, %r55;\n"
2478 " mov.b32 %f219, %r54;\n"
2479 " mov.b32 %f220, %r53;\n"
2480 " mov.b32 %f221, %r52;\n"
2481 " // begin inline asm\n"
2482 " tex.2d.v4.f32.f32 {%r56, %r57, %r58, %r59}, [%rd4, {%f115, %f128}];\n"
2483 " // end inline asm\n"
2484 " mov.b32 %f222, %r59;\n"
2485 " mov.b32 %f223, %r58;\n"
2486 " mov.b32 %f224, %r57;\n"
2487 " mov.b32 %f225, %r56;\n"
2488 " // begin inline asm\n"
2489 " tex.2d.v4.f32.f32 {%r60, %r61, %r62, %r63}, [%rd4, {%f117, %f128}];\n"
2490 " // end inline asm\n"
2491 " mov.b32 %f226, %r63;\n"
2492 " mov.b32 %f227, %r62;\n"
2493 " mov.b32 %f228, %r61;\n"
2494 " mov.b32 %f229, %r60;\n"
2495 " mul.f32 %f230, %f26, %f221;\n"
2496 " mul.f32 %f231, %f26, %f220;\n"
2497 " mul.f32 %f232, %f26, %f219;\n"
2498 " mul.f32 %f233, %f26, %f218;\n"
2499 " fma.rn.f32 %f234, %f25, %f217, %f230;\n"
2500 " fma.rn.f32 %f235, %f25, %f216, %f231;\n"
2501 " fma.rn.f32 %f236, %f25, %f215, %f232;\n"
2502 " fma.rn.f32 %f237, %f25, %f214, %f233;\n"
2503 " fma.rn.f32 %f238, %f27, %f225, %f234;\n"
2504 " fma.rn.f32 %f239, %f27, %f224, %f235;\n"
2505 " fma.rn.f32 %f240, %f27, %f223, %f236;\n"
2506 " fma.rn.f32 %f241, %f27, %f222, %f237;\n"
2507 " fma.rn.f32 %f242, %f28, %f229, %f238;\n"
2508 " fma.rn.f32 %f243, %f28, %f228, %f239;\n"
2509 " fma.rn.f32 %f244, %f28, %f227, %f240;\n"
2510 " fma.rn.f32 %f245, %f28, %f226, %f241;\n"
2511 " add.f32 %f136, %f120, 0f40000000;\n"
2512 " // begin inline asm\n"
2513 " tex.2d.v4.f32.f32 {%r64, %r65, %r66, %r67}, [%rd4, {%f111, %f136}];\n"
2514 " // end inline asm\n"
2515 " mov.b32 %f246, %r67;\n"
2516 " mov.b32 %f247, %r66;\n"
2517 " mov.b32 %f248, %r65;\n"
2518 " mov.b32 %f249, %r64;\n"
2519 " // begin inline asm\n"
2520 " tex.2d.v4.f32.f32 {%r68, %r69, %r70, %r71}, [%rd4, {%f113, %f136}];\n"
2521 " // end inline asm\n"
2522 " mov.b32 %f250, %r71;\n"
2523 " mov.b32 %f251, %r70;\n"
2524 " mov.b32 %f252, %r69;\n"
2525 " mov.b32 %f253, %r68;\n"
2526 " // begin inline asm\n"
2527 " tex.2d.v4.f32.f32 {%r72, %r73, %r74, %r75}, [%rd4, {%f115, %f136}];\n"
2528 " // end inline asm\n"
2529 " mov.b32 %f254, %r75;\n"
2530 " mov.b32 %f255, %r74;\n"
2531 " mov.b32 %f256, %r73;\n"
2532 " mov.b32 %f257, %r72;\n"
2533 " // begin inline asm\n"
2534 " tex.2d.v4.f32.f32 {%r76, %r77, %r78, %r79}, [%rd4, {%f117, %f136}];\n"
2535 " // end inline asm\n"
2536 " mov.b32 %f258, %r79;\n"
2537 " mov.b32 %f259, %r78;\n"
2538 " mov.b32 %f260, %r77;\n"
2539 " mov.b32 %f261, %r76;\n"
2540 " mul.f32 %f262, %f26, %f253;\n"
2541 " mul.f32 %f263, %f26, %f252;\n"
2542 " mul.f32 %f264, %f26, %f251;\n"
2543 " mul.f32 %f265, %f26, %f250;\n"
2544 " fma.rn.f32 %f266, %f25, %f249, %f262;\n"
2545 " fma.rn.f32 %f267, %f25, %f248, %f263;\n"
2546 " fma.rn.f32 %f268, %f25, %f247, %f264;\n"
2547 " fma.rn.f32 %f269, %f25, %f246, %f265;\n"
2548 " fma.rn.f32 %f270, %f27, %f257, %f266;\n"
2549 " fma.rn.f32 %f271, %f27, %f256, %f267;\n"
2550 " fma.rn.f32 %f272, %f27, %f255, %f268;\n"
2551 " fma.rn.f32 %f273, %f27, %f254, %f269;\n"
2552 " fma.rn.f32 %f274, %f28, %f261, %f270;\n"
2553 " fma.rn.f32 %f275, %f28, %f260, %f271;\n"
2554 " fma.rn.f32 %f276, %f28, %f259, %f272;\n"
2555 " fma.rn.f32 %f277, %f28, %f258, %f273;\n"
2556 " mul.f32 %f278, %f147, %f210;\n"
2557 " mul.f32 %f279, %f147, %f211;\n"
2558 " mul.f32 %f280, %f147, %f212;\n"
2559 " mul.f32 %f281, %f147, %f213;\n"
2560 " fma.rn.f32 %f282, %f146, %f178, %f278;\n"
2561 " fma.rn.f32 %f283, %f146, %f179, %f279;\n"
2562 " fma.rn.f32 %f284, %f146, %f180, %f280;\n"
2563 " fma.rn.f32 %f285, %f146, %f181, %f281;\n"
2564 " fma.rn.f32 %f286, %f148, %f242, %f282;\n"
2565 " fma.rn.f32 %f287, %f148, %f243, %f283;\n"
2566 " fma.rn.f32 %f288, %f148, %f244, %f284;\n"
2567 " fma.rn.f32 %f289, %f148, %f245, %f285;\n"
2568 " fma.rn.f32 %f290, %f149, %f274, %f286;\n"
2569 " fma.rn.f32 %f291, %f149, %f275, %f287;\n"
2570 " fma.rn.f32 %f292, %f149, %f276, %f288;\n"
2571 " fma.rn.f32 %f293, %f149, %f277, %f289;\n"
2572 " mul.f32 %f294, %f290, %f4;\n"
2573 " mul.f32 %f295, %f291, %f4;\n"
2574 " mul.f32 %f296, %f292, %f4;\n"
2575 " mul.f32 %f297, %f293, %f4;\n"
2576 " cvt.rzi.u16.f32 %rs1, %f294;\n"
2577 " cvt.rzi.u16.f32 %rs2, %f295;\n"
2578 " cvt.rzi.u16.f32 %rs3, %f296;\n"
2579 " cvt.rzi.u16.f32 %rs4, %f297;\n"
2580 " mad.lo.s32 %r80, %r2, %r5, %r1;\n"
2581 " mul.wide.s32 %rd20, %r80, 4;\n"
2582 " add.s64 %rd21, %rd1, %rd20;\n"
2583 " st.global.v4.u8 [%rd21], {%rs1, %rs2, %rs3, %rs4};\n"
// Subsample_Lanczos_ushort -- PTX entry point; each thread produces one u16 output element.
//
// Per thread: derive a sampling position from the thread's x/y index, compute four
// horizontal and four vertical weights of the form sin(a)*sin(a/2) / (a*a/2)
// (which equals sinc(x)*sinc(x/2) for a = pi*x), normalize each set by its sum,
// blend a 4x4 neighbourhood of texture fetches and store the scaled result.
//
// Parameters, as used by the instructions visible below:
//   param_0 (u64): texture operand of every tex.2d fetch.
//   param_1 (u64): destination base address (cvta.to.global, then st.global.u16).
//   param_2 (u32): exclusive upper bound for the x index; also the divisor of the x scale.
//   param_3 (u32): exclusive upper bound for the y index; also the divisor of the y scale.
//   param_4 (u32): destination row pitch, counted in 2-byte elements.
//   param_5 (u32): numerator of the x scale -- presumably the source width; confirm with caller.
//   param_6 (u32): numerator of the y scale -- presumably the source height; confirm with caller.
//   param_7 (u32): a value > 8 selects an output scale of 65535, otherwise 255 -- presumably a bit depth; confirm.
//   param_8 (f32): declared, but not loaded by any instruction visible here.
//
// NOTE(review): the embedded line numbers skip wherever braces, `ret` and the LBB9_*
// branch targets would sit; presumably each label lands in the gap right after the
// guarded div.rn -- verify against the full PTX.
2588 " // .globl Subsample_Lanczos_ushort\n"
2589 ".visible .entry Subsample_Lanczos_ushort(\n"
2590 " .param .u64 Subsample_Lanczos_ushort_param_0,\n"
2591 " .param .u64 Subsample_Lanczos_ushort_param_1,\n"
2592 " .param .u32 Subsample_Lanczos_ushort_param_2,\n"
2593 " .param .u32 Subsample_Lanczos_ushort_param_3,\n"
2594 " .param .u32 Subsample_Lanczos_ushort_param_4,\n"
2595 " .param .u32 Subsample_Lanczos_ushort_param_5,\n"
2596 " .param .u32 Subsample_Lanczos_ushort_param_6,\n"
2597 " .param .u32 Subsample_Lanczos_ushort_param_7,\n"
2598 " .param .f32 Subsample_Lanczos_ushort_param_8\n"
2601 " .reg .pred %p<13>;\n"
2602 " .reg .b16 %rs<2>;\n"
2603 " .reg .f32 %f<195>;\n"
2604 " .reg .b32 %r<81>;\n"
2605 " .reg .b64 %rd<22>;\n"
2607 " ld.param.u32 %r4, [Subsample_Lanczos_ushort_param_3];\n"
2608 " ld.param.u32 %r3, [Subsample_Lanczos_ushort_param_2];\n"
2609 " // begin inline asm\n"
2610 " mov.u32 %r9, %ctaid.x;\n"
2611 " // end inline asm\n"
2612 " // begin inline asm\n"
2613 " mov.u32 %r10, %ctaid.y;\n"
2614 " // end inline asm\n"
2615 " // begin inline asm\n"
2616 " mov.u32 %r11, %ntid.x;\n"
2617 " // end inline asm\n"
2618 " // begin inline asm\n"
2619 " mov.u32 %r12, %ntid.y;\n"
2620 " // end inline asm\n"
2621 " // begin inline asm\n"
2622 " mov.u32 %r13, %tid.x;\n"
2623 " // end inline asm\n"
2624 " // begin inline asm\n"
2625 " mov.u32 %r14, %tid.y;\n"
2626 " // end inline asm\n"
// Thread index: %r1 = ntid.x * ctaid.x + tid.x (x), %r2 = ntid.y * ctaid.y + tid.y (y).
2627 " mad.lo.s32 %r1, %r11, %r9, %r13;\n"
2628 " mad.lo.s32 %r2, %r12, %r10, %r14;\n"
// Threads with y >= param_3 or x >= param_2 branch to LBB9_18
// (target not visible in this listing; presumably the kernel exit).
2629 " setp.ge.s32 %p1, %r2, %r4;\n"
2630 " setp.ge.s32 %p2, %r1, %r3;\n"
2631 " or.pred %p3, %p2, %p1;\n"
2632 " @%p3 bra LBB9_18;\n"
2633 " ld.param.u32 %r7, [Subsample_Lanczos_ushort_param_6];\n"
2634 " ld.param.u32 %r6, [Subsample_Lanczos_ushort_param_5];\n"
// Scale factors: %f52 = param_5 / param_2, %f55 = param_6 / param_3 (signed int -> f32, then divide).
2635 " cvt.rn.f32.s32 %f50, %r6;\n"
2636 " cvt.rn.f32.s32 %f51, %r3;\n"
2637 " div.rn.f32 %f52, %f50, %f51;\n"
2638 " cvt.rn.f32.s32 %f53, %r7;\n"
2639 " cvt.rn.f32.s32 %f54, %r4;\n"
2640 " div.rn.f32 %f55, %f53, %f54;\n"
// Sampling x position: %f58 = %f52 * (x + 0.5) - 0.5 (0f3F000000 = 0.5, 0fBF000000 = -0.5).
2641 " cvt.rn.f32.s32 %f56, %r1;\n"
2642 " add.f32 %f57, %f56, 0f3F000000;\n"
2643 " fma.rn.f32 %f58, %f52, %f57, 0fBF000000;\n"
2644 " cvt.rn.f32.s32 %f59, %r2;\n"
2645 " add.f32 %f60, %f59, 0f3F000000;\n"
// %f113 = floor of the x position (cvt.rmi rounds toward -infinity); %f62 = fractional part fx.
2646 " cvt.rmi.f32.f32 %f113, %f58;\n"
2647 " sub.f32 %f62, %f58, %f113;\n"
// Horizontal weight arguments (0f40490FDB is pi in f32): %f5 = (fx + 1) * pi, %f6 = fx * pi;
// %f7 = (fx - 1) * pi and %f8 = (fx - 2) * pi are formed further down.
2648 " add.f32 %f63, %f62, 0f3F800000;\n"
2649 " mul.f32 %f5, %f63, 0f40490FDB;\n"
2650 " mul.f32 %f6, %f62, 0f40490FDB;\n"
2651 " add.f32 %f64, %f62, 0fBF800000;\n"
// %f194 holds the constant 1.0 used as the default for every weight: a weight keeps 1.0
// when its argument compares equal to 0, otherwise the guarded block overwrites it.
2652 " setp.eq.f32 %p5, %f5, 0f00000000;\n"
2653 " mov.f32 %f194, 0f3F800000;\n"
2654 " mov.f32 %f187, %f194;\n"
2655 " @%p5 bra LBB9_3;\n"
// Horizontal weight 0: %f187 = sin(%f5) * sin(%f5 / 2) / (%f5 * %f5 / 2), using sin.approx.
2656 " mul.f32 %f9, %f5, 0f3F000000;\n"
2657 " mul.f32 %f66, %f5, %f5;\n"
2658 " mul.f32 %f10, %f66, 0f3F000000;\n"
2659 " sin.approx.f32 %f67, %f5;\n"
2660 " sin.approx.f32 %f68, %f9;\n"
2661 " mul.f32 %f69, %f67, %f68;\n"
2662 " div.rn.f32 %f187, %f69, %f10;\n"
// Sampling y position: %f61 = %f55 * (y + 0.5) - 0.5.
2664 " fma.rn.f32 %f61, %f55, %f60, 0fBF000000;\n"
2665 " add.f32 %f65, %f62, 0fC0000000;\n"
2666 " mul.f32 %f7, %f64, 0f40490FDB;\n"
// Horizontal weight 1 (%f188), same formula with argument %f6.
2667 " setp.eq.f32 %p6, %f6, 0f00000000;\n"
2668 " mov.f32 %f188, %f194;\n"
2669 " @%p6 bra LBB9_5;\n"
2670 " mul.f32 %f13, %f6, 0f3F000000;\n"
2671 " mul.f32 %f71, %f6, %f6;\n"
2672 " mul.f32 %f14, %f71, 0f3F000000;\n"
2673 " sin.approx.f32 %f72, %f6;\n"
2674 " sin.approx.f32 %f73, %f13;\n"
2675 " mul.f32 %f74, %f72, %f73;\n"
2676 " div.rn.f32 %f188, %f74, %f14;\n"
// %f120 = floor of the y position.
2678 " cvt.rmi.f32.f32 %f120, %f61;\n"
2679 " mul.f32 %f8, %f65, 0f40490FDB;\n"
// Horizontal weight 2 (%f189), argument %f7.
2680 " setp.eq.f32 %p7, %f7, 0f00000000;\n"
2681 " mov.f32 %f189, %f194;\n"
2682 " @%p7 bra LBB9_7;\n"
2683 " mul.f32 %f17, %f7, 0f3F000000;\n"
2684 " mul.f32 %f76, %f7, %f7;\n"
2685 " mul.f32 %f18, %f76, 0f3F000000;\n"
2686 " sin.approx.f32 %f77, %f7;\n"
2687 " sin.approx.f32 %f78, %f17;\n"
2688 " mul.f32 %f79, %f77, %f78;\n"
2689 " div.rn.f32 %f189, %f79, %f18;\n"
// %f3 = fractional part fy of the y position.
2691 " sub.f32 %f3, %f61, %f120;\n"
// Horizontal weight 3 (%f190), argument %f8.
2692 " setp.eq.f32 %p8, %f8, 0f00000000;\n"
2693 " mov.f32 %f190, %f194;\n"
2694 " @%p8 bra LBB9_9;\n"
2695 " mul.f32 %f21, %f8, 0f3F000000;\n"
2696 " mul.f32 %f81, %f8, %f8;\n"
2697 " mul.f32 %f22, %f81, 0f3F000000;\n"
2698 " sin.approx.f32 %f82, %f8;\n"
2699 " sin.approx.f32 %f83, %f21;\n"
2700 " mul.f32 %f84, %f82, %f83;\n"
2701 " div.rn.f32 %f190, %f84, %f22;\n"
2703 " ld.param.u32 %r8, [Subsample_Lanczos_ushort_param_7];\n"
// Running sum of the horizontal weights starts here (%f86 -> %f87 -> %f88).
2704 " add.f32 %f86, %f187, %f188;\n"
// Vertical weight arguments: %f29 = (fy + 1) * pi, %f30 = fy * pi; %f31 and %f32 follow below.
2705 " add.f32 %f89, %f3, 0f3F800000;\n"
2706 " mul.f32 %f29, %f89, 0f40490FDB;\n"
2707 " mul.f32 %f30, %f3, 0f40490FDB;\n"
2708 " add.f32 %f90, %f3, 0fBF800000;\n"
// Vertical weight 0 (%f191), argument %f29.
2709 " setp.eq.f32 %p9, %f29, 0f00000000;\n"
2710 " mov.f32 %f191, %f194;\n"
2711 " @%p9 bra LBB9_11;\n"
2712 " mul.f32 %f33, %f29, 0f3F000000;\n"
2713 " mul.f32 %f92, %f29, %f29;\n"
2714 " mul.f32 %f34, %f92, 0f3F000000;\n"
2715 " sin.approx.f32 %f93, %f29;\n"
2716 " sin.approx.f32 %f94, %f33;\n"
2717 " mul.f32 %f95, %f93, %f94;\n"
2718 " div.rn.f32 %f191, %f95, %f34;\n"
// %p4 = (param_7 > 8); it picks the output scale further down.
2720 " setp.gt.s32 %p4, %r8, 8;\n"
2721 " add.f32 %f87, %f86, %f189;\n"
2722 " add.f32 %f91, %f3, 0fC0000000;\n"
2723 " mul.f32 %f31, %f90, 0f40490FDB;\n"
// Vertical weight 1 (%f192), argument %f30.
2724 " setp.eq.f32 %p10, %f30, 0f00000000;\n"
2725 " mov.f32 %f192, %f194;\n"
2726 " @%p10 bra LBB9_13;\n"
2727 " mul.f32 %f37, %f30, 0f3F000000;\n"
2728 " mul.f32 %f97, %f30, %f30;\n"
2729 " mul.f32 %f38, %f97, 0f3F000000;\n"
2730 " sin.approx.f32 %f98, %f30;\n"
2731 " sin.approx.f32 %f99, %f37;\n"
2732 " mul.f32 %f100, %f98, %f99;\n"
2733 " div.rn.f32 %f192, %f100, %f38;\n"
2735 " ld.param.u64 %rd3, [Subsample_Lanczos_ushort_param_1];\n"
// %r15 = 65535 when param_7 > 8, else 255; %f88 = sum of the four horizontal weights.
2736 " selp.b32 %r15, 65535, 255, %p4;\n"
2737 " add.f32 %f88, %f87, %f190;\n"
2738 " mul.f32 %f32, %f91, 0f40490FDB;\n"
// Vertical weight 2 (%f193), argument %f31.
2739 " setp.eq.f32 %p11, %f31, 0f00000000;\n"
2740 " mov.f32 %f193, %f194;\n"
2741 " @%p11 bra LBB9_15;\n"
2742 " mul.f32 %f41, %f31, 0f3F000000;\n"
2743 " mul.f32 %f102, %f31, %f31;\n"
2744 " mul.f32 %f42, %f102, 0f3F000000;\n"
2745 " sin.approx.f32 %f103, %f31;\n"
2746 " sin.approx.f32 %f104, %f41;\n"
2747 " mul.f32 %f105, %f103, %f104;\n"
2748 " div.rn.f32 %f193, %f105, %f42;\n"
2750 " ld.param.u32 %r5, [Subsample_Lanczos_ushort_param_4];\n"
2751 " ld.param.u64 %rd4, [Subsample_Lanczos_ushort_param_0];\n"
2752 " cvta.to.global.u64 %rd1, %rd3;\n"
// %f4 = output scale as f32; %f25..%f28 = horizontal weights divided by their sum.
2753 " cvt.rn.f32.s32 %f4, %r15;\n"
2754 " div.rn.f32 %f25, %f187, %f88;\n"
2755 " div.rn.f32 %f26, %f188, %f88;\n"
2756 " div.rn.f32 %f27, %f189, %f88;\n"
2757 " div.rn.f32 %f28, %f190, %f88;\n"
// Vertical weight 3 reuses %f194 itself: it stays 1.0 when %f32 == 0, else it is overwritten.
2758 " setp.eq.f32 %p12, %f32, 0f00000000;\n"
2759 " @%p12 bra LBB9_17;\n"
2760 " mul.f32 %f45, %f32, 0f3F000000;\n"
2761 " mul.f32 %f107, %f32, %f32;\n"
2762 " mul.f32 %f46, %f107, 0f3F000000;\n"
2763 " sin.approx.f32 %f108, %f32;\n"
2764 " sin.approx.f32 %f109, %f45;\n"
2765 " mul.f32 %f110, %f108, %f109;\n"
2766 " div.rn.f32 %f194, %f110, %f46;\n"
// %f146..%f149 = vertical weights divided by their sum (%f145).
2768 " add.f32 %f143, %f191, %f192;\n"
2769 " add.f32 %f144, %f143, %f193;\n"
2770 " add.f32 %f145, %f144, %f194;\n"
2771 " div.rn.f32 %f146, %f191, %f145;\n"
2772 " div.rn.f32 %f147, %f192, %f145;\n"
2773 " div.rn.f32 %f148, %f193, %f145;\n"
2774 " div.rn.f32 %f149, %f194, %f145;\n"
// Fetch coordinates: columns %f111 / %f113 / %f115 / %f117 = floor(x) - 1, +0, +1, +2;
// rows %f112 / %f120 / %f128 / %f136 = floor(y) - 1, +0, +1, +2.
// Only the first of the four returned components of each fetch is consumed.
// NOTE(review): scaling the blend by 255 / 65535 suggests the fetches return
// normalized floats -- the texture setup is not visible here; confirm on the host side.
2775 " add.f32 %f111, %f113, 0fBF800000;\n"
2776 " add.f32 %f112, %f120, 0fBF800000;\n"
// Row floor(y) - 1.
2777 " // begin inline asm\n"
2778 " tex.2d.v4.f32.f32 {%r16, %r17, %r18, %r19}, [%rd4, {%f111, %f112}];\n"
2779 " // end inline asm\n"
2780 " mov.b32 %f150, %r16;\n"
2781 " // begin inline asm\n"
2782 " tex.2d.v4.f32.f32 {%r20, %r21, %r22, %r23}, [%rd4, {%f113, %f112}];\n"
2783 " // end inline asm\n"
2784 " mov.b32 %f151, %r20;\n"
2785 " add.f32 %f115, %f113, 0f3F800000;\n"
2786 " // begin inline asm\n"
2787 " tex.2d.v4.f32.f32 {%r24, %r25, %r26, %r27}, [%rd4, {%f115, %f112}];\n"
2788 " // end inline asm\n"
2789 " mov.b32 %f152, %r24;\n"
2790 " add.f32 %f117, %f113, 0f40000000;\n"
2791 " // begin inline asm\n"
2792 " tex.2d.v4.f32.f32 {%r28, %r29, %r30, %r31}, [%rd4, {%f117, %f112}];\n"
2793 " // end inline asm\n"
2794 " mov.b32 %f153, %r28;\n"
// Horizontal blend of this row: %f157 = %f25*t0 + %f26*t1 + %f27*t2 + %f28*t3.
2795 " mul.f32 %f154, %f26, %f151;\n"
2796 " fma.rn.f32 %f155, %f25, %f150, %f154;\n"
2797 " fma.rn.f32 %f156, %f27, %f152, %f155;\n"
2798 " fma.rn.f32 %f157, %f28, %f153, %f156;\n"
// Row floor(y); blended into %f165.
2799 " // begin inline asm\n"
2800 " tex.2d.v4.f32.f32 {%r32, %r33, %r34, %r35}, [%rd4, {%f111, %f120}];\n"
2801 " // end inline asm\n"
2802 " mov.b32 %f158, %r32;\n"
2803 " // begin inline asm\n"
2804 " tex.2d.v4.f32.f32 {%r36, %r37, %r38, %r39}, [%rd4, {%f113, %f120}];\n"
2805 " // end inline asm\n"
2806 " mov.b32 %f159, %r36;\n"
2807 " // begin inline asm\n"
2808 " tex.2d.v4.f32.f32 {%r40, %r41, %r42, %r43}, [%rd4, {%f115, %f120}];\n"
2809 " // end inline asm\n"
2810 " mov.b32 %f160, %r40;\n"
2811 " // begin inline asm\n"
2812 " tex.2d.v4.f32.f32 {%r44, %r45, %r46, %r47}, [%rd4, {%f117, %f120}];\n"
2813 " // end inline asm\n"
2814 " mov.b32 %f161, %r44;\n"
2815 " mul.f32 %f162, %f26, %f159;\n"
2816 " fma.rn.f32 %f163, %f25, %f158, %f162;\n"
2817 " fma.rn.f32 %f164, %f27, %f160, %f163;\n"
2818 " fma.rn.f32 %f165, %f28, %f161, %f164;\n"
// Row floor(y) + 1; blended into %f173.
2819 " add.f32 %f128, %f120, 0f3F800000;\n"
2820 " // begin inline asm\n"
2821 " tex.2d.v4.f32.f32 {%r48, %r49, %r50, %r51}, [%rd4, {%f111, %f128}];\n"
2822 " // end inline asm\n"
2823 " mov.b32 %f166, %r48;\n"
2824 " // begin inline asm\n"
2825 " tex.2d.v4.f32.f32 {%r52, %r53, %r54, %r55}, [%rd4, {%f113, %f128}];\n"
2826 " // end inline asm\n"
2827 " mov.b32 %f167, %r52;\n"
2828 " // begin inline asm\n"
2829 " tex.2d.v4.f32.f32 {%r56, %r57, %r58, %r59}, [%rd4, {%f115, %f128}];\n"
2830 " // end inline asm\n"
2831 " mov.b32 %f168, %r56;\n"
2832 " // begin inline asm\n"
2833 " tex.2d.v4.f32.f32 {%r60, %r61, %r62, %r63}, [%rd4, {%f117, %f128}];\n"
2834 " // end inline asm\n"
2835 " mov.b32 %f169, %r60;\n"
2836 " mul.f32 %f170, %f26, %f167;\n"
2837 " fma.rn.f32 %f171, %f25, %f166, %f170;\n"
2838 " fma.rn.f32 %f172, %f27, %f168, %f171;\n"
2839 " fma.rn.f32 %f173, %f28, %f169, %f172;\n"
// Row floor(y) + 2; blended into %f181.
2840 " add.f32 %f136, %f120, 0f40000000;\n"
2841 " // begin inline asm\n"
2842 " tex.2d.v4.f32.f32 {%r64, %r65, %r66, %r67}, [%rd4, {%f111, %f136}];\n"
2843 " // end inline asm\n"
2844 " mov.b32 %f174, %r64;\n"
2845 " // begin inline asm\n"
2846 " tex.2d.v4.f32.f32 {%r68, %r69, %r70, %r71}, [%rd4, {%f113, %f136}];\n"
2847 " // end inline asm\n"
2848 " mov.b32 %f175, %r68;\n"
2849 " // begin inline asm\n"
2850 " tex.2d.v4.f32.f32 {%r72, %r73, %r74, %r75}, [%rd4, {%f115, %f136}];\n"
2851 " // end inline asm\n"
2852 " mov.b32 %f176, %r72;\n"
2853 " // begin inline asm\n"
2854 " tex.2d.v4.f32.f32 {%r76, %r77, %r78, %r79}, [%rd4, {%f117, %f136}];\n"
2855 " // end inline asm\n"
2856 " mov.b32 %f177, %r76;\n"
2857 " mul.f32 %f178, %f26, %f175;\n"
2858 " fma.rn.f32 %f179, %f25, %f174, %f178;\n"
2859 " fma.rn.f32 %f180, %f27, %f176, %f179;\n"
2860 " fma.rn.f32 %f181, %f28, %f177, %f180;\n"
// Vertical blend of the four row results with %f146..%f149.
2861 " mul.f32 %f182, %f147, %f165;\n"
2862 " fma.rn.f32 %f183, %f146, %f157, %f182;\n"
2863 " fma.rn.f32 %f184, %f148, %f173, %f183;\n"
2864 " fma.rn.f32 %f185, %f149, %f181, %f184;\n"
// Scale by %f4 (255 or 65535) and convert to u16, rounding toward zero (cvt.rzi).
2865 " mul.f32 %f186, %f185, %f4;\n"
2866 " cvt.rzi.u16.f32 %rs1, %f186;\n"
// Store address = param_1 + (y * param_4 + x) * 2 bytes.
2867 " mad.lo.s32 %r80, %r2, %r5, %r1;\n"
2868 " mul.wide.s32 %rd20, %r80, 2;\n"
2869 " add.s64 %rd21, %rd1, %rd20;\n"
2870 " st.global.u16 [%rd21], %rs1;\n"
// Subsample_Lanczos_ushort2 -- PTX entry point; each thread produces one pair of u16 values.
//
// Per thread: derive a sampling position from the thread's x/y index, compute four
// horizontal and four vertical weights of the form sin(a)*sin(a/2) / (a*a/2)
// (which equals sinc(x)*sinc(x/2) for a = pi*x), normalize each set by its sum,
// blend a 4x4 neighbourhood of texture fetches for the first two returned
// components, and store both results with a single v2.u16 store.
//
// Parameters, as used by the instructions visible below:
//   param_0 (u64): texture operand of every tex.2d fetch.
//   param_1 (u64): destination base address (cvta.to.global, then st.global.v2.u16).
//   param_2 (u32): exclusive upper bound for the x index; also the divisor of the x scale.
//   param_3 (u32): exclusive upper bound for the y index; also the divisor of the y scale.
//   param_4 (u32): destination row pitch, counted in 4-byte elements (one u16 pair each).
//   param_5 (u32): numerator of the x scale -- presumably the source width; confirm with caller.
//   param_6 (u32): numerator of the y scale -- presumably the source height; confirm with caller.
//   param_7 (u32): a value > 8 selects an output scale of 65535, otherwise 255 -- presumably a bit depth; confirm.
//   param_8 (f32): declared, but not loaded by any instruction visible here.
//
// NOTE(review): the embedded line numbers skip wherever braces, `ret` and the LBB10_*
// branch targets would sit; presumably each label lands in the gap right after the
// guarded div.rn -- verify against the full PTX.
2875 " // .globl Subsample_Lanczos_ushort2\n"
2876 ".visible .entry Subsample_Lanczos_ushort2(\n"
2877 " .param .u64 Subsample_Lanczos_ushort2_param_0,\n"
2878 " .param .u64 Subsample_Lanczos_ushort2_param_1,\n"
2879 " .param .u32 Subsample_Lanczos_ushort2_param_2,\n"
2880 " .param .u32 Subsample_Lanczos_ushort2_param_3,\n"
2881 " .param .u32 Subsample_Lanczos_ushort2_param_4,\n"
2882 " .param .u32 Subsample_Lanczos_ushort2_param_5,\n"
2883 " .param .u32 Subsample_Lanczos_ushort2_param_6,\n"
2884 " .param .u32 Subsample_Lanczos_ushort2_param_7,\n"
2885 " .param .f32 Subsample_Lanczos_ushort2_param_8\n"
2888 " .reg .pred %p<13>;\n"
2889 " .reg .b16 %rs<3>;\n"
2890 " .reg .f32 %f<232>;\n"
2891 " .reg .b32 %r<81>;\n"
2892 " .reg .b64 %rd<22>;\n"
2894 " ld.param.u32 %r4, [Subsample_Lanczos_ushort2_param_3];\n"
2895 " ld.param.u32 %r3, [Subsample_Lanczos_ushort2_param_2];\n"
2896 " // begin inline asm\n"
2897 " mov.u32 %r9, %ctaid.x;\n"
2898 " // end inline asm\n"
2899 " // begin inline asm\n"
2900 " mov.u32 %r10, %ctaid.y;\n"
2901 " // end inline asm\n"
2902 " // begin inline asm\n"
2903 " mov.u32 %r11, %ntid.x;\n"
2904 " // end inline asm\n"
2905 " // begin inline asm\n"
2906 " mov.u32 %r12, %ntid.y;\n"
2907 " // end inline asm\n"
2908 " // begin inline asm\n"
2909 " mov.u32 %r13, %tid.x;\n"
2910 " // end inline asm\n"
2911 " // begin inline asm\n"
2912 " mov.u32 %r14, %tid.y;\n"
2913 " // end inline asm\n"
// Thread index: %r1 = ntid.x * ctaid.x + tid.x (x), %r2 = ntid.y * ctaid.y + tid.y (y).
2914 " mad.lo.s32 %r1, %r11, %r9, %r13;\n"
2915 " mad.lo.s32 %r2, %r12, %r10, %r14;\n"
// Threads with y >= param_3 or x >= param_2 branch to LBB10_18
// (target not visible in this listing; presumably the kernel exit).
2916 " setp.ge.s32 %p1, %r2, %r4;\n"
2917 " setp.ge.s32 %p2, %r1, %r3;\n"
2918 " or.pred %p3, %p2, %p1;\n"
2919 " @%p3 bra LBB10_18;\n"
2920 " ld.param.u32 %r7, [Subsample_Lanczos_ushort2_param_6];\n"
2921 " ld.param.u32 %r6, [Subsample_Lanczos_ushort2_param_5];\n"
// Scale factors: %f52 = param_5 / param_2, %f55 = param_6 / param_3 (signed int -> f32, then divide).
2922 " cvt.rn.f32.s32 %f50, %r6;\n"
2923 " cvt.rn.f32.s32 %f51, %r3;\n"
2924 " div.rn.f32 %f52, %f50, %f51;\n"
2925 " cvt.rn.f32.s32 %f53, %r7;\n"
2926 " cvt.rn.f32.s32 %f54, %r4;\n"
2927 " div.rn.f32 %f55, %f53, %f54;\n"
// Sampling x position: %f58 = %f52 * (x + 0.5) - 0.5 (0f3F000000 = 0.5, 0fBF000000 = -0.5).
2928 " cvt.rn.f32.s32 %f56, %r1;\n"
2929 " add.f32 %f57, %f56, 0f3F000000;\n"
2930 " fma.rn.f32 %f58, %f52, %f57, 0fBF000000;\n"
2931 " cvt.rn.f32.s32 %f59, %r2;\n"
2932 " add.f32 %f60, %f59, 0f3F000000;\n"
// %f113 = floor of the x position (cvt.rmi rounds toward -infinity); %f62 = fractional part fx.
2933 " cvt.rmi.f32.f32 %f113, %f58;\n"
2934 " sub.f32 %f62, %f58, %f113;\n"
// Horizontal weight arguments (0f40490FDB is pi in f32): %f5 = (fx + 1) * pi, %f6 = fx * pi;
// %f7 = (fx - 1) * pi and %f8 = (fx - 2) * pi are formed further down.
2935 " add.f32 %f63, %f62, 0f3F800000;\n"
2936 " mul.f32 %f5, %f63, 0f40490FDB;\n"
2937 " mul.f32 %f6, %f62, 0f40490FDB;\n"
2938 " add.f32 %f64, %f62, 0fBF800000;\n"
// %f231 holds the constant 1.0 used as the default for every weight: a weight keeps 1.0
// when its argument compares equal to 0, otherwise the guarded block overwrites it.
2939 " setp.eq.f32 %p5, %f5, 0f00000000;\n"
2940 " mov.f32 %f231, 0f3F800000;\n"
2941 " mov.f32 %f224, %f231;\n"
2942 " @%p5 bra LBB10_3;\n"
// Horizontal weight 0: %f224 = sin(%f5) * sin(%f5 / 2) / (%f5 * %f5 / 2), using sin.approx.
2943 " mul.f32 %f9, %f5, 0f3F000000;\n"
2944 " mul.f32 %f66, %f5, %f5;\n"
2945 " mul.f32 %f10, %f66, 0f3F000000;\n"
2946 " sin.approx.f32 %f67, %f5;\n"
2947 " sin.approx.f32 %f68, %f9;\n"
2948 " mul.f32 %f69, %f67, %f68;\n"
2949 " div.rn.f32 %f224, %f69, %f10;\n"
// Sampling y position: %f61 = %f55 * (y + 0.5) - 0.5.
2951 " fma.rn.f32 %f61, %f55, %f60, 0fBF000000;\n"
2952 " add.f32 %f65, %f62, 0fC0000000;\n"
2953 " mul.f32 %f7, %f64, 0f40490FDB;\n"
// Horizontal weight 1 (%f225), same formula with argument %f6.
2954 " setp.eq.f32 %p6, %f6, 0f00000000;\n"
2955 " mov.f32 %f225, %f231;\n"
2956 " @%p6 bra LBB10_5;\n"
2957 " mul.f32 %f13, %f6, 0f3F000000;\n"
2958 " mul.f32 %f71, %f6, %f6;\n"
2959 " mul.f32 %f14, %f71, 0f3F000000;\n"
2960 " sin.approx.f32 %f72, %f6;\n"
2961 " sin.approx.f32 %f73, %f13;\n"
2962 " mul.f32 %f74, %f72, %f73;\n"
2963 " div.rn.f32 %f225, %f74, %f14;\n"
// %f120 = floor of the y position.
2965 " cvt.rmi.f32.f32 %f120, %f61;\n"
2966 " mul.f32 %f8, %f65, 0f40490FDB;\n"
// Horizontal weight 2 (%f226), argument %f7.
2967 " setp.eq.f32 %p7, %f7, 0f00000000;\n"
2968 " mov.f32 %f226, %f231;\n"
2969 " @%p7 bra LBB10_7;\n"
2970 " mul.f32 %f17, %f7, 0f3F000000;\n"
2971 " mul.f32 %f76, %f7, %f7;\n"
2972 " mul.f32 %f18, %f76, 0f3F000000;\n"
2973 " sin.approx.f32 %f77, %f7;\n"
2974 " sin.approx.f32 %f78, %f17;\n"
2975 " mul.f32 %f79, %f77, %f78;\n"
2976 " div.rn.f32 %f226, %f79, %f18;\n"
// %f3 = fractional part fy of the y position.
2978 " sub.f32 %f3, %f61, %f120;\n"
// Horizontal weight 3 (%f227), argument %f8.
2979 " setp.eq.f32 %p8, %f8, 0f00000000;\n"
2980 " mov.f32 %f227, %f231;\n"
2981 " @%p8 bra LBB10_9;\n"
2982 " mul.f32 %f21, %f8, 0f3F000000;\n"
2983 " mul.f32 %f81, %f8, %f8;\n"
2984 " mul.f32 %f22, %f81, 0f3F000000;\n"
2985 " sin.approx.f32 %f82, %f8;\n"
2986 " sin.approx.f32 %f83, %f21;\n"
2987 " mul.f32 %f84, %f82, %f83;\n"
2988 " div.rn.f32 %f227, %f84, %f22;\n"
2990 " ld.param.u32 %r8, [Subsample_Lanczos_ushort2_param_7];\n"
// Running sum of the horizontal weights starts here (%f86 -> %f87 -> %f88).
2991 " add.f32 %f86, %f224, %f225;\n"
// Vertical weight arguments: %f29 = (fy + 1) * pi, %f30 = fy * pi; %f31 and %f32 follow below.
2992 " add.f32 %f89, %f3, 0f3F800000;\n"
2993 " mul.f32 %f29, %f89, 0f40490FDB;\n"
2994 " mul.f32 %f30, %f3, 0f40490FDB;\n"
2995 " add.f32 %f90, %f3, 0fBF800000;\n"
// Vertical weight 0 (%f228), argument %f29.
2996 " setp.eq.f32 %p9, %f29, 0f00000000;\n"
2997 " mov.f32 %f228, %f231;\n"
2998 " @%p9 bra LBB10_11;\n"
2999 " mul.f32 %f33, %f29, 0f3F000000;\n"
3000 " mul.f32 %f92, %f29, %f29;\n"
3001 " mul.f32 %f34, %f92, 0f3F000000;\n"
3002 " sin.approx.f32 %f93, %f29;\n"
3003 " sin.approx.f32 %f94, %f33;\n"
3004 " mul.f32 %f95, %f93, %f94;\n"
3005 " div.rn.f32 %f228, %f95, %f34;\n"
// %p4 = (param_7 > 8); it picks the output scale further down.
3007 " setp.gt.s32 %p4, %r8, 8;\n"
3008 " add.f32 %f87, %f86, %f226;\n"
3009 " add.f32 %f91, %f3, 0fC0000000;\n"
3010 " mul.f32 %f31, %f90, 0f40490FDB;\n"
// Vertical weight 1 (%f229), argument %f30.
3011 " setp.eq.f32 %p10, %f30, 0f00000000;\n"
3012 " mov.f32 %f229, %f231;\n"
3013 " @%p10 bra LBB10_13;\n"
3014 " mul.f32 %f37, %f30, 0f3F000000;\n"
3015 " mul.f32 %f97, %f30, %f30;\n"
3016 " mul.f32 %f38, %f97, 0f3F000000;\n"
3017 " sin.approx.f32 %f98, %f30;\n"
3018 " sin.approx.f32 %f99, %f37;\n"
3019 " mul.f32 %f100, %f98, %f99;\n"
3020 " div.rn.f32 %f229, %f100, %f38;\n"
3022 " ld.param.u64 %rd3, [Subsample_Lanczos_ushort2_param_1];\n"
// %r15 = 65535 when param_7 > 8, else 255; %f88 = sum of the four horizontal weights.
3023 " selp.b32 %r15, 65535, 255, %p4;\n"
3024 " add.f32 %f88, %f87, %f227;\n"
3025 " mul.f32 %f32, %f91, 0f40490FDB;\n"
// Vertical weight 2 (%f230), argument %f31.
3026 " setp.eq.f32 %p11, %f31, 0f00000000;\n"
3027 " mov.f32 %f230, %f231;\n"
3028 " @%p11 bra LBB10_15;\n"
3029 " mul.f32 %f41, %f31, 0f3F000000;\n"
3030 " mul.f32 %f102, %f31, %f31;\n"
3031 " mul.f32 %f42, %f102, 0f3F000000;\n"
3032 " sin.approx.f32 %f103, %f31;\n"
3033 " sin.approx.f32 %f104, %f41;\n"
3034 " mul.f32 %f105, %f103, %f104;\n"
3035 " div.rn.f32 %f230, %f105, %f42;\n"
3037 " ld.param.u32 %r5, [Subsample_Lanczos_ushort2_param_4];\n"
3038 " ld.param.u64 %rd4, [Subsample_Lanczos_ushort2_param_0];\n"
3039 " cvta.to.global.u64 %rd1, %rd3;\n"
// %f4 = output scale as f32; %f25..%f28 = horizontal weights divided by their sum.
3040 " cvt.rn.f32.s32 %f4, %r15;\n"
3041 " div.rn.f32 %f25, %f224, %f88;\n"
3042 " div.rn.f32 %f26, %f225, %f88;\n"
3043 " div.rn.f32 %f27, %f226, %f88;\n"
3044 " div.rn.f32 %f28, %f227, %f88;\n"
// Vertical weight 3 reuses %f231 itself: it stays 1.0 when %f32 == 0, else it is overwritten.
3045 " setp.eq.f32 %p12, %f32, 0f00000000;\n"
3046 " @%p12 bra LBB10_17;\n"
3047 " mul.f32 %f45, %f32, 0f3F000000;\n"
3048 " mul.f32 %f107, %f32, %f32;\n"
3049 " mul.f32 %f46, %f107, 0f3F000000;\n"
3050 " sin.approx.f32 %f108, %f32;\n"
3051 " sin.approx.f32 %f109, %f45;\n"
3052 " mul.f32 %f110, %f108, %f109;\n"
3053 " div.rn.f32 %f231, %f110, %f46;\n"
// %f146..%f149 = vertical weights divided by their sum (%f145).
3055 " add.f32 %f143, %f228, %f229;\n"
3056 " add.f32 %f144, %f143, %f230;\n"
3057 " add.f32 %f145, %f144, %f231;\n"
3058 " div.rn.f32 %f146, %f228, %f145;\n"
3059 " div.rn.f32 %f147, %f229, %f145;\n"
3060 " div.rn.f32 %f148, %f230, %f145;\n"
3061 " div.rn.f32 %f149, %f231, %f145;\n"
// Fetch coordinates: columns %f111 / %f113 / %f115 / %f117 = floor(x) - 1, +0, +1, +2;
// rows %f112 / %f120 / %f128 / %f136 = floor(y) - 1, +0, +1, +2.
// The first two of the four returned components of each fetch are consumed.
// NOTE(review): scaling the blend by 255 / 65535 suggests the fetches return
// normalized floats -- the texture setup is not visible here; confirm on the host side.
3062 " add.f32 %f111, %f113, 0fBF800000;\n"
3063 " add.f32 %f112, %f120, 0fBF800000;\n"
// Row floor(y) - 1.
3064 " // begin inline asm\n"
3065 " tex.2d.v4.f32.f32 {%r16, %r17, %r18, %r19}, [%rd4, {%f111, %f112}];\n"
3066 " // end inline asm\n"
3067 " mov.b32 %f150, %r17;\n"
3068 " mov.b32 %f151, %r16;\n"
3069 " // begin inline asm\n"
3070 " tex.2d.v4.f32.f32 {%r20, %r21, %r22, %r23}, [%rd4, {%f113, %f112}];\n"
3071 " // end inline asm\n"
3072 " mov.b32 %f152, %r21;\n"
3073 " mov.b32 %f153, %r20;\n"
3074 " add.f32 %f115, %f113, 0f3F800000;\n"
3075 " // begin inline asm\n"
3076 " tex.2d.v4.f32.f32 {%r24, %r25, %r26, %r27}, [%rd4, {%f115, %f112}];\n"
3077 " // end inline asm\n"
3078 " mov.b32 %f154, %r25;\n"
3079 " mov.b32 %f155, %r24;\n"
3080 " add.f32 %f117, %f113, 0f40000000;\n"
3081 " // begin inline asm\n"
3082 " tex.2d.v4.f32.f32 {%r28, %r29, %r30, %r31}, [%rd4, {%f117, %f112}];\n"
3083 " // end inline asm\n"
3084 " mov.b32 %f156, %r29;\n"
3085 " mov.b32 %f157, %r28;\n"
// Horizontal blend of this row with %f25..%f28: %f164 (first component), %f165 (second component).
3086 " mul.f32 %f158, %f26, %f153;\n"
3087 " mul.f32 %f159, %f26, %f152;\n"
3088 " fma.rn.f32 %f160, %f25, %f151, %f158;\n"
3089 " fma.rn.f32 %f161, %f25, %f150, %f159;\n"
3090 " fma.rn.f32 %f162, %f27, %f155, %f160;\n"
3091 " fma.rn.f32 %f163, %f27, %f154, %f161;\n"
3092 " fma.rn.f32 %f164, %f28, %f157, %f162;\n"
3093 " fma.rn.f32 %f165, %f28, %f156, %f163;\n"
// Row floor(y); blended into %f180 / %f181.
3094 " // begin inline asm\n"
3095 " tex.2d.v4.f32.f32 {%r32, %r33, %r34, %r35}, [%rd4, {%f111, %f120}];\n"
3096 " // end inline asm\n"
3097 " mov.b32 %f166, %r33;\n"
3098 " mov.b32 %f167, %r32;\n"
3099 " // begin inline asm\n"
3100 " tex.2d.v4.f32.f32 {%r36, %r37, %r38, %r39}, [%rd4, {%f113, %f120}];\n"
3101 " // end inline asm\n"
3102 " mov.b32 %f168, %r37;\n"
3103 " mov.b32 %f169, %r36;\n"
3104 " // begin inline asm\n"
3105 " tex.2d.v4.f32.f32 {%r40, %r41, %r42, %r43}, [%rd4, {%f115, %f120}];\n"
3106 " // end inline asm\n"
3107 " mov.b32 %f170, %r41;\n"
3108 " mov.b32 %f171, %r40;\n"
3109 " // begin inline asm\n"
3110 " tex.2d.v4.f32.f32 {%r44, %r45, %r46, %r47}, [%rd4, {%f117, %f120}];\n"
3111 " // end inline asm\n"
3112 " mov.b32 %f172, %r45;\n"
3113 " mov.b32 %f173, %r44;\n"
3114 " mul.f32 %f174, %f26, %f169;\n"
3115 " mul.f32 %f175, %f26, %f168;\n"
3116 " fma.rn.f32 %f176, %f25, %f167, %f174;\n"
3117 " fma.rn.f32 %f177, %f25, %f166, %f175;\n"
3118 " fma.rn.f32 %f178, %f27, %f171, %f176;\n"
3119 " fma.rn.f32 %f179, %f27, %f170, %f177;\n"
3120 " fma.rn.f32 %f180, %f28, %f173, %f178;\n"
3121 " fma.rn.f32 %f181, %f28, %f172, %f179;\n"
// Row floor(y) + 1; blended into %f196 / %f197.
3122 " add.f32 %f128, %f120, 0f3F800000;\n"
3123 " // begin inline asm\n"
3124 " tex.2d.v4.f32.f32 {%r48, %r49, %r50, %r51}, [%rd4, {%f111, %f128}];\n"
3125 " // end inline asm\n"
3126 " mov.b32 %f182, %r49;\n"
3127 " mov.b32 %f183, %r48;\n"
3128 " // begin inline asm\n"
3129 " tex.2d.v4.f32.f32 {%r52, %r53, %r54, %r55}, [%rd4, {%f113, %f128}];\n"
3130 " // end inline asm\n"
3131 " mov.b32 %f184, %r53;\n"
3132 " mov.b32 %f185, %r52;\n"
3133 " // begin inline asm\n"
3134 " tex.2d.v4.f32.f32 {%r56, %r57, %r58, %r59}, [%rd4, {%f115, %f128}];\n"
3135 " // end inline asm\n"
3136 " mov.b32 %f186, %r57;\n"
3137 " mov.b32 %f187, %r56;\n"
3138 " // begin inline asm\n"
3139 " tex.2d.v4.f32.f32 {%r60, %r61, %r62, %r63}, [%rd4, {%f117, %f128}];\n"
3140 " // end inline asm\n"
3141 " mov.b32 %f188, %r61;\n"
3142 " mov.b32 %f189, %r60;\n"
3143 " mul.f32 %f190, %f26, %f185;\n"
3144 " mul.f32 %f191, %f26, %f184;\n"
3145 " fma.rn.f32 %f192, %f25, %f183, %f190;\n"
3146 " fma.rn.f32 %f193, %f25, %f182, %f191;\n"
3147 " fma.rn.f32 %f194, %f27, %f187, %f192;\n"
3148 " fma.rn.f32 %f195, %f27, %f186, %f193;\n"
3149 " fma.rn.f32 %f196, %f28, %f189, %f194;\n"
3150 " fma.rn.f32 %f197, %f28, %f188, %f195;\n"
// Row floor(y) + 2; blended into %f212 / %f213.
3151 " add.f32 %f136, %f120, 0f40000000;\n"
3152 " // begin inline asm\n"
3153 " tex.2d.v4.f32.f32 {%r64, %r65, %r66, %r67}, [%rd4, {%f111, %f136}];\n"
3154 " // end inline asm\n"
3155 " mov.b32 %f198, %r65;\n"
3156 " mov.b32 %f199, %r64;\n"
3157 " // begin inline asm\n"
3158 " tex.2d.v4.f32.f32 {%r68, %r69, %r70, %r71}, [%rd4, {%f113, %f136}];\n"
3159 " // end inline asm\n"
3160 " mov.b32 %f200, %r69;\n"
3161 " mov.b32 %f201, %r68;\n"
3162 " // begin inline asm\n"
3163 " tex.2d.v4.f32.f32 {%r72, %r73, %r74, %r75}, [%rd4, {%f115, %f136}];\n"
3164 " // end inline asm\n"
3165 " mov.b32 %f202, %r73;\n"
3166 " mov.b32 %f203, %r72;\n"
3167 " // begin inline asm\n"
3168 " tex.2d.v4.f32.f32 {%r76, %r77, %r78, %r79}, [%rd4, {%f117, %f136}];\n"
3169 " // end inline asm\n"
3170 " mov.b32 %f204, %r77;\n"
3171 " mov.b32 %f205, %r76;\n"
3172 " mul.f32 %f206, %f26, %f201;\n"
3173 " mul.f32 %f207, %f26, %f200;\n"
3174 " fma.rn.f32 %f208, %f25, %f199, %f206;\n"
3175 " fma.rn.f32 %f209, %f25, %f198, %f207;\n"
3176 " fma.rn.f32 %f210, %f27, %f203, %f208;\n"
3177 " fma.rn.f32 %f211, %f27, %f202, %f209;\n"
3178 " fma.rn.f32 %f212, %f28, %f205, %f210;\n"
3179 " fma.rn.f32 %f213, %f28, %f204, %f211;\n"
// Vertical blend of the four row results with %f146..%f149, one chain per component.
3180 " mul.f32 %f214, %f147, %f180;\n"
3181 " mul.f32 %f215, %f147, %f181;\n"
3182 " fma.rn.f32 %f216, %f146, %f164, %f214;\n"
3183 " fma.rn.f32 %f217, %f146, %f165, %f215;\n"
3184 " fma.rn.f32 %f218, %f148, %f196, %f216;\n"
3185 " fma.rn.f32 %f219, %f148, %f197, %f217;\n"
3186 " fma.rn.f32 %f220, %f149, %f212, %f218;\n"
3187 " fma.rn.f32 %f221, %f149, %f213, %f219;\n"
// Scale by %f4 (255 or 65535) and convert each component to u16, rounding toward zero (cvt.rzi).
3188 " mul.f32 %f222, %f220, %f4;\n"
3189 " mul.f32 %f223, %f221, %f4;\n"
3190 " cvt.rzi.u16.f32 %rs1, %f222;\n"
3191 " cvt.rzi.u16.f32 %rs2, %f223;\n"
// Store address = param_1 + (y * param_4 + x) * 4 bytes; both u16 values go out in one vector store.
3192 " mad.lo.s32 %r80, %r2, %r5, %r1;\n"
3193 " mul.wide.s32 %rd20, %r80, 4;\n"
3194 " add.s64 %rd21, %rd1, %rd20;\n"
3195 " st.global.v2.u16 [%rd21], {%rs1, %rs2};\n"
3200 " // .globl Subsample_Lanczos_ushort4\n"
3201 ".visible .entry Subsample_Lanczos_ushort4(\n"
3202 " .param .u64 Subsample_Lanczos_ushort4_param_0,\n"
3203 " .param .u64 Subsample_Lanczos_ushort4_param_1,\n"
3204 " .param .u32 Subsample_Lanczos_ushort4_param_2,\n"
3205 " .param .u32 Subsample_Lanczos_ushort4_param_3,\n"
3206 " .param .u32 Subsample_Lanczos_ushort4_param_4,\n"
3207 " .param .u32 Subsample_Lanczos_ushort4_param_5,\n"
3208 " .param .u32 Subsample_Lanczos_ushort4_param_6,\n"
3209 " .param .u32 Subsample_Lanczos_ushort4_param_7,\n"
3210 " .param .f32 Subsample_Lanczos_ushort4_param_8\n"
3213 " .reg .pred %p<13>;\n"
3214 " .reg .b16 %rs<5>;\n"
3215 " .reg .f32 %f<306>;\n"
3216 " .reg .b32 %r<81>;\n"
3217 " .reg .b64 %rd<22>;\n"
3219 " ld.param.u32 %r4, [Subsample_Lanczos_ushort4_param_3];\n"
3220 " ld.param.u32 %r3, [Subsample_Lanczos_ushort4_param_2];\n"
3221 " // begin inline asm\n"
3222 " mov.u32 %r9, %ctaid.x;\n"
3223 " // end inline asm\n"
3224 " // begin inline asm\n"
3225 " mov.u32 %r10, %ctaid.y;\n"
3226 " // end inline asm\n"
3227 " // begin inline asm\n"
3228 " mov.u32 %r11, %ntid.x;\n"
3229 " // end inline asm\n"
3230 " // begin inline asm\n"
3231 " mov.u32 %r12, %ntid.y;\n"
3232 " // end inline asm\n"
3233 " // begin inline asm\n"
3234 " mov.u32 %r13, %tid.x;\n"
3235 " // end inline asm\n"
3236 " // begin inline asm\n"
3237 " mov.u32 %r14, %tid.y;\n"
3238 " // end inline asm\n"
3239 " mad.lo.s32 %r1, %r11, %r9, %r13;\n"
3240 " mad.lo.s32 %r2, %r12, %r10, %r14;\n"
3241 " setp.ge.s32 %p1, %r2, %r4;\n"
3242 " setp.ge.s32 %p2, %r1, %r3;\n"
3243 " or.pred %p3, %p2, %p1;\n"
3244 " @%p3 bra LBB11_18;\n"
3245 " ld.param.u32 %r7, [Subsample_Lanczos_ushort4_param_6];\n"
3246 " ld.param.u32 %r6, [Subsample_Lanczos_ushort4_param_5];\n"
3247 " cvt.rn.f32.s32 %f50, %r6;\n"
3248 " cvt.rn.f32.s32 %f51, %r3;\n"
3249 " div.rn.f32 %f52, %f50, %f51;\n"
3250 " cvt.rn.f32.s32 %f53, %r7;\n"
3251 " cvt.rn.f32.s32 %f54, %r4;\n"
3252 " div.rn.f32 %f55, %f53, %f54;\n"
3253 " cvt.rn.f32.s32 %f56, %r1;\n"
3254 " add.f32 %f57, %f56, 0f3F000000;\n"
3255 " fma.rn.f32 %f58, %f52, %f57, 0fBF000000;\n"
3256 " cvt.rn.f32.s32 %f59, %r2;\n"
3257 " add.f32 %f60, %f59, 0f3F000000;\n"
3258 " cvt.rmi.f32.f32 %f113, %f58;\n"
3259 " sub.f32 %f62, %f58, %f113;\n"
3260 " add.f32 %f63, %f62, 0f3F800000;\n"
3261 " mul.f32 %f5, %f63, 0f40490FDB;\n"
3262 " mul.f32 %f6, %f62, 0f40490FDB;\n"
3263 " add.f32 %f64, %f62, 0fBF800000;\n"
3264 " setp.eq.f32 %p5, %f5, 0f00000000;\n"
3265 " mov.f32 %f305, 0f3F800000;\n"
3266 " mov.f32 %f298, %f305;\n"
3267 " @%p5 bra LBB11_3;\n"
3268 " mul.f32 %f9, %f5, 0f3F000000;\n"
3269 " mul.f32 %f66, %f5, %f5;\n"
3270 " mul.f32 %f10, %f66, 0f3F000000;\n"
3271 " sin.approx.f32 %f67, %f5;\n"
3272 " sin.approx.f32 %f68, %f9;\n"
3273 " mul.f32 %f69, %f67, %f68;\n"
3274 " div.rn.f32 %f298, %f69, %f10;\n"
3276 " fma.rn.f32 %f61, %f55, %f60, 0fBF000000;\n"
3277 " add.f32 %f65, %f62, 0fC0000000;\n"
3278 " mul.f32 %f7, %f64, 0f40490FDB;\n"
3279 " setp.eq.f32 %p6, %f6, 0f00000000;\n"
3280 " mov.f32 %f299, %f305;\n"
3281 " @%p6 bra LBB11_5;\n"
3282 " mul.f32 %f13, %f6, 0f3F000000;\n"
3283 " mul.f32 %f71, %f6, %f6;\n"
3284 " mul.f32 %f14, %f71, 0f3F000000;\n"
3285 " sin.approx.f32 %f72, %f6;\n"
3286 " sin.approx.f32 %f73, %f13;\n"
3287 " mul.f32 %f74, %f72, %f73;\n"
3288 " div.rn.f32 %f299, %f74, %f14;\n"
3290 " cvt.rmi.f32.f32 %f120, %f61;\n"
3291 " mul.f32 %f8, %f65, 0f40490FDB;\n"
3292 " setp.eq.f32 %p7, %f7, 0f00000000;\n"
3293 " mov.f32 %f300, %f305;\n"
3294 " @%p7 bra LBB11_7;\n"
3295 " mul.f32 %f17, %f7, 0f3F000000;\n"
3296 " mul.f32 %f76, %f7, %f7;\n"
3297 " mul.f32 %f18, %f76, 0f3F000000;\n"
3298 " sin.approx.f32 %f77, %f7;\n"
3299 " sin.approx.f32 %f78, %f17;\n"
3300 " mul.f32 %f79, %f77, %f78;\n"
3301 " div.rn.f32 %f300, %f79, %f18;\n"
3303 " sub.f32 %f3, %f61, %f120;\n"
3304 " setp.eq.f32 %p8, %f8, 0f00000000;\n"
3305 " mov.f32 %f301, %f305;\n"
3306 " @%p8 bra LBB11_9;\n"
3307 " mul.f32 %f21, %f8, 0f3F000000;\n"
3308 " mul.f32 %f81, %f8, %f8;\n"
3309 " mul.f32 %f22, %f81, 0f3F000000;\n"
3310 " sin.approx.f32 %f82, %f8;\n"
3311 " sin.approx.f32 %f83, %f21;\n"
3312 " mul.f32 %f84, %f82, %f83;\n"
3313 " div.rn.f32 %f301, %f84, %f22;\n"
3315 " ld.param.u32 %r8, [Subsample_Lanczos_ushort4_param_7];\n"
3316 " add.f32 %f86, %f298, %f299;\n"
3317 " add.f32 %f89, %f3, 0f3F800000;\n"
3318 " mul.f32 %f29, %f89, 0f40490FDB;\n"
3319 " mul.f32 %f30, %f3, 0f40490FDB;\n"
3320 " add.f32 %f90, %f3, 0fBF800000;\n"
3321 " setp.eq.f32 %p9, %f29, 0f00000000;\n"
3322 " mov.f32 %f302, %f305;\n"
3323 " @%p9 bra LBB11_11;\n"
3324 " mul.f32 %f33, %f29, 0f3F000000;\n"
3325 " mul.f32 %f92, %f29, %f29;\n"
3326 " mul.f32 %f34, %f92, 0f3F000000;\n"
3327 " sin.approx.f32 %f93, %f29;\n"
3328 " sin.approx.f32 %f94, %f33;\n"
3329 " mul.f32 %f95, %f93, %f94;\n"
3330 " div.rn.f32 %f302, %f95, %f34;\n"
3332 " setp.gt.s32 %p4, %r8, 8;\n"
3333 " add.f32 %f87, %f86, %f300;\n"
3334 " add.f32 %f91, %f3, 0fC0000000;\n"
3335 " mul.f32 %f31, %f90, 0f40490FDB;\n"
3336 " setp.eq.f32 %p10, %f30, 0f00000000;\n"
3337 " mov.f32 %f303, %f305;\n"
3338 " @%p10 bra LBB11_13;\n"
3339 " mul.f32 %f37, %f30, 0f3F000000;\n"
3340 " mul.f32 %f97, %f30, %f30;\n"
3341 " mul.f32 %f38, %f97, 0f3F000000;\n"
3342 " sin.approx.f32 %f98, %f30;\n"
3343 " sin.approx.f32 %f99, %f37;\n"
3344 " mul.f32 %f100, %f98, %f99;\n"
3345 " div.rn.f32 %f303, %f100, %f38;\n"
3347 " ld.param.u64 %rd3, [Subsample_Lanczos_ushort4_param_1];\n"
3348 " selp.b32 %r15, 65535, 255, %p4;\n"
3349 " add.f32 %f88, %f87, %f301;\n"
3350 " mul.f32 %f32, %f91, 0f40490FDB;\n"
3351 " setp.eq.f32 %p11, %f31, 0f00000000;\n"
3352 " mov.f32 %f304, %f305;\n"
3353 " @%p11 bra LBB11_15;\n"
3354 " mul.f32 %f41, %f31, 0f3F000000;\n"
3355 " mul.f32 %f102, %f31, %f31;\n"
3356 " mul.f32 %f42, %f102, 0f3F000000;\n"
3357 " sin.approx.f32 %f103, %f31;\n"
3358 " sin.approx.f32 %f104, %f41;\n"
3359 " mul.f32 %f105, %f103, %f104;\n"
3360 " div.rn.f32 %f304, %f105, %f42;\n"
3362 " ld.param.u32 %r5, [Subsample_Lanczos_ushort4_param_4];\n"
3363 " ld.param.u64 %rd4, [Subsample_Lanczos_ushort4_param_0];\n"
3364 " cvta.to.global.u64 %rd1, %rd3;\n"
3365 " cvt.rn.f32.s32 %f4, %r15;\n"
3366 " div.rn.f32 %f25, %f298, %f88;\n"
3367 " div.rn.f32 %f26, %f299, %f88;\n"
3368 " div.rn.f32 %f27, %f300, %f88;\n"
3369 " div.rn.f32 %f28, %f301, %f88;\n"
3370 " setp.eq.f32 %p12, %f32, 0f00000000;\n"
3371 " @%p12 bra LBB11_17;\n"
3372 " mul.f32 %f45, %f32, 0f3F000000;\n"
3373 " mul.f32 %f107, %f32, %f32;\n"
3374 " mul.f32 %f46, %f107, 0f3F000000;\n"
3375 " sin.approx.f32 %f108, %f32;\n"
3376 " sin.approx.f32 %f109, %f45;\n"
3377 " mul.f32 %f110, %f108, %f109;\n"
3378 " div.rn.f32 %f305, %f110, %f46;\n"
3380 " add.f32 %f143, %f302, %f303;\n"
3381 " add.f32 %f144, %f143, %f304;\n"
3382 " add.f32 %f145, %f144, %f305;\n"
3383 " div.rn.f32 %f146, %f302, %f145;\n"
3384 " div.rn.f32 %f147, %f303, %f145;\n"
3385 " div.rn.f32 %f148, %f304, %f145;\n"
3386 " div.rn.f32 %f149, %f305, %f145;\n"
3387 " add.f32 %f111, %f113, 0fBF800000;\n"
3388 " add.f32 %f112, %f120, 0fBF800000;\n"
3389 " // begin inline asm\n"
3390 " tex.2d.v4.f32.f32 {%r16, %r17, %r18, %r19}, [%rd4, {%f111, %f112}];\n"
3391 " // end inline asm\n"
3392 " mov.b32 %f150, %r19;\n"
3393 " mov.b32 %f151, %r18;\n"
3394 " mov.b32 %f152, %r17;\n"
3395 " mov.b32 %f153, %r16;\n"
3396 " // begin inline asm\n"
3397 " tex.2d.v4.f32.f32 {%r20, %r21, %r22, %r23}, [%rd4, {%f113, %f112}];\n"
3398 " // end inline asm\n"
3399 " mov.b32 %f154, %r23;\n"
3400 " mov.b32 %f155, %r22;\n"
3401 " mov.b32 %f156, %r21;\n"
3402 " mov.b32 %f157, %r20;\n"
3403 " add.f32 %f115, %f113, 0f3F800000;\n"
3404 " // begin inline asm\n"
3405 " tex.2d.v4.f32.f32 {%r24, %r25, %r26, %r27}, [%rd4, {%f115, %f112}];\n"
3406 " // end inline asm\n"
3407 " mov.b32 %f158, %r27;\n"
3408 " mov.b32 %f159, %r26;\n"
3409 " mov.b32 %f160, %r25;\n"
3410 " mov.b32 %f161, %r24;\n"
3411 " add.f32 %f117, %f113, 0f40000000;\n"
3412 " // begin inline asm\n"
3413 " tex.2d.v4.f32.f32 {%r28, %r29, %r30, %r31}, [%rd4, {%f117, %f112}];\n"
3414 " // end inline asm\n"
3415 " mov.b32 %f162, %r31;\n"
3416 " mov.b32 %f163, %r30;\n"
3417 " mov.b32 %f164, %r29;\n"
3418 " mov.b32 %f165, %r28;\n"
3419 " mul.f32 %f166, %f26, %f157;\n"
3420 " mul.f32 %f167, %f26, %f156;\n"
3421 " mul.f32 %f168, %f26, %f155;\n"
3422 " mul.f32 %f169, %f26, %f154;\n"
3423 " fma.rn.f32 %f170, %f25, %f153, %f166;\n"
3424 " fma.rn.f32 %f171, %f25, %f152, %f167;\n"
3425 " fma.rn.f32 %f172, %f25, %f151, %f168;\n"
3426 " fma.rn.f32 %f173, %f25, %f150, %f169;\n"
3427 " fma.rn.f32 %f174, %f27, %f161, %f170;\n"
3428 " fma.rn.f32 %f175, %f27, %f160, %f171;\n"
3429 " fma.rn.f32 %f176, %f27, %f159, %f172;\n"
3430 " fma.rn.f32 %f177, %f27, %f158, %f173;\n"
3431 " fma.rn.f32 %f178, %f28, %f165, %f174;\n"
3432 " fma.rn.f32 %f179, %f28, %f164, %f175;\n"
3433 " fma.rn.f32 %f180, %f28, %f163, %f176;\n"
3434 " fma.rn.f32 %f181, %f28, %f162, %f177;\n"
3435 " // begin inline asm\n"
3436 " tex.2d.v4.f32.f32 {%r32, %r33, %r34, %r35}, [%rd4, {%f111, %f120}];\n"
3437 " // end inline asm\n"
3438 " mov.b32 %f182, %r35;\n"
3439 " mov.b32 %f183, %r34;\n"
3440 " mov.b32 %f184, %r33;\n"
3441 " mov.b32 %f185, %r32;\n"
3442 " // begin inline asm\n"
3443 " tex.2d.v4.f32.f32 {%r36, %r37, %r38, %r39}, [%rd4, {%f113, %f120}];\n"
3444 " // end inline asm\n"
3445 " mov.b32 %f186, %r39;\n"
3446 " mov.b32 %f187, %r38;\n"
3447 " mov.b32 %f188, %r37;\n"
3448 " mov.b32 %f189, %r36;\n"
3449 " // begin inline asm\n"
3450 " tex.2d.v4.f32.f32 {%r40, %r41, %r42, %r43}, [%rd4, {%f115, %f120}];\n"
3451 " // end inline asm\n"
3452 " mov.b32 %f190, %r43;\n"
3453 " mov.b32 %f191, %r42;\n"
3454 " mov.b32 %f192, %r41;\n"
3455 " mov.b32 %f193, %r40;\n"
3456 " // begin inline asm\n"
3457 " tex.2d.v4.f32.f32 {%r44, %r45, %r46, %r47}, [%rd4, {%f117, %f120}];\n"
3458 " // end inline asm\n"
3459 " mov.b32 %f194, %r47;\n"
3460 " mov.b32 %f195, %r46;\n"
3461 " mov.b32 %f196, %r45;\n"
3462 " mov.b32 %f197, %r44;\n"
3463 " mul.f32 %f198, %f26, %f189;\n"
3464 " mul.f32 %f199, %f26, %f188;\n"
3465 " mul.f32 %f200, %f26, %f187;\n"
3466 " mul.f32 %f201, %f26, %f186;\n"
3467 " fma.rn.f32 %f202, %f25, %f185, %f198;\n"
3468 " fma.rn.f32 %f203, %f25, %f184, %f199;\n"
3469 " fma.rn.f32 %f204, %f25, %f183, %f200;\n"
3470 " fma.rn.f32 %f205, %f25, %f182, %f201;\n"
3471 " fma.rn.f32 %f206, %f27, %f193, %f202;\n"
3472 " fma.rn.f32 %f207, %f27, %f192, %f203;\n"
3473 " fma.rn.f32 %f208, %f27, %f191, %f204;\n"
3474 " fma.rn.f32 %f209, %f27, %f190, %f205;\n"
3475 " fma.rn.f32 %f210, %f28, %f197, %f206;\n"
3476 " fma.rn.f32 %f211, %f28, %f196, %f207;\n"
3477 " fma.rn.f32 %f212, %f28, %f195, %f208;\n"
3478 " fma.rn.f32 %f213, %f28, %f194, %f209;\n"
3479 " add.f32 %f128, %f120, 0f3F800000;\n"
3480 " // begin inline asm\n"
3481 " tex.2d.v4.f32.f32 {%r48, %r49, %r50, %r51}, [%rd4, {%f111, %f128}];\n"
3482 " // end inline asm\n"
3483 " mov.b32 %f214, %r51;\n"
3484 " mov.b32 %f215, %r50;\n"
3485 " mov.b32 %f216, %r49;\n"
3486 " mov.b32 %f217, %r48;\n"
3487 " // begin inline asm\n"
3488 " tex.2d.v4.f32.f32 {%r52, %r53, %r54, %r55}, [%rd4, {%f113, %f128}];\n"
3489 " // end inline asm\n"
3490 " mov.b32 %f218, %r55;\n"
3491 " mov.b32 %f219, %r54;\n"
3492 " mov.b32 %f220, %r53;\n"
3493 " mov.b32 %f221, %r52;\n"
3494 " // begin inline asm\n"
3495 " tex.2d.v4.f32.f32 {%r56, %r57, %r58, %r59}, [%rd4, {%f115, %f128}];\n"
3496 " // end inline asm\n"
3497 " mov.b32 %f222, %r59;\n"
3498 " mov.b32 %f223, %r58;\n"
3499 " mov.b32 %f224, %r57;\n"
3500 " mov.b32 %f225, %r56;\n"
3501 " // begin inline asm\n"
3502 " tex.2d.v4.f32.f32 {%r60, %r61, %r62, %r63}, [%rd4, {%f117, %f128}];\n"
3503 " // end inline asm\n"
3504 " mov.b32 %f226, %r63;\n"
3505 " mov.b32 %f227, %r62;\n"
3506 " mov.b32 %f228, %r61;\n"
3507 " mov.b32 %f229, %r60;\n"
3508 " mul.f32 %f230, %f26, %f221;\n"
3509 " mul.f32 %f231, %f26, %f220;\n"
3510 " mul.f32 %f232, %f26, %f219;\n"
3511 " mul.f32 %f233, %f26, %f218;\n"
3512 " fma.rn.f32 %f234, %f25, %f217, %f230;\n"
3513 " fma.rn.f32 %f235, %f25, %f216, %f231;\n"
3514 " fma.rn.f32 %f236, %f25, %f215, %f232;\n"
3515 " fma.rn.f32 %f237, %f25, %f214, %f233;\n"
3516 " fma.rn.f32 %f238, %f27, %f225, %f234;\n"
3517 " fma.rn.f32 %f239, %f27, %f224, %f235;\n"
3518 " fma.rn.f32 %f240, %f27, %f223, %f236;\n"
3519 " fma.rn.f32 %f241, %f27, %f222, %f237;\n"
3520 " fma.rn.f32 %f242, %f28, %f229, %f238;\n"
3521 " fma.rn.f32 %f243, %f28, %f228, %f239;\n"
3522 " fma.rn.f32 %f244, %f28, %f227, %f240;\n"
3523 " fma.rn.f32 %f245, %f28, %f226, %f241;\n"
3524 " add.f32 %f136, %f120, 0f40000000;\n"
3525 " // begin inline asm\n"
3526 " tex.2d.v4.f32.f32 {%r64, %r65, %r66, %r67}, [%rd4, {%f111, %f136}];\n"
3527 " // end inline asm\n"
3528 " mov.b32 %f246, %r67;\n"
3529 " mov.b32 %f247, %r66;\n"
3530 " mov.b32 %f248, %r65;\n"
3531 " mov.b32 %f249, %r64;\n"
3532 " // begin inline asm\n"
3533 " tex.2d.v4.f32.f32 {%r68, %r69, %r70, %r71}, [%rd4, {%f113, %f136}];\n"
3534 " // end inline asm\n"
3535 " mov.b32 %f250, %r71;\n"
3536 " mov.b32 %f251, %r70;\n"
3537 " mov.b32 %f252, %r69;\n"
3538 " mov.b32 %f253, %r68;\n"
3539 " // begin inline asm\n"
3540 " tex.2d.v4.f32.f32 {%r72, %r73, %r74, %r75}, [%rd4, {%f115, %f136}];\n"
3541 " // end inline asm\n"
3542 " mov.b32 %f254, %r75;\n"
3543 " mov.b32 %f255, %r74;\n"
3544 " mov.b32 %f256, %r73;\n"
3545 " mov.b32 %f257, %r72;\n"
3546 " // begin inline asm\n"
3547 " tex.2d.v4.f32.f32 {%r76, %r77, %r78, %r79}, [%rd4, {%f117, %f136}];\n"
3548 " // end inline asm\n"
3549 " mov.b32 %f258, %r79;\n"
3550 " mov.b32 %f259, %r78;\n"
3551 " mov.b32 %f260, %r77;\n"
3552 " mov.b32 %f261, %r76;\n"
3553 " mul.f32 %f262, %f26, %f253;\n"
3554 " mul.f32 %f263, %f26, %f252;\n"
3555 " mul.f32 %f264, %f26, %f251;\n"
3556 " mul.f32 %f265, %f26, %f250;\n"
3557 " fma.rn.f32 %f266, %f25, %f249, %f262;\n"
3558 " fma.rn.f32 %f267, %f25, %f248, %f263;\n"
3559 " fma.rn.f32 %f268, %f25, %f247, %f264;\n"
3560 " fma.rn.f32 %f269, %f25, %f246, %f265;\n"
3561 " fma.rn.f32 %f270, %f27, %f257, %f266;\n"
3562 " fma.rn.f32 %f271, %f27, %f256, %f267;\n"
3563 " fma.rn.f32 %f272, %f27, %f255, %f268;\n"
3564 " fma.rn.f32 %f273, %f27, %f254, %f269;\n"
3565 " fma.rn.f32 %f274, %f28, %f261, %f270;\n"
3566 " fma.rn.f32 %f275, %f28, %f260, %f271;\n"
3567 " fma.rn.f32 %f276, %f28, %f259, %f272;\n"
3568 " fma.rn.f32 %f277, %f28, %f258, %f273;\n"
3569 " mul.f32 %f278, %f147, %f210;\n"
3570 " mul.f32 %f279, %f147, %f211;\n"
3571 " mul.f32 %f280, %f147, %f212;\n"
3572 " mul.f32 %f281, %f147, %f213;\n"
3573 " fma.rn.f32 %f282, %f146, %f178, %f278;\n"
3574 " fma.rn.f32 %f283, %f146, %f179, %f279;\n"
3575 " fma.rn.f32 %f284, %f146, %f180, %f280;\n"
3576 " fma.rn.f32 %f285, %f146, %f181, %f281;\n"
3577 " fma.rn.f32 %f286, %f148, %f242, %f282;\n"
3578 " fma.rn.f32 %f287, %f148, %f243, %f283;\n"
3579 " fma.rn.f32 %f288, %f148, %f244, %f284;\n"
3580 " fma.rn.f32 %f289, %f148, %f245, %f285;\n"
3581 " fma.rn.f32 %f290, %f149, %f274, %f286;\n"
3582 " fma.rn.f32 %f291, %f149, %f275, %f287;\n"
3583 " fma.rn.f32 %f292, %f149, %f276, %f288;\n"
3584 " fma.rn.f32 %f293, %f149, %f277, %f289;\n"
3585 " mul.f32 %f294, %f290, %f4;\n"
3586 " mul.f32 %f295, %f291, %f4;\n"
3587 " mul.f32 %f296, %f292, %f4;\n"
3588 " mul.f32 %f297, %f293, %f4;\n"
3589 " cvt.rzi.u16.f32 %rs1, %f294;\n"
3590 " cvt.rzi.u16.f32 %rs2, %f295;\n"
3591 " cvt.rzi.u16.f32 %rs3, %f296;\n"
3592 " cvt.rzi.u16.f32 %rs4, %f297;\n"
3593 " mad.lo.s32 %r80, %r2, %r5, %r1;\n"
3594 " mul.wide.s32 %rd20, %r80, 8;\n"
3595 " add.s64 %rd21, %rd1, %rd20;\n"
3596 " st.global.v4.u16 [%rd21], {%rs1, %rs2, %rs3, %rs4};\n"
const char vf_scale_cuda_bicubic_ptx[]