FFmpeg  4.4.4
deshake.c
Go to the documentation of this file.
1 // Generated from libavfilter/opencl/deshake.cl
3 "#line 1 \"libavfilter/opencl/deshake.cl\"\n"
4 "/*\n"
5 " * This file is part of FFmpeg.\n"
6 " *\n"
7 " * FFmpeg is free software; you can redistribute it and/or\n"
8 " * modify it under the terms of the GNU Lesser General Public\n"
9 " * License as published by the Free Software Foundation; either\n"
10 " * version 2.1 of the License, or (at your option) any later version.\n"
11 " *\n"
12 " * FFmpeg is distributed in the hope that it will be useful,\n"
13 " * but WITHOUT ANY WARRANTY; without even the implied warranty of\n"
14 " * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\n"
15 " * Lesser General Public License for more details.\n"
16 " *\n"
17 " * You should have received a copy of the GNU Lesser General Public\n"
18 " * License along with FFmpeg; if not, write to the Free Software\n"
19 " * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA\n"
20 " *\n"
21 " * Copyright (C) 2000, Intel Corporation, all rights reserved.\n"
22 " * Copyright (C) 2013, OpenCV Foundation, all rights reserved.\n"
23 " * Third party copyrights are property of their respective owners.\n"
24 " *\n"
25 " * Redistribution and use in source and binary forms, with or without modification,\n"
26 " * are permitted provided that the following conditions are met:\n"
27 " *\n"
28 " * * Redistribution's of source code must retain the above copyright notice,\n"
29 " * this list of conditions and the following disclaimer.\n"
30 " *\n"
31 " * * Redistribution's in binary form must reproduce the above copyright notice,\n"
32 " * this list of conditions and the following disclaimer in the documentation\n"
33 " * and/or other materials provided with the distribution.\n"
34 " *\n"
35 " * * The name of the copyright holders may not be used to endorse or promote products\n"
36 " * derived from this software without specific prior written permission.\n"
37 " *\n"
38 " * This software is provided by the copyright holders and contributors \"as is\" and\n"
39 " * any express or implied warranties, including, but not limited to, the implied\n"
40 " * warranties of merchantability and fitness for a particular purpose are disclaimed.\n"
41 " * In no event shall the Intel Corporation or contributors be liable for any direct,\n"
42 " * indirect, incidental, special, exemplary, or consequential damages\n"
43 " * (including, but not limited to, procurement of substitute goods or services;\n"
44 " * loss of use, data, or profits; or business interruption) however caused\n"
45 " * and on any theory of liability, whether in contract, strict liability,\n"
46 " * or tort (including negligence or otherwise) arising in any way out of\n"
47 " * the use of this software, even if advised of the possibility of such damage.\n"
48 " */\n"
49 "\n"
50 "#define HARRIS_THRESHOLD 3.0f\n"
51 "// Block size over which to compute harris response\n"
52 "//\n"
53 "// Note that changing this will require fiddling with the local array sizes in\n"
54 "// harris_response\n"
55 "#define HARRIS_RADIUS 2\n"
56 "#define DISTANCE_THRESHOLD 80\n"
57 "\n"
58 "// Sub-pixel refinement window for feature points\n"
59 "#define REFINE_WIN_HALF_W 5\n"
60 "#define REFINE_WIN_HALF_H 5\n"
61 "#define REFINE_WIN_W 11 // REFINE_WIN_HALF_W * 2 + 1\n"
62 "#define REFINE_WIN_H 11\n"
63 "\n"
64 "// Non-maximum suppression window size\n"
65 "#define NONMAX_WIN 30\n"
66 "#define NONMAX_WIN_HALF 15 // NONMAX_WIN / 2\n"
67 "\n"
68 "typedef struct PointPair {\n"
69 " // Previous frame\n"
70 " float2 p1;\n"
71 " // Current frame\n"
72 " float2 p2;\n"
73 "} PointPair;\n"
74 "\n"
75 "typedef struct SmoothedPointPair {\n"
76 " // Non-smoothed point in current frame\n"
77 " int2 p1;\n"
78 " // Smoothed point in current frame\n"
79 " float2 p2;\n"
80 "} SmoothedPointPair;\n"
81 "\n"
82 "typedef struct MotionVector {\n"
83 " PointPair p;\n"
84 " // Used to mark vectors as potential outliers\n"
85 " int should_consider;\n"
86 "} MotionVector;\n"
87 "\n"
88 "const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE |\n"
89 " CLK_ADDRESS_CLAMP_TO_EDGE |\n"
90 " CLK_FILTER_NEAREST;\n"
91 "\n"
92 "const sampler_t sampler_linear = CLK_NORMALIZED_COORDS_FALSE |\n"
93 " CLK_ADDRESS_CLAMP_TO_EDGE |\n"
94 " CLK_FILTER_LINEAR;\n"
95 "\n"
96 "const sampler_t sampler_linear_mirror = CLK_NORMALIZED_COORDS_TRUE |\n"
97 " CLK_ADDRESS_MIRRORED_REPEAT |\n"
98 " CLK_FILTER_LINEAR;\n"
99 "\n"
100 "// Writes to a 1D array at loc, treating it as a 2D array with the same\n"
101 "// dimensions as the global work size.\n"
102 "static void write_to_1d_arrf(__global float *buf, int2 loc, float val) {\n"
103 " buf[loc.x + loc.y * get_global_size(0)] = val;\n"
104 "}\n"
105 "\n"
106 "static void write_to_1d_arrul8(__global ulong8 *buf, int2 loc, ulong8 val) {\n"
107 " buf[loc.x + loc.y * get_global_size(0)] = val;\n"
108 "}\n"
109 "\n"
110 "static void write_to_1d_arrvec(__global MotionVector *buf, int2 loc, MotionVector val) {\n"
111 " buf[loc.x + loc.y * get_global_size(0)] = val;\n"
112 "}\n"
113 "\n"
114 "static void write_to_1d_arrf2(__global float2 *buf, int2 loc, float2 val) {\n"
115 " buf[loc.x + loc.y * get_global_size(0)] = val;\n"
116 "}\n"
117 "\n"
118 "static ulong8 read_from_1d_arrul8(__global const ulong8 *buf, int2 loc) {\n"
119 " return buf[loc.x + loc.y * get_global_size(0)];\n"
120 "}\n"
121 "\n"
122 "static float2 read_from_1d_arrf2(__global const float2 *buf, int2 loc) {\n"
123 " return buf[loc.x + loc.y * get_global_size(0)];\n"
124 "}\n"
125 "\n"
126 "// Returns the grayscale value at the given point.\n"
127 "static float pixel_grayscale(__read_only image2d_t src, int2 loc) {\n"
128 " float4 pixel = read_imagef(src, sampler, loc);\n"
129 " return (pixel.x + pixel.y + pixel.z) / 3.0f;\n"
130 "}\n"
131 "\n"
132 "static float convolve(\n"
133 " __local const float *grayscale,\n"
134 " int local_idx_x,\n"
135 " int local_idx_y,\n"
136 " float mask[3][3]\n"
137 ") {\n"
138 " float ret = 0;\n"
139 "\n"
140 " // These loops touch each pixel surrounding loc as well as loc itself\n"
141 " for (int i = 1, i2 = 0; i >= -1; --i, ++i2) {\n"
142 " for (int j = -1, j2 = 0; j <= 1; ++j, ++j2) {\n"
143 " ret += mask[i2][j2] * grayscale[(local_idx_x + 3 + j) + (local_idx_y + 3 + i) * 14];\n"
144 " }\n"
145 " }\n"
146 "\n"
147 " return ret;\n"
148 "}\n"
149 "\n"
150 "// Sums dx * dy for all pixels within radius of loc\n"
151 "static float sum_deriv_prod(\n"
152 " __local const float *grayscale,\n"
153 " float mask_x[3][3],\n"
154 " float mask_y[3][3]\n"
155 ") {\n"
156 " float ret = 0;\n"
157 "\n"
158 " for (int i = HARRIS_RADIUS; i >= -HARRIS_RADIUS; --i) {\n"
159 " for (int j = -HARRIS_RADIUS; j <= HARRIS_RADIUS; ++j) {\n"
160 " ret += convolve(grayscale, get_local_id(0) + j, get_local_id(1) + i, mask_x) *\n"
161 " convolve(grayscale, get_local_id(0) + j, get_local_id(1) + i, mask_y);\n"
162 " }\n"
163 " }\n"
164 "\n"
165 " return ret;\n"
166 "}\n"
167 "\n"
168 "// Sums d<>^2 (determined by mask) for all pixels within radius of loc\n"
169 "static float sum_deriv_pow(__local const float *grayscale, float mask[3][3])\n"
170 "{\n"
171 " float ret = 0;\n"
172 "\n"
173 " for (int i = HARRIS_RADIUS; i >= -HARRIS_RADIUS; --i) {\n"
174 " for (int j = -HARRIS_RADIUS; j <= HARRIS_RADIUS; ++j) {\n"
175 " float deriv = convolve(grayscale, get_local_id(0) + j, get_local_id(1) + i, mask);\n"
176 " ret += deriv * deriv;\n"
177 " }\n"
178 " }\n"
179 "\n"
180 " return ret;\n"
181 "}\n"
182 "\n"
183 "// Fills a box with the given radius and pixel around loc\n"
184 "static void draw_box(__write_only image2d_t dst, int2 loc, float4 pixel, int radius)\n"
185 "{\n"
186 " for (int i = -radius; i <= radius; ++i) {\n"
187 " for (int j = -radius; j <= radius; ++j) {\n"
188 " write_imagef(\n"
189 " dst,\n"
190 " (int2)(\n"
191 " // Clamp to avoid writing outside image bounds\n"
192 " clamp(loc.x + i, 0, get_image_dim(dst).x - 1),\n"
193 " clamp(loc.y + j, 0, get_image_dim(dst).y - 1)\n"
194 " ),\n"
195 " pixel\n"
196 " );\n"
197 " }\n"
198 " }\n"
199 "}\n"
200 "\n"
201 "// Converts the src image to grayscale\n"
202 "__kernel void grayscale(\n"
203 " __read_only image2d_t src,\n"
204 " __write_only image2d_t grayscale\n"
205 ") {\n"
206 " int2 loc = (int2)(get_global_id(0), get_global_id(1));\n"
207 " write_imagef(grayscale, loc, (float4)(pixel_grayscale(src, loc), 0.0f, 0.0f, 1.0f));\n"
208 "}\n"
209 "\n"
210 "// This kernel computes the harris response for the given grayscale src image\n"
211 "// within the given radius and writes it to harris_buf\n"
212 "__kernel void harris_response(\n"
213 " __read_only image2d_t grayscale,\n"
214 " __global float *harris_buf\n"
215 ") {\n"
216 " int2 loc = (int2)(get_global_id(0), get_global_id(1));\n"
217 "\n"
218 " if (loc.x > get_image_width(grayscale) - 1 || loc.y > get_image_height(grayscale) - 1) {\n"
219 " write_to_1d_arrf(harris_buf, loc, 0);\n"
220 " return;\n"
221 " }\n"
222 "\n"
223 " float scale = 1.0f / ((1 << 2) * HARRIS_RADIUS * 255.0f);\n"
224 "\n"
225 " float sobel_mask_x[3][3] = {\n"
226 " {-1, 0, 1},\n"
227 " {-2, 0, 2},\n"
228 " {-1, 0, 1}\n"
229 " };\n"
230 "\n"
231 " float sobel_mask_y[3][3] = {\n"
232 " { 1, 2, 1},\n"
233 " { 0, 0, 0},\n"
234 " {-1, -2, -1}\n"
235 " };\n"
236 "\n"
237 " // 8 x 8 local work + 3 pixels around each side (needed to accomodate for the\n"
238 " // block size radius of 2)\n"
239 " __local float grayscale_data[196];\n"
240 "\n"
241 " int idx = get_group_id(0) * get_local_size(0);\n"
242 " int idy = get_group_id(1) * get_local_size(1);\n"
243 "\n"
244 " for (int i = idy - 3, it = 0; i < idy + (int)get_local_size(1) + 3; i++, it++) {\n"
245 " for (int j = idx - 3, jt = 0; j < idx + (int)get_local_size(0) + 3; j++, jt++) {\n"
246 " grayscale_data[jt + it * 14] = read_imagef(grayscale, sampler, (int2)(j, i)).x;\n"
247 " }\n"
248 " }\n"
249 "\n"
250 " barrier(CLK_LOCAL_MEM_FENCE);\n"
251 "\n"
252 " float sumdxdy = sum_deriv_prod(grayscale_data, sobel_mask_x, sobel_mask_y);\n"
253 " float sumdx2 = sum_deriv_pow(grayscale_data, sobel_mask_x);\n"
254 " float sumdy2 = sum_deriv_pow(grayscale_data, sobel_mask_y);\n"
255 "\n"
256 " float trace = sumdx2 + sumdy2;\n"
257 " // r = det(M) - k(trace(M))^2\n"
258 " // k usually between 0.04 to 0.06\n"
259 " float r = (sumdx2 * sumdy2 - sumdxdy * sumdxdy) - 0.04f * (trace * trace) * pown(scale, 4);\n"
260 "\n"
261 " // Threshold the r value\n"
262 " harris_buf[loc.x + loc.y * get_image_width(grayscale)] = r * step(HARRIS_THRESHOLD, r);\n"
263 "}\n"
264 "\n"
265 "// Gets a patch centered around a float coordinate from a grayscale image using\n"
266 "// bilinear interpolation\n"
267 "static void get_rect_sub_pix(\n"
268 " __read_only image2d_t grayscale,\n"
269 " float *buffer,\n"
270 " int size_x,\n"
271 " int size_y,\n"
272 " float2 center\n"
273 ") {\n"
274 " float2 offset = ((float2)(size_x, size_y) - 1.0f) * 0.5f;\n"
275 "\n"
276 " for (int i = 0; i < size_y; i++) {\n"
277 " for (int j = 0; j < size_x; j++) {\n"
278 " buffer[i * size_x + j] = read_imagef(\n"
279 " grayscale,\n"
280 " sampler_linear,\n"
281 " (float2)(j, i) + center - offset\n"
282 " ).x * 255.0f;\n"
283 " }\n"
284 " }\n"
285 "}\n"
286 "\n"
287 "// Refines detected features at a sub-pixel level\n"
288 "//\n"
289 "// This function is ported from OpenCV\n"
290 "static float2 corner_sub_pix(\n"
291 " __read_only image2d_t grayscale,\n"
292 " float2 feature,\n"
293 " float *mask\n"
294 ") {\n"
295 " float2 init = feature;\n"
296 " int src_width = get_global_size(0);\n"
297 " int src_height = get_global_size(1);\n"
298 "\n"
299 " const int max_iters = 40;\n"
300 " const float eps = 0.001f * 0.001f;\n"
301 " int i, j, k;\n"
302 "\n"
303 " int iter = 0;\n"
304 " float err = 0;\n"
305 " float subpix[(REFINE_WIN_W + 2) * (REFINE_WIN_H + 2)];\n"
306 " const float flt_epsilon = 0x1.0p-23f;\n"
307 "\n"
308 " do {\n"
309 " float2 feature_tmp;\n"
310 " float a = 0, b = 0, c = 0, bb1 = 0, bb2 = 0;\n"
311 "\n"
312 " get_rect_sub_pix(grayscale, subpix, REFINE_WIN_W + 2, REFINE_WIN_H + 2, feature);\n"
313 " float *subpix_ptr = subpix;\n"
314 " subpix_ptr += REFINE_WIN_W + 2 + 1;\n"
315 "\n"
316 " // process gradient\n"
317 " for (i = 0, k = 0; i < REFINE_WIN_H; i++, subpix_ptr += REFINE_WIN_W + 2) {\n"
318 " float py = i - REFINE_WIN_HALF_H;\n"
319 "\n"
320 " for (j = 0; j < REFINE_WIN_W; j++, k++) {\n"
321 " float m = mask[k];\n"
322 " float tgx = subpix_ptr[j + 1] - subpix_ptr[j - 1];\n"
323 " float tgy = subpix_ptr[j + REFINE_WIN_W + 2] - subpix_ptr[j - REFINE_WIN_W - 2];\n"
324 " float gxx = tgx * tgx * m;\n"
325 " float gxy = tgx * tgy * m;\n"
326 " float gyy = tgy * tgy * m;\n"
327 " float px = j - REFINE_WIN_HALF_W;\n"
328 "\n"
329 " a += gxx;\n"
330 " b += gxy;\n"
331 " c += gyy;\n"
332 "\n"
333 " bb1 += gxx * px + gxy * py;\n"
334 " bb2 += gxy * px + gyy * py;\n"
335 " }\n"
336 " }\n"
337 "\n"
338 " float det = a * c - b * b;\n"
339 " if (fabs(det) <= flt_epsilon * flt_epsilon) {\n"
340 " break;\n"
341 " }\n"
342 "\n"
343 " // 2x2 matrix inversion\n"
344 " float scale = 1.0f / det;\n"
345 " feature_tmp.x = (float)(feature.x + (c * scale * bb1) - (b * scale * bb2));\n"
346 " feature_tmp.y = (float)(feature.y - (b * scale * bb1) + (a * scale * bb2));\n"
347 " err = dot(feature_tmp - feature, feature_tmp - feature);\n"
348 "\n"
349 " feature = feature_tmp;\n"
350 " if (feature.x < 0 || feature.x >= src_width || feature.y < 0 || feature.y >= src_height) {\n"
351 " break;\n"
352 " }\n"
353 " } while (++iter < max_iters && err > eps);\n"
354 "\n"
355 " // Make sure new point isn't too far from the initial point (indicates poor convergence)\n"
356 " if (fabs(feature.x - init.x) > REFINE_WIN_HALF_W || fabs(feature.y - init.y) > REFINE_WIN_HALF_H) {\n"
357 " feature = init;\n"
358 " }\n"
359 "\n"
360 " return feature;\n"
361 "}\n"
362 "\n"
363 "// Performs non-maximum suppression on the harris response and writes the resulting\n"
364 "// feature locations to refined_features.\n"
365 "//\n"
366 "// Assumes that refined_features and the global work sizes are set up such that the image\n"
367 "// is split up into a grid of 32x32 blocks where each block has a single slot in the\n"
368 "// refined_features buffer. This kernel finds the best corner in each block (if the\n"
369 "// block has any) and writes it to the corresponding slot in the buffer.\n"
370 "//\n"
371 "// If subpixel_refine is true, the features are additionally refined at a sub-pixel\n"
372 "// level for increased precision.\n"
373 "__kernel void refine_features(\n"
374 " __read_only image2d_t grayscale,\n"
375 " __global const float *harris_buf,\n"
376 " __global float2 *refined_features,\n"
377 " int subpixel_refine\n"
378 ") {\n"
379 " int2 loc = (int2)(get_global_id(0), get_global_id(1));\n"
380 " // The location in the grayscale buffer rather than the compacted grid\n"
381 " int2 loc_i = (int2)(loc.x * 32, loc.y * 32);\n"
382 "\n"
383 " float new_val;\n"
384 " float max_val = 0;\n"
385 " float2 loc_max = (float2)(-1, -1);\n"
386 "\n"
387 " int end_x = min(loc_i.x + 32, (int)get_image_dim(grayscale).x - 1);\n"
388 " int end_y = min(loc_i.y + 32, (int)get_image_dim(grayscale).y - 1);\n"
389 "\n"
390 " for (int i = loc_i.x; i < end_x; ++i) {\n"
391 " for (int j = loc_i.y; j < end_y; ++j) {\n"
392 " new_val = harris_buf[i + j * get_image_dim(grayscale).x];\n"
393 "\n"
394 " if (new_val > max_val) {\n"
395 " max_val = new_val;\n"
396 " loc_max = (float2)(i, j);\n"
397 " }\n"
398 " }\n"
399 " }\n"
400 "\n"
401 " if (max_val == 0) {\n"
402 " // There are no features in this part of the frame\n"
403 " write_to_1d_arrf2(refined_features, loc, loc_max);\n"
404 " return;\n"
405 " }\n"
406 "\n"
407 " if (subpixel_refine) {\n"
408 " float mask[REFINE_WIN_H * REFINE_WIN_W];\n"
409 " for (int i = 0; i < REFINE_WIN_H; i++) {\n"
410 " float y = (float)(i - REFINE_WIN_HALF_H) / REFINE_WIN_HALF_H;\n"
411 " float vy = exp(-y * y);\n"
412 "\n"
413 " for (int j = 0; j < REFINE_WIN_W; j++) {\n"
414 " float x = (float)(j - REFINE_WIN_HALF_W) / REFINE_WIN_HALF_W;\n"
415 " mask[i * REFINE_WIN_W + j] = (float)(vy * exp(-x * x));\n"
416 " }\n"
417 " }\n"
418 "\n"
419 " loc_max = corner_sub_pix(grayscale, loc_max, mask);\n"
420 " }\n"
421 "\n"
422 " write_to_1d_arrf2(refined_features, loc, loc_max);\n"
423 "}\n"
424 "\n"
425 "// Extracts BRIEF descriptors from the grayscale src image for the given features\n"
426 "// using the provided sampler.\n"
427 "__kernel void brief_descriptors(\n"
428 " __read_only image2d_t grayscale,\n"
429 " __global const float2 *refined_features,\n"
430 " // for 512 bit descriptors\n"
431 " __global ulong8 *desc_buf,\n"
432 " __global const PointPair *brief_pattern\n"
433 ") {\n"
434 " int2 loc = (int2)(get_global_id(0), get_global_id(1));\n"
435 " float2 feature = read_from_1d_arrf2(refined_features, loc);\n"
436 "\n"
437 " // There was no feature in this part of the frame\n"
438 " if (feature.x == -1) {\n"
439 " write_to_1d_arrul8(desc_buf, loc, (ulong8)(0));\n"
440 " return;\n"
441 " }\n"
442 "\n"
443 " ulong8 desc = 0;\n"
444 " ulong *p = &desc;\n"
445 "\n"
446 " for (int i = 0; i < 8; ++i) {\n"
447 " for (int j = 0; j < 64; ++j) {\n"
448 " PointPair pair = brief_pattern[j * (i + 1)];\n"
449 " float l1 = read_imagef(grayscale, sampler_linear, feature + pair.p1).x;\n"
450 " float l2 = read_imagef(grayscale, sampler_linear, feature + pair.p2).x;\n"
451 "\n"
452 " if (l1 < l2) {\n"
453 " p[i] |= 1UL << j;\n"
454 " }\n"
455 " }\n"
456 " }\n"
457 "\n"
458 " write_to_1d_arrul8(desc_buf, loc, desc);\n"
459 "}\n"
460 "\n"
461 "// Given buffers with descriptors for the current and previous frame, determines\n"
462 "// which ones match, writing correspondences to matches_buf.\n"
463 "//\n"
464 "// Feature and descriptor buffers are assumed to be compacted (each element sourced\n"
465 "// from a 32x32 block in the frame being processed).\n"
466 "__kernel void match_descriptors(\n"
467 " __global const float2 *prev_refined_features,\n"
468 " __global const float2 *refined_features,\n"
469 " __global const ulong8 *desc_buf,\n"
470 " __global const ulong8 *prev_desc_buf,\n"
471 " __global MotionVector *matches_buf\n"
472 ") {\n"
473 " int2 loc = (int2)(get_global_id(0), get_global_id(1));\n"
474 " ulong8 desc = read_from_1d_arrul8(desc_buf, loc);\n"
475 " const int search_radius = 3;\n"
476 "\n"
477 " MotionVector invalid_vector = (MotionVector) {\n"
478 " (PointPair) {\n"
479 " (float2)(-1, -1),\n"
480 " (float2)(-1, -1)\n"
481 " },\n"
482 " 0\n"
483 " };\n"
484 "\n"
485 " if (desc.s0 == 0 && desc.s1 == 0) {\n"
486 " // There was no feature in this part of the frame\n"
487 " write_to_1d_arrvec(\n"
488 " matches_buf,\n"
489 " loc,\n"
490 " invalid_vector\n"
491 " );\n"
492 " return;\n"
493 " }\n"
494 "\n"
495 " int2 start = max(loc - search_radius, 0);\n"
496 " int2 end = min(loc + search_radius, (int2)(get_global_size(0) - 1, get_global_size(1) - 1));\n"
497 "\n"
498 " for (int i = start.x; i < end.x; ++i) {\n"
499 " for (int j = start.y; j < end.y; ++j) {\n"
500 " int2 prev_point = (int2)(i, j);\n"
501 " int total_dist = 0;\n"
502 "\n"
503 " ulong8 prev_desc = read_from_1d_arrul8(prev_desc_buf, prev_point);\n"
504 "\n"
505 " if (prev_desc.s0 == 0 && prev_desc.s1 == 0) {\n"
506 " continue;\n"
507 " }\n"
508 "\n"
509 " ulong *prev_desc_p = &prev_desc;\n"
510 " ulong *desc_p = &desc;\n"
511 "\n"
512 " for (int i = 0; i < 8; i++) {\n"
513 " total_dist += popcount(desc_p[i] ^ prev_desc_p[i]);\n"
514 " }\n"
515 "\n"
516 " if (total_dist < DISTANCE_THRESHOLD) {\n"
517 " write_to_1d_arrvec(\n"
518 " matches_buf,\n"
519 " loc,\n"
520 " (MotionVector) {\n"
521 " (PointPair) {\n"
522 " read_from_1d_arrf2(prev_refined_features, prev_point),\n"
523 " read_from_1d_arrf2(refined_features, loc)\n"
524 " },\n"
525 " 1\n"
526 " }\n"
527 " );\n"
528 "\n"
529 " return;\n"
530 " }\n"
531 " }\n"
532 " }\n"
533 "\n"
534 " // There is no found match for this point\n"
535 " write_to_1d_arrvec(\n"
536 " matches_buf,\n"
537 " loc,\n"
538 " invalid_vector\n"
539 " );\n"
540 "}\n"
541 "\n"
542 "// Returns the position of the given point after the transform is applied\n"
543 "static float2 transformed_point(float2 p, __global const float *transform) {\n"
544 " float2 ret;\n"
545 "\n"
546 " ret.x = p.x * transform[0] + p.y * transform[1] + transform[2];\n"
547 " ret.y = p.x * transform[3] + p.y * transform[4] + transform[5];\n"
548 "\n"
549 " return ret;\n"
550 "}\n"
551 "\n"
552 "\n"
553 "// Performs the given transform on the src image\n"
554 "__kernel void transform(\n"
555 " __read_only image2d_t src,\n"
556 " __write_only image2d_t dst,\n"
557 " __global const float *transform\n"
558 ") {\n"
559 " int2 loc = (int2)(get_global_id(0), get_global_id(1));\n"
560 " float2 norm = convert_float2(get_image_dim(src));\n"
561 "\n"
562 " write_imagef(\n"
563 " dst,\n"
564 " loc,\n"
565 " read_imagef(\n"
566 " src,\n"
567 " sampler_linear_mirror,\n"
568 " transformed_point((float2)(loc.x, loc.y), transform) / norm\n"
569 " )\n"
570 " );\n"
571 "}\n"
572 "\n"
573 "// Returns the new location of the given point using the given crop bounding box\n"
574 "// and the width and height of the original frame.\n"
575 "static float2 cropped_point(\n"
576 " float2 p,\n"
577 " float2 top_left,\n"
578 " float2 bottom_right,\n"
579 " int2 orig_dim\n"
580 ") {\n"
581 " float2 ret;\n"
582 "\n"
583 " float crop_width = bottom_right.x - top_left.x;\n"
584 " float crop_height = bottom_right.y - top_left.y;\n"
585 "\n"
586 " float width_norm = p.x / (float)orig_dim.x;\n"
587 " float height_norm = p.y / (float)orig_dim.y;\n"
588 "\n"
589 " ret.x = (width_norm * crop_width) + top_left.x;\n"
590 " ret.y = (height_norm * crop_height) + ((float)orig_dim.y - bottom_right.y);\n"
591 "\n"
592 " return ret;\n"
593 "}\n"
594 "\n"
595 "// Upscales the given cropped region to the size of the original frame\n"
596 "__kernel void crop_upscale(\n"
597 " __read_only image2d_t src,\n"
598 " __write_only image2d_t dst,\n"
599 " float2 top_left,\n"
600 " float2 bottom_right\n"
601 ") {\n"
602 " int2 loc = (int2)(get_global_id(0), get_global_id(1));\n"
603 "\n"
604 " write_imagef(\n"
605 " dst,\n"
606 " loc,\n"
607 " read_imagef(\n"
608 " src,\n"
609 " sampler_linear,\n"
610 " cropped_point((float2)(loc.x, loc.y), top_left, bottom_right, get_image_dim(dst))\n"
611 " )\n"
612 " );\n"
613 "}\n"
614 "\n"
615 "// Draws boxes to represent the given point matches and uses the given transform\n"
616 "// and crop info to make sure their positions are accurate on the transformed frame.\n"
617 "//\n"
618 "// model_matches is an array of three points that were used by the RANSAC process\n"
619 "// to generate the given transform\n"
620 "__kernel void draw_debug_info(\n"
621 " __write_only image2d_t dst,\n"
622 " __global const MotionVector *matches,\n"
623 " __global const MotionVector *model_matches,\n"
624 " int num_model_matches,\n"
625 " __global const float *transform\n"
626 ") {\n"
627 " int loc = get_global_id(0);\n"
628 " MotionVector vec = matches[loc];\n"
629 " // Black box: matched point that RANSAC considered an outlier\n"
630 " float4 big_rect_color = (float4)(0.1f, 0.1f, 0.1f, 1.0f);\n"
631 "\n"
632 " if (vec.should_consider) {\n"
633 " // Green box: matched point that RANSAC considered an inlier\n"
634 " big_rect_color = (float4)(0.0f, 1.0f, 0.0f, 1.0f);\n"
635 " }\n"
636 "\n"
637 " for (int i = 0; i < num_model_matches; i++) {\n"
638 " if (vec.p.p2.x == model_matches[i].p.p2.x && vec.p.p2.y == model_matches[i].p.p2.y) {\n"
639 " // Orange box: point used to calculate model\n"
640 " big_rect_color = (float4)(1.0f, 0.5f, 0.0f, 1.0f);\n"
641 " }\n"
642 " }\n"
643 "\n"
644 " float2 transformed_p1 = transformed_point(vec.p.p1, transform);\n"
645 " float2 transformed_p2 = transformed_point(vec.p.p2, transform);\n"
646 "\n"
647 " draw_box(dst, (int2)(transformed_p2.x, transformed_p2.y), big_rect_color, 5);\n"
648 " // Small light blue box: the point in the previous frame\n"
649 " draw_box(dst, (int2)(transformed_p1.x, transformed_p1.y), (float4)(0.0f, 0.3f, 0.7f, 1.0f), 3);\n"
650 "}\n"
651 ;
const char * ff_opencl_source_deshake
Definition: deshake.c:2