File: | root/firefox-clang/third_party/aom/av1/encoder/cnn.c |
Warning: | line 22, column 9 Excessive padding in 'CONVOLVE_OPS' (8 padding bytes, where 0 is optimal). Optimal fields order: input, layer_config, output, in_width, in_height, in_stride, out_stride, start_idx, th_step, consider reordering the fields or adding explicit padding members |
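On an LP64 target (8-byte pointers, 4-byte ints; an assumption about the build this report analyzed), the declared field order leaves a 4-byte hole after in_stride and another after th_step, which accounts for the 8 padding bytes reported above. A minimal sketch of the reordering the analyzer suggests, shown for illustration only and not applied to the listing below, groups the three pointers ahead of the six ints so the 48 bytes of members pack with no padding:

typedef struct {
  const float **input;                   // 8-byte pointers first
  const CNN_LAYER_CONFIG *layer_config;
  float **output;
  int in_width;                          // six 4-byte ints follow: 24 + 24 = 48
  int in_height;                         // bytes, a multiple of the 8-byte
  int in_stride;                         // alignment, so no tail padding
  int out_stride;
  int start_idx;
  int th_step;
} CONVOLVE_OPS;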
1 | /* |
2 | * Copyright (c) 2019, Alliance for Open Media. All rights reserved. |
3 | * |
4 | * This source code is subject to the terms of the BSD 2 Clause License and |
5 | * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
6 | * was not distributed with this source code in the LICENSE file, you can |
7 | * obtain it at www.aomedia.org/license/software. If the Alliance for Open |
8 | * Media Patent License 1.0 was not distributed with this source code in the |
9 | * PATENTS file, you can obtain it at www.aomedia.org/license/patent. |
10 | */ |
11 | |
12 | #include <assert.h> |
13 | #include <math.h> |
14 | #include <stdbool.h> |
15 | |
16 | #include "aom_dsp/aom_dsp_common.h" |
17 | #include "av1/common/av1_common_int.h" |
18 | #include "av1/encoder/cnn.h" |
19 | |
20 | #define CLAMPINDEX(a, hi) ((a) < 0 ? 0 : ((a) >= (hi) ? ((hi)-1) : (a))) |
21 | |
22 | typedef struct { |
23 | const float **input; |
24 | int in_width; |
25 | int in_height; |
26 | int in_stride; |
27 | const CNN_LAYER_CONFIG *layer_config; |
28 | float **output; |
29 | int out_stride; |
30 | int start_idx; |
31 | int th_step; |
32 | } CONVOLVE_OPS; |
33 | |
34 | static inline float softsign(float x) { return x / (fabsf(x) + 1.0f); } |
35 | |
36 | static inline float relu(float x) { return (x < 0) ? 0 : x; } |
37 | |
38 | typedef struct { |
39 | int allocsize; |
40 | int channels; |
41 | int width, height, stride; |
42 | float *buf[CNN_MAX_CHANNELS]; |
43 | } TENSOR; |
44 | |
45 | static void init_tensor(TENSOR *tensor) { memset(tensor, 0, sizeof(*tensor)); } |
46 | |
47 | static void free_tensor(TENSOR *tensor) { |
48 | if (tensor->allocsize) { |
49 | aom_free(tensor->buf[0]); |
50 | tensor->buf[0] = NULL; |
51 | tensor->allocsize = 0; |
52 | } |
53 | } |
54 | |
55 | static bool realloc_tensor(TENSOR *tensor, int channels, int width, |
56 | int height) { |
57 | const int newallocsize = channels * width * height; |
58 | if (tensor->allocsize < newallocsize) { |
59 | free_tensor(tensor); |
60 | tensor->buf[0] = |
61 | (float *)aom_malloc(sizeof(*tensor->buf[0]) * newallocsize); |
62 | if (!tensor->buf[0]) return false; |
63 | tensor->allocsize = newallocsize; |
64 | } |
65 | tensor->width = width; |
66 | tensor->height = height; |
67 | tensor->stride = width; |
68 | tensor->channels = channels; |
69 | for (int c = 1; c < channels; ++c) |
70 | tensor->buf[c] = &tensor->buf[0][c * width * height]; |
71 | return true; |
72 | } |
73 | |
74 | static void copy_tensor(const TENSOR *src, int copy_channels, int dst_offset, |
75 | TENSOR *dst) { |
76 | assert(src->width == dst->width); |
77 | assert(src->height == dst->height); |
78 | assert(copy_channels <= src->channels); |
79 | if (src->stride == dst->width && dst->stride == dst->width) { |
80 | for (int c = 0; c < copy_channels; ++c) { |
81 | memcpy(dst->buf[dst_offset + c], src->buf[c], |
82 | sizeof(*dst->buf[0]) * src->width * src->height); |
83 | } |
84 | } else { |
85 | for (int c = 0; c < copy_channels; ++c) { |
86 | for (int r = 0; r < dst->height; ++r) { |
87 | memcpy(&dst->buf[dst_offset + c][r * dst->stride], |
88 | &src->buf[c][r * src->stride], |
89 | dst->width * sizeof(*dst->buf[c])); |
90 | } |
91 | } |
92 | } |
93 | } |
94 | |
95 | static void assign_tensor(TENSOR *tensor, float *buf[CNN_MAX_CHANNELS], |
96 | int channels, int width, int height, int stride) { |
97 | tensor->allocsize = 0; |
98 | tensor->channels = channels; |
99 | tensor->width = width; |
100 | tensor->height = height; |
101 | tensor->stride = stride; |
102 | if (buf) { |
103 | for (int c = 0; c < channels; ++c) tensor->buf[c] = buf[c]; |
104 | } else { |
105 | for (int c = 0; c < channels; ++c) tensor->buf[c] = NULL; |
106 | } |
107 | } |
108 | |
109 | static void swap_tensor(TENSOR *t1, TENSOR *t2) { |
110 | TENSOR t = *t1; |
111 | *t1 = *t2; |
112 | *t2 = t; |
113 | } |
114 | |
115 | // The concatenated tensor goes into dst with first the channels in |
116 | // original dst followed by the channels in the src |
117 | static bool concat_tensor(const TENSOR *src, TENSOR *dst) { |
118 | assert(src->width == dst->width); |
119 | assert(src->height == dst->height); |
120 | |
121 | const int dst_channels = dst->channels; |
122 | const int channels = dst->channels + src->channels; |
123 | const int newallocsize = channels * dst->width * dst->height; |
124 | if (dst->allocsize < newallocsize) { |
125 | TENSOR t; |
126 | init_tensor(&t); |
127 | // allocate new buffers and copy first the dst channels |
128 | if (!realloc_tensor(&t, channels, dst->width, dst->height)) return false; |
129 | copy_tensor(dst, dst->channels, 0, &t); |
130 | // Swap the tensors and free the old buffers |
131 | swap_tensor(dst, &t); |
132 | free_tensor(&t); |
133 | } |
134 | for (int c = 1; c < channels; ++c) |
135 | dst->buf[c] = &dst->buf[0][c * dst->width * dst->height]; |
136 | // Copy the channels in src after the first dst_channels channels. |
137 | copy_tensor(src, src->channels, dst_channels, dst); |
138 | return true; |
139 | } |
140 | |
141 | #ifndef NDEBUG |
142 | static int check_tensor_equal_dims(TENSOR *t1, TENSOR *t2) { |
143 | return (t1->width == t2->width && t1->height == t2->height); |
144 | } |
145 | |
146 | static int check_tensor_equal_size(TENSOR *t1, TENSOR *t2) { |
147 | return (t1->channels == t2->channels && t1->width == t2->width && |
148 | t1->height == t2->height); |
149 | } |
150 | #endif // NDEBUG |
151 | |
152 | void av1_find_cnn_layer_output_size(int in_width, int in_height, |
153 | const CNN_LAYER_CONFIG *layer_config, |
154 | int *out_width, int *out_height) { |
155 | assert(layer_config->skip_width > 0); |
156 | assert(layer_config->skip_height > 0); |
157 | if (!layer_config->deconvolve) { |
158 | switch (layer_config->pad) { |
159 | case PADDING_SAME_ZERO: |
160 | case PADDING_SAME_REPLICATE: |
161 | *out_width = (in_width + layer_config->skip_width - 1) / |
162 | layer_config->skip_width; |
163 | *out_height = (in_height + layer_config->skip_height - 1) / |
164 | layer_config->skip_height; |
165 | break; |
166 | case PADDING_VALID: |
167 | *out_width = |
168 | (in_width - layer_config->filter_width + layer_config->skip_width) / |
169 | layer_config->skip_width; |
170 | *out_height = (in_height - layer_config->filter_height + |
171 | layer_config->skip_height) / |
172 | layer_config->skip_height; |
173 | break; |
174 | default: assert(0 && "Unknown padding type"); |
175 | } |
176 | } else { |
177 | switch (layer_config->pad) { |
178 | case PADDING_SAME_ZERO: |
179 | case PADDING_SAME_REPLICATE: |
180 | *out_width = in_width * layer_config->skip_width; |
181 | *out_height = in_height * layer_config->skip_height; |
182 | break; |
183 | case PADDING_VALID: |
184 | *out_width = (in_width - 1) * layer_config->skip_width + |
185 | layer_config->filter_width; |
186 | *out_height = (in_height - 1) * layer_config->skip_height + |
187 | layer_config->filter_height; |
188 | break; |
189 | default: assert(0 && "Unknown padding type"); |
190 | } |
191 | } |
192 | } |
193 | |
194 | static void find_cnn_out_channels(const CNN_LAYER_CONFIG *layer_config, |
195 | int channels_per_branch[]) { |
196 | int branch = layer_config->branch; |
197 | const CNN_BRANCH_CONFIG *branch_config = &layer_config->branch_config; |
198 | for (int b = 0; b < CNN_MAX_BRANCHES; ++b) { |
199 | if ((branch_config->input_to_branches & (1 << b)) && b != branch) { |
200 | if (layer_config->branch_copy_type == BRANCH_INPUT) { |
201 | channels_per_branch[b] = layer_config->in_channels; |
202 | } else if (layer_config->branch_copy_type == BRANCH_OUTPUT) { |
203 | channels_per_branch[b] = layer_config->out_channels; |
204 | } else if (layer_config->branch_copy_type == BRANCH_COMBINED) { |
205 | channels_per_branch[b] = layer_config->out_channels; |
206 | for (int c = 0; c < CNN_MAX_BRANCHES; ++c) { |
207 | if ((branch_config->branches_to_combine & (1 << c)) && c != branch) { |
208 | assert(channels_per_branch[c] > 0); |
209 | channels_per_branch[b] += channels_per_branch[c]; |
210 | } |
211 | } |
212 | } |
213 | } |
214 | } |
215 | channels_per_branch[branch] = layer_config->out_channels; |
216 | for (int c = 0; c < CNN_MAX_BRANCHES; ++c) { |
217 | if ((branch_config->branches_to_combine & (1 << c)) && c != branch) { |
218 | assert(channels_per_branch[c] > 0); |
219 | channels_per_branch[branch] += channels_per_branch[c]; |
220 | } |
221 | } |
222 | } |
223 | |
224 | #if CONFIG_DEBUG |
225 | static inline int cnn_has_at_least_one_output(const CNN_CONFIG *cnn_config) { |
226 | const int num_layers = cnn_config->num_layers; |
227 | const CNN_LAYER_CONFIG *layer_configs = cnn_config->layer_config; |
228 | |
229 | for (int idx = 0; idx < num_layers; idx++) { |
230 | if (layer_configs[idx].output_num != -1) { |
231 | return 1; |
232 | } |
233 | } |
234 | return 0; |
235 | } |
236 | #endif |
237 | |
238 | void av1_find_cnn_output_size(int in_width, int in_height, |
239 | const CNN_CONFIG *cnn_config, int *out_width, |
240 | int *out_height, int *out_channels) { |
241 | int channels_per_branch[CNN_MAX_BRANCHES] = { 0 }; |
242 | int i_width[CNN_MAX_BRANCHES] = { 0 }; |
243 | int i_height[CNN_MAX_BRANCHES] = { 0 }; |
244 | i_width[0] = in_width + cnn_config->ext_width * 2; |
245 | i_height[0] = in_height + cnn_config->ext_height * 2; |
246 | |
247 | #if CONFIG_DEBUG |
248 | assert(cnn_has_at_least_one_output(cnn_config)); |
249 | #endif |
250 | |
251 | for (int i = 0; i < cnn_config->num_layers; ++i) { |
252 | const CNN_LAYER_CONFIG *layer_config = &cnn_config->layer_config[i]; |
253 | const CNN_BRANCH_CONFIG *branch_config = &layer_config->branch_config; |
254 | const int branch = layer_config->branch; |
255 | int o_width = 0, o_height = 0; |
256 | |
257 | if (layer_config->branch_copy_type == BRANCH_INPUT) { |
258 | for (int b = 0; b < CNN_MAX_BRANCHES; ++b) { |
259 | if ((branch_config->input_to_branches & (1 << b)) && b != branch) { |
260 | assert(i_width[branch] > 0 && i_height[branch] > 0); |
261 | i_width[b] = i_width[branch]; |
262 | i_height[b] = i_height[branch]; |
263 | } |
264 | } |
265 | } |
266 | |
267 | av1_find_cnn_layer_output_size(i_width[branch], i_height[branch], |
268 | layer_config, &o_width, &o_height); |
269 | i_width[branch] = o_width; |
270 | i_height[branch] = o_height; |
271 | |
272 | if (layer_config->branch_copy_type == BRANCH_OUTPUT) { |
273 | for (int b = 0; b < CNN_MAX_BRANCHES; ++b) { |
274 | if ((branch_config->input_to_branches & (1 << b)) && b != branch) { |
275 | i_width[b] = o_width; |
276 | i_height[b] = o_height; |
277 | } |
278 | } |
279 | } |
280 | |
281 | find_cnn_out_channels(layer_config, channels_per_branch); |
282 | |
283 | const int output_num = layer_config->output_num; |
284 | if (output_num != -1) { // Current layer is an output layer |
285 | out_width[output_num] = o_width; |
286 | out_height[output_num] = o_height; |
287 | out_channels[output_num] = channels_per_branch[layer_config->branch]; |
288 | } |
289 | } |
290 | } |
291 | |
292 | static inline int get_start_shift_convolve(int width, int filt_width, |
293 | int stride) { |
294 | const int mod = (width % stride); |
295 | const int filt_off = (filt_width - 1) / 2; |
296 | const int dif = (mod ? mod - 1 : stride - 1); |
297 | return AOMMIN((dif + (filt_width % 2)) / 2, filt_off); |
298 | } |
299 | |
300 | void av1_cnn_add_c(float **output, int channels, int width, int height, |
301 | int stride, const float **add) { |
302 | for (int c = 0; c < channels; ++c) { |
303 | for (int i = 0; i < height; ++i) |
304 | for (int j = 0; j < width; ++j) |
305 | output[c][i * stride + j] += add[c][i * stride + j]; |
306 | } |
307 | } |
308 | |
309 | void av1_cnn_activate_c(float **output, int channels, int width, int height, |
310 | int stride, ACTIVATION layer_activation) { |
311 | if (layer_activation == RELU) { |
312 | for (int c = 0; c < channels; ++c) { |
313 | for (int i = 0; i < height; ++i) |
314 | for (int j = 0; j < width; ++j) |
315 | output[c][i * stride + j] = relu(output[c][i * stride + j]); |
316 | } |
317 | } else if (layer_activation == SOFTSIGN) { |
318 | for (int c = 0; c < channels; ++c) { |
319 | for (int i = 0; i < height; ++i) |
320 | for (int j = 0; j < width; ++j) |
321 | output[c][i * stride + j] = softsign(output[c][i * stride + j]); |
322 | } |
323 | } else if (layer_activation == SIGMOID) { |
324 | assert(0 && "Sigmoid has not been supported in CNN.")((void) sizeof ((0 && "Sigmoid has not been supported in CNN." ) ? 1 : 0), __extension__ ({ if (0 && "Sigmoid has not been supported in CNN." ) ; else __assert_fail ("0 && \"Sigmoid has not been supported in CNN.\"" , "/root/firefox-clang/third_party/aom/av1/encoder/cnn.c", 324 , __extension__ __PRETTY_FUNCTION__); })); // TO DO |
325 | } else if (layer_activation != NONE) { |
326 | assert(0 && "Unknown activation type")((void) sizeof ((0 && "Unknown activation type") ? 1 : 0), __extension__ ({ if (0 && "Unknown activation type" ) ; else __assert_fail ("0 && \"Unknown activation type\"" , "/root/firefox-clang/third_party/aom/av1/encoder/cnn.c", 326 , __extension__ __PRETTY_FUNCTION__); })); |
327 | } |
328 | } |
329 | |
330 | static bool copy_active_tensor_to_branches(const TENSOR *layer_active_tensor, |
331 | const CNN_LAYER_CONFIG *layer_config, |
332 | int branch, TENSOR branch_output[]) { |
333 | const CNN_BRANCH_CONFIG *branch_config = &layer_config->branch_config; |
334 | for (int b = 0; b < CNN_MAX_BRANCHES; ++b) { |
335 | if ((branch_config->input_to_branches & (1 << b)) && b != branch) { |
336 | // Copy layer's active tensor to output tensor of branch b if set in |
337 | // mask. The output becomes the input of the first layer of the branch |
338 | // because the layer of the branch is not the first layer. |
339 | int copy_channels = branch_config->channels_to_copy > 0 |
340 | ? branch_config->channels_to_copy |
341 | : layer_active_tensor->channels; |
342 | if (!realloc_tensor(&branch_output[b], copy_channels, |
343 | layer_active_tensor->width, |
344 | layer_active_tensor->height)) { |
345 | return false; |
346 | } |
347 | copy_tensor(layer_active_tensor, copy_channels, 0, &branch_output[b]); |
348 | } |
349 | } |
350 | return true; |
351 | } |
352 | |
353 | // CNNConvolve specific to maxpool set as 1, either skip_width or skip_height |
354 | // greater than 1 and padding equal to PADDING_SAME_ZERO. |
355 | static void convolve_maxpool_padding_zero( |
356 | const float **input, int in_width, int in_height, int in_stride, |
357 | const CNN_LAYER_CONFIG *const layer_config, float **output, int out_stride, |
358 | const int cstep, const int filter_width_half, |
359 | const int filter_height_half) { |
360 | for (int i = 0; i < layer_config->out_channels; ++i) { |
361 | for (int h = 0, u = 0; h < in_height; h += layer_config->skip_height, ++u) { |
362 | for (int w = 0, v = 0; w < in_width; w += layer_config->skip_width, ++v) { |
363 | for (int hh = h; hh < AOMMIN(in_height, h + layer_config->skip_height); |
364 | ++hh) { |
365 | for (int ww = w; ww < AOMMIN(in_width, w + layer_config->skip_width); |
366 | ++ww) { |
367 | float sum = layer_config->bias[i]; |
368 | for (int k = 0; k < layer_config->in_channels; ++k) { |
369 | int off = k * layer_config->out_channels + i; |
370 | for (int l = 0; l < layer_config->filter_height; ++l) { |
371 | const int ii = hh + l - filter_height_half; |
372 | for (int m = 0; m < layer_config->filter_width; |
373 | ++m, off += cstep) { |
374 | const int jj = ww + m - filter_width_half; |
375 | if (ii < 0 || ii >= in_height || jj < 0 || jj >= in_width) |
376 | continue; |
377 | sum += layer_config->weights[off] * |
378 | input[k][ii * in_stride + jj]; |
379 | } |
380 | } |
381 | } |
382 | const float a = sum; |
383 | if (h == hh && w == ww) |
384 | output[i][u * out_stride + v] = a; |
385 | else |
386 | output[i][u * out_stride + v] = |
387 | AOMMAX(output[i][u * out_stride + v], a); |
388 | } |
389 | } |
390 | } |
391 | } |
392 | } |
393 | } |
394 | |
395 | // CNNConvolve specific to maxpool set as 1, either skip_width or skip_height |
396 | // greater than 1 and padding equal to PADDING_SAME_REPLICATE. |
397 | static void convolve_maxpool_padding_replicate( |
398 | const float **input, int in_width, int in_height, int in_stride, |
399 | const CNN_LAYER_CONFIG *const layer_config, float **output, int out_stride, |
400 | const int cstep, const int filter_width_half, |
401 | const int filter_height_half) { |
402 | for (int i = 0; i < layer_config->out_channels; ++i) { |
403 | for (int h = 0, u = 0; h < in_height; h += layer_config->skip_height, ++u) { |
404 | for (int w = 0, v = 0; w < in_width; w += layer_config->skip_width, ++v) { |
405 | for (int hh = h; hh < AOMMIN(in_height, h + layer_config->skip_height); |
406 | ++hh) { |
407 | for (int ww = w; ww < AOMMIN(in_width, w + layer_config->skip_width); |
408 | ++ww) { |
409 | float sum = layer_config->bias[i]; |
410 | for (int k = 0; k < layer_config->in_channels; ++k) { |
411 | int off = k * layer_config->out_channels + i; |
412 | for (int l = 0; l < layer_config->filter_height; ++l) { |
413 | const int ii = |
414 | CLAMPINDEX(hh + l - filter_height_half, in_height); |
415 | for (int m = 0; m < layer_config->filter_width; |
416 | ++m, off += cstep) { |
417 | const int jj = |
418 | CLAMPINDEX(ww + m - filter_width_half, in_width); |
419 | assert(ii >= 0 && ii < in_height && jj >= 0 && jj < in_width); |
420 | sum += layer_config->weights[off] * |
421 | input[k][ii * in_stride + jj]; |
422 | } |
423 | } |
424 | } |
425 | const float a = sum; |
426 | if (h == hh && w == ww) |
427 | output[i][u * out_stride + v] = a; |
428 | else |
429 | output[i][u * out_stride + v] = |
430 | AOMMAX(output[i][u * out_stride + v], a); |
431 | } |
432 | } |
433 | } |
434 | } |
435 | } |
436 | } |
437 | |
438 | // CNNConvolve specific to maxpool set as 1, either skip_width or skip_height |
439 | // greater than 1 and padding equal to PADDING_VALID. |
440 | static void convolve_maxpool_padding_valid( |
441 | const float **input, int in_width, int in_height, int in_stride, |
442 | const CNN_LAYER_CONFIG *const layer_config, float **output, int out_stride, |
443 | const int cstep) { |
444 | for (int i = 0; i < layer_config->out_channels; ++i) { |
445 | for (int h = 0, u = 0; h < in_height - layer_config->filter_height + 1; |
446 | h += layer_config->skip_height, ++u) { |
447 | for (int w = 0, v = 0; w < in_width - layer_config->filter_width + 1; |
448 | w += layer_config->skip_width, ++v) { |
449 | for (int hh = h; hh < AOMMIN(in_height, h + layer_config->skip_height); |
450 | ++hh) { |
451 | for (int ww = w; ww < AOMMIN(in_width, w + layer_config->skip_width); |
452 | ++ww) { |
453 | float sum = layer_config->bias[i]; |
454 | for (int k = 0; k < layer_config->in_channels; ++k) { |
455 | int off = k * layer_config->out_channels + i; |
456 | for (int l = 0; l < layer_config->filter_height; ++l) { |
457 | const int ii = hh + l; |
458 | for (int m = 0; m < layer_config->filter_width; |
459 | ++m, off += cstep) { |
460 | const int jj = ww + m; |
461 | assert(ii >= 0 && ii < in_height && jj >= 0 && jj < in_width); |
462 | sum += layer_config->weights[off] * |
463 | input[k][ii * in_stride + jj]; |
464 | } |
465 | } |
466 | } |
467 | const float a = sum; |
468 | if (h == hh && w == ww) |
469 | output[i][u * out_stride + v] = a; |
470 | else |
471 | output[i][u * out_stride + v] = |
472 | AOMMAX(output[i][u * out_stride + v], a); |
473 | } |
474 | } |
475 | } |
476 | } |
477 | } |
478 | } |
479 | |
480 | // CNNConvolve specific to maxpool set as 0 with filter_height and filter_width |
481 | // equal to 1. |
482 | static void convolve_element_wise(const float **input, int in_width, |
483 | int in_height, int in_stride, |
484 | const CNN_LAYER_CONFIG *const layer_config, |
485 | float **output, int out_stride, int start_idx, |
486 | int step) { |
487 | const int start_h = get_start_shift_convolve( |
488 | in_height, layer_config->filter_height, layer_config->skip_height); |
489 | const int start_w = |
490 | get_start_shift_convolve(in_width, layer_config->filter_width, |
491 | layer_config->skip_width) + |
492 | start_idx * layer_config->skip_width; |
493 | const int out_w_step = AOMMAX(step, 1); |
494 | const int in_w_step = layer_config->skip_width * out_w_step; |
495 | for (int i = 0; i < layer_config->out_channels; ++i) { |
496 | for (int h = start_h, u = 0; h < in_height; |
497 | h += layer_config->skip_height, ++u) { |
498 | const int in_h = h * in_stride; |
499 | const int out_h = u * out_stride + start_idx; |
500 | for (int w = start_w, out_index = out_h; w < in_width; |
501 | w += in_w_step, out_index += out_w_step) { |
502 | float sum = layer_config->bias[i]; |
503 | for (int k = 0; k < layer_config->in_channels; ++k) { |
504 | sum += layer_config->weights[k * layer_config->out_channels + i] * |
505 | input[k][in_h + w]; |
506 | } |
507 | output[i][out_index] = sum; |
508 | } |
509 | } |
510 | } |
511 | } |
512 | |
513 | // CNNConvolve specific to maxpool set as 0 and padding equal to |
514 | // PADDING_SAME_ZERO. |
515 | static void convolve_no_maxpool_padding_zero( |
516 | const float **input, int in_width, int in_height, int in_stride, |
517 | const CNN_LAYER_CONFIG *const layer_config, float **output, int out_stride, |
518 | int start_idx, const int cstep, const int filter_width_half, |
519 | const int filter_height_half, const int ii_shift, const int jj_shift, |
520 | const int channel_step) { |
521 | const int start_h = get_start_shift_convolve( |
522 | in_height, layer_config->filter_height, layer_config->skip_height); |
523 | const int start_w = get_start_shift_convolve( |
524 | in_width, layer_config->filter_width, layer_config->skip_width); |
525 | const int end_ii_shift = filter_height_half + 1; |
526 | const int end_jj_shift = filter_width_half + 1; |
527 | // *_filter_margin stores the number of pixels along a dimension in the |
528 | // intersection of the complement of the image in the extended image |
529 | // and the filter. |
530 | const int top_filter_margin = layer_config->filter_width * ii_shift; |
531 | const int right_filter_margin = end_jj_shift - in_width; |
532 | for (int i = start_idx; i < layer_config->out_channels; i += channel_step) { |
533 | for (int h = start_h, u = 0; h < in_height; |
534 | h += layer_config->skip_height, ++u) { |
535 | const int out_h = u * out_stride; |
536 | const int top_cstep = |
537 | AOMMAX(0, top_filter_margin - h * layer_config->filter_width) * |
538 | cstep + |
539 | i; |
540 | const int start_ii = AOMMAX(0, h - ii_shift); |
541 | const int end_ii = AOMMIN(in_height, h + end_ii_shift); |
542 | for (int w = start_w, out_index = out_h; w < in_width; |
543 | w += layer_config->skip_width, ++out_index) { |
544 | const int left_cstep = AOMMAX(0, jj_shift - w) * cstep; |
545 | const int right_cstep = AOMMAX(0, right_filter_margin + w) * cstep; |
546 | const int start_jj = AOMMAX(0, w - jj_shift); |
547 | const int end_jj = AOMMIN(in_width, w + end_jj_shift); |
548 | float sum = layer_config->bias[i]; |
549 | for (int k = 0; k < layer_config->in_channels; ++k) { |
550 | int off = k * layer_config->out_channels + top_cstep; |
551 | for (int ii = start_ii; ii < end_ii; ++ii) { |
552 | off += left_cstep; |
553 | for (int jj = start_jj; jj < end_jj; ++jj, off += cstep) { |
554 | sum += layer_config->weights[off] * input[k][ii * in_stride + jj]; |
555 | } |
556 | off += right_cstep; |
557 | } |
558 | } |
559 | output[i][out_index] = sum; |
560 | } |
561 | } |
562 | } |
563 | } |
564 | |
565 | // CNNConvolve specific to maxpool set as 0 and padding equal to |
566 | // PADDING_SAME_REPLICATE. |
567 | static void convolve_no_maxpool_padding_replicate( |
568 | const float **input, int in_width, int in_height, int in_stride, |
569 | const CNN_LAYER_CONFIG *const layer_config, float **output, int out_stride, |
570 | int start_idx, const int cstep, const int ii_shift, const int jj_shift, |
571 | const int channel_step) { |
572 | // h and w are shifted to an offset coordinate system to reduce in-loop |
573 | // computation. |
574 | const int start_h = |
575 | get_start_shift_convolve(in_height, layer_config->filter_height, |
576 | layer_config->skip_height) - |
577 | ii_shift; |
578 | const int start_w = |
579 | get_start_shift_convolve(in_width, layer_config->filter_width, |
580 | layer_config->skip_width) - |
581 | jj_shift; |
582 | const int end_h = in_height - ii_shift; |
583 | const int end_w = in_width - jj_shift; |
584 | for (int i = start_idx; i < layer_config->out_channels; i += channel_step) { |
585 | for (int h = start_h, u = 0; h < end_h; |
586 | h += layer_config->skip_height, ++u) { |
587 | const int out_h = u * out_stride; |
588 | const int upper_ii_index = layer_config->filter_height + h; |
589 | for (int w = start_w, out_index = out_h; w < end_w; |
590 | w += layer_config->skip_width, ++out_index) { |
591 | const int upper_jj_index = layer_config->filter_width + w; |
592 | float sum = layer_config->bias[i]; |
593 | for (int k = 0; k < layer_config->in_channels; ++k) { |
594 | int off = k * layer_config->out_channels + i; |
595 | for (int ii = h; ii < upper_ii_index; ++ii) { |
596 | const int clamped_ii = CLAMPINDEX(ii, in_height); |
597 | for (int jj = w; jj < upper_jj_index; ++jj) { |
598 | const int clamped_jj = CLAMPINDEX(jj, in_width); |
599 | assert(clamped_ii >= 0 && clamped_ii < in_height && |
600 | clamped_jj >= 0 && clamped_jj < in_width); |
601 | sum += layer_config->weights[off] * |
602 | input[k][clamped_ii * in_stride + clamped_jj]; |
603 | off += cstep; |
604 | } |
605 | } |
606 | } |
607 | output[i][out_index] = sum; |
608 | } |
609 | } |
610 | } |
611 | } |
612 | |
613 | // CNNConvolve specific to maxpool set as 0 and padding equal to |
614 | // PADDING_VALID. |
615 | void av1_cnn_convolve_no_maxpool_padding_valid_c( |
616 | const float **input, int in_width, int in_height, int in_stride, |
617 | const CNN_LAYER_CONFIG *layer_config, float **output, int out_stride, |
618 | int start_idx, int cstep, int channel_step) { |
619 | assert((layer_config->skip_height == 1 && layer_config->skip_width == 1) || |
620 | !layer_config->maxpool); |
621 | assert(layer_config->filter_height > 1 || layer_config->filter_width > 1); |
622 | assert(layer_config->pad == PADDING_VALID); |
623 | for (int i = start_idx; i < layer_config->out_channels; i += channel_step) { |
624 | for (int h = 0, u = 0; h < in_height - layer_config->filter_height + 1; |
625 | h += layer_config->skip_height, ++u) { |
626 | const int out_h = u * out_stride; |
627 | const int upper_ii_index = layer_config->filter_height + h; |
628 | for (int w = 0, out_index = out_h; |
629 | w < in_width - layer_config->filter_width + 1; |
630 | w += layer_config->skip_width, ++out_index) { |
631 | const int upper_jj_index = layer_config->filter_width + w; |
632 | float sum = layer_config->bias[i]; |
633 | for (int k = 0; k < layer_config->in_channels; ++k) { |
634 | int off = k * layer_config->out_channels + i; |
635 | for (int ii = h; ii < upper_ii_index; ++ii) { |
636 | for (int jj = w; jj < upper_jj_index; ++jj) { |
637 | assert(ii >= 0 && ii < in_height && jj >= 0 && jj < in_width); |
638 | sum += layer_config->weights[off] * input[k][ii * in_stride + jj]; |
639 | off += cstep; |
640 | } |
641 | } |
642 | } |
643 | output[i][out_index] = sum; |
644 | } |
645 | } |
646 | } |
647 | } |
648 | |
649 | static void av1_cnn_convolve(const float **input, int in_width, int in_height, |
650 | int in_stride, |
651 | const CNN_LAYER_CONFIG *layer_config, |
652 | float **output, int out_stride, int start_idx, |
653 | int step) { |
654 | assert(!layer_config->deconvolve); |
655 | const int cstep = layer_config->in_channels * layer_config->out_channels; |
656 | const int filter_height_half = layer_config->filter_height >> 1; |
657 | const int filter_width_half = layer_config->filter_width >> 1; |
658 | const int channel_step = AOMMAX(step, 1); |
659 | |
660 | if (layer_config->maxpool && |
661 | (layer_config->skip_height > 1 || layer_config->skip_width > 1)) { |
662 | switch (layer_config->pad) { |
663 | case PADDING_SAME_ZERO: |
664 | convolve_maxpool_padding_zero(input, in_width, in_height, in_stride, |
665 | layer_config, output, out_stride, cstep, |
666 | filter_width_half, filter_height_half); |
667 | break; |
668 | case PADDING_SAME_REPLICATE: |
669 | convolve_maxpool_padding_replicate( |
670 | input, in_width, in_height, in_stride, layer_config, output, |
671 | out_stride, cstep, filter_width_half, filter_height_half); |
672 | break; |
673 | case PADDING_VALID: |
674 | convolve_maxpool_padding_valid(input, in_width, in_height, in_stride, |
675 | layer_config, output, out_stride, cstep); |
676 | break; |
677 | default: assert(0 && "Unknown padding type"); |
678 | } |
679 | } else { |
680 | // Results in element-wise matrix multiplication. |
681 | if (layer_config->filter_height == 1 && layer_config->filter_width == 1) { |
682 | convolve_element_wise(input, in_width, in_height, in_stride, layer_config, |
683 | output, out_stride, start_idx, step); |
684 | return; |
685 | } |
686 | const int ii_shift = |
687 | filter_height_half - (layer_config->filter_height - 1) % 2; |
688 | const int jj_shift = |
689 | filter_width_half - (layer_config->filter_width - 1) % 2; |
690 | switch (layer_config->pad) { |
691 | case PADDING_SAME_ZERO: |
692 | convolve_no_maxpool_padding_zero( |
693 | input, in_width, in_height, in_stride, layer_config, output, |
694 | out_stride, start_idx, cstep, filter_width_half, filter_height_half, |
695 | ii_shift, jj_shift, channel_step); |
696 | break; |
697 | case PADDING_SAME_REPLICATE: |
698 | convolve_no_maxpool_padding_replicate( |
699 | input, in_width, in_height, in_stride, layer_config, output, |
700 | out_stride, start_idx, cstep, ii_shift, jj_shift, channel_step); |
701 | break; |
702 | case PADDING_VALID: |
703 | av1_cnn_convolve_no_maxpool_padding_valid( |
704 | input, in_width, in_height, in_stride, layer_config, output, |
705 | out_stride, start_idx, cstep, channel_step); |
706 | break; |
707 | default: assert(0 && "Unknown padding type"); |
708 | } |
709 | } |
710 | } |
711 | |
712 | static int convolve_layer(void *arg1, void *arg2) { |
713 | const CONVOLVE_OPS *convolve_ops = arg1; |
714 | (void)arg2; |
715 | av1_cnn_convolve( |
716 | convolve_ops->input, convolve_ops->in_width, convolve_ops->in_height, |
717 | convolve_ops->in_stride, convolve_ops->layer_config, convolve_ops->output, |
718 | convolve_ops->out_stride, convolve_ops->start_idx, convolve_ops->th_step); |
719 | return 1; |
720 | } |
721 | |
722 | static void convolve_layer_mt(const float **input, int in_width, int in_height, |
723 | int in_stride, |
724 | const CNN_LAYER_CONFIG *layer_config, |
725 | const CNN_THREAD_DATA *thread_data, |
726 | float **output, int out_stride) { |
727 | const AVxWorkerInterface *const winterface = aom_get_worker_interface(); |
728 | const int num_workers = thread_data->num_workers; |
729 | assert(thread_data->workers); |
730 | |
731 | CONVOLVE_OPS convolve_ops[CNN_MAX_THREADS]; |
732 | for (int th = 0; th < AOMMIN(num_workers, CNN_MAX_THREADS); ++th) { |
733 | AVxWorker *const worker = &thread_data->workers[th]; |
734 | winterface->reset(worker); |
735 | |
736 | CONVOLVE_OPS convolve_op = { input, in_width, in_height, |
737 | in_stride, layer_config, output, |
738 | out_stride, th, num_workers }; |
739 | convolve_ops[th] = convolve_op; |
740 | worker->hook = convolve_layer; |
741 | worker->data1 = &(convolve_ops[th]); |
742 | worker->data2 = NULL; |
743 | |
744 | // Start convolving. |
745 | if (th == num_workers - 1) { |
746 | winterface->execute(worker); |
747 | } else { |
748 | winterface->launch(worker); |
749 | } |
750 | } |
751 | |
752 | // Wait until all workers have finished. |
753 | for (int th = 0; th < AOMMIN(num_workers, CNN_MAX_THREADS); ++th) { |
754 | winterface->sync(&thread_data->workers[th]); |
755 | } |
756 | } |
757 | |
758 | static inline int get_start_shift_deconvolve(int filt_width, int stride) { |
759 | const int dif = AOMMAX(filt_width - stride, 0); |
760 | return dif / 2; |
761 | } |
762 | |
763 | void av1_cnn_batchnorm_c(float **image, int channels, int width, int height, |
764 | int stride, const float *gamma, const float *beta, |
765 | const float *mean, const float *std) { |
766 | assert(gamma && beta && mean && std && "batchnorm has null parameter!"); |
767 | for (int ch = 0; ch < channels; ch++) { |
768 | const float ch_gamma = gamma[ch]; |
769 | const float ch_beta = beta[ch]; |
770 | const float ch_mean = mean[ch]; |
771 | const float ch_std = std[ch]; |
772 | float *image_row = image[ch]; |
773 | |
774 | for (int row = 0; row < height; row++) { |
775 | for (int col = 0; col < width; col++) { |
776 | image_row[col] = |
777 | ch_gamma * (image_row[col] - ch_mean) / ch_std + ch_beta; |
778 | } |
779 | image_row += stride; |
780 | } |
781 | } |
782 | } |
783 | |
784 | void av1_cnn_deconvolve_c(const float **input, int in_width, int in_height, |
785 | int in_stride, const CNN_LAYER_CONFIG *layer_config, |
786 | float **output, int out_stride) { |
787 | assert(layer_config->deconvolve); |
788 | |
789 | const int cstep = layer_config->in_channels * layer_config->out_channels; |
790 | |
791 | int out_width = 0; |
792 | int out_height = 0; |
793 | av1_find_cnn_layer_output_size(in_width, in_height, layer_config, &out_width, |
794 | &out_height); |
795 | switch (layer_config->pad) { |
796 | case PADDING_SAME_ZERO: |
797 | for (int i = 0; i < layer_config->out_channels; ++i) { |
798 | for (int u = 0; u < out_height; ++u) { |
799 | for (int v = 0; v < out_width; ++v) { |
800 | float sum = layer_config->bias[i]; |
801 | for (int k = 0; k < layer_config->in_channels; ++k) { |
802 | int off = k * layer_config->out_channels + i; |
803 | for (int l = 0; l < layer_config->filter_height; ++l) { |
804 | const int h = |
805 | u - l + |
806 | get_start_shift_deconvolve(layer_config->filter_height, |
807 | layer_config->skip_height); |
808 | for (int m = 0; m < layer_config->filter_width; |
809 | ++m, off += cstep) { |
810 | const int w = |
811 | v - m + |
812 | get_start_shift_deconvolve(layer_config->filter_width, |
813 | layer_config->skip_width); |
814 | if ((h % layer_config->skip_height) != 0 || |
815 | (w % layer_config->skip_width) != 0) |
816 | continue; |
817 | const int ii = h / layer_config->skip_height; |
818 | const int jj = w / layer_config->skip_width; |
819 | if (ii < 0 || ii >= in_height || jj < 0 || jj >= in_width) |
820 | continue; |
821 | sum += layer_config->weights[off] * |
822 | input[k][ii * in_stride + jj]; |
823 | } |
824 | } |
825 | } |
826 | output[i][u * out_stride + v] = sum; |
827 | } |
828 | } |
829 | } |
830 | break; |
831 | case PADDING_SAME_REPLICATE: |
832 | for (int i = 0; i < layer_config->out_channels; ++i) { |
833 | for (int u = 0; u < out_height; ++u) { |
834 | for (int v = 0; v < out_width; ++v) { |
835 | float sum = layer_config->bias[i]; |
836 | for (int k = 0; k < layer_config->in_channels; ++k) { |
837 | int off = k * layer_config->out_channels + i; |
838 | for (int l = 0; l < layer_config->filter_height; ++l) { |
839 | const int h = |
840 | u - l + |
841 | get_start_shift_deconvolve(layer_config->filter_height, |
842 | layer_config->skip_height); |
843 | for (int m = 0; m < layer_config->filter_width; |
844 | ++m, off += cstep) { |
845 | const int w = |
846 | v - m + |
847 | get_start_shift_deconvolve(layer_config->filter_width, |
848 | layer_config->skip_width); |
849 | if ((h % layer_config->skip_height) != 0 || |
850 | (w % layer_config->skip_width) != 0) |
851 | continue; |
852 | const int ii = |
853 | CLAMPINDEX(h / layer_config->skip_height, in_height); |
854 | const int jj = |
855 | CLAMPINDEX(w / layer_config->skip_width, in_width); |
856 | assert(ii >= 0 && ii < in_height && jj >= 0 && jj < in_width); |
857 | sum += layer_config->weights[off] * |
858 | input[k][ii * in_stride + jj]; |
859 | } |
860 | } |
861 | } |
862 | output[i][u * out_stride + v] = sum; |
863 | } |
864 | } |
865 | } |
866 | break; |
867 | case PADDING_VALID: |
868 | for (int i = 0; i < layer_config->out_channels; ++i) { |
869 | for (int u = 0; u < out_height; ++u) { |
870 | for (int v = 0; v < out_width; ++v) { |
871 | float sum = layer_config->bias[i]; |
872 | for (int k = 0; k < layer_config->in_channels; ++k) { |
873 | int off = k * layer_config->out_channels + i; |
874 | for (int l = 0; l < layer_config->filter_height; ++l) { |
875 | const int h = u - l; |
876 | for (int m = 0; m < layer_config->filter_width; |
877 | ++m, off += cstep) { |
878 | const int w = v - m; |
879 | if ((h % layer_config->skip_height) != 0 || |
880 | (w % layer_config->skip_width) != 0) |
881 | continue; |
882 | const int ii = h / layer_config->skip_height; |
883 | const int jj = w / layer_config->skip_width; |
884 | if (ii < 0 || ii >= in_height || jj < 0 || jj >= in_width) |
885 | continue; |
886 | sum += layer_config->weights[off] * |
887 | input[k][ii * in_stride + jj]; |
888 | } |
889 | } |
890 | } |
891 | output[i][u * out_stride + v] = sum; |
892 | } |
893 | } |
894 | } |
895 | break; |
896 | default: assert(0 && "Unknown padding type"); |
897 | } |
898 | } |
899 | |
900 | bool av1_cnn_predict_c(const float **input, int in_width, int in_height, |
901 | int in_stride, const CNN_CONFIG *cnn_config, |
902 | const CNN_THREAD_DATA *thread_data, |
903 | CNN_MULTI_OUT *output_struct) { |
904 | bool success = false; |
905 | TENSOR tensor1[CNN_MAX_BRANCHES] = { { 0 } }; |
906 | TENSOR tensor2[CNN_MAX_BRANCHES] = { { 0 } }; |
907 | |
908 | float **output[CNN_MAX_BRANCHES]; |
909 | const int *out_chs = output_struct->output_channels; |
910 | output[0] = output_struct->output_buffer; |
911 | for (int out_idx = 1; out_idx < output_struct->num_outputs; out_idx++) { |
912 | output[out_idx] = output[out_idx - 1] + out_chs[out_idx - 1]; |
913 | } |
914 | |
915 | int i_width = in_width; |
916 | int i_height = in_height; |
917 | int o_width = 0, o_height = 0; |
918 | for (int b = 0; b < CNN_MAX_BRANCHES; ++b) { |
919 | init_tensor(&tensor1[b]); |
920 | init_tensor(&tensor2[b]); |
921 | } |
922 | |
923 | const int *out_stride = output_struct->output_strides; |
924 | for (int layer = 0; layer < cnn_config->num_layers; ++layer) { |
925 | const CNN_LAYER_CONFIG *layer_config = &cnn_config->layer_config[layer]; |
926 | const int branch = layer_config->branch; |
927 | const CNN_BRANCH_CONFIG *branch_config = &layer_config->branch_config; |
928 | |
929 | // Allocate input tensor |
930 | if (layer == 0) { // First layer |
931 | assert(branch == 0);  // First layer must be primary branch |
932 | assign_tensor(&tensor1[branch], (float **)input, |
933 | layer_config->in_channels, in_width, in_height, in_stride); |
934 | } else { // Non-first layer |
935 | // Swap tensor1 and tensor2 |
936 | swap_tensor(&tensor1[branch], &tensor2[branch]); |
937 | |
938 | i_width = tensor1[branch].width; |
939 | i_height = tensor1[branch].height; |
940 | } |
941 | |
942 | // Allocate output tensor |
943 | av1_find_cnn_layer_output_size(i_width, i_height, layer_config, &o_width, |
944 | &o_height); |
945 | const int output_num = layer_config->output_num; |
946 | if (output_num == -1) { // Non-output layer |
947 | if (!realloc_tensor(&tensor2[branch], layer_config->out_channels, o_width, |
948 | o_height)) { |
949 | goto Error; |
950 | } |
951 | } else { // Output layer |
952 | free_tensor(&tensor2[branch]); |
953 | assign_tensor(&tensor2[branch], output[output_num], |
954 | layer_config->out_channels, o_width, o_height, |
955 | out_stride[output_num]); |
956 | } |
957 | |
958 | // If we are combining branches make sure that the branch to combine |
959 | // is different from the current branch. |
960 | assert(IMPLIES(layer_config->branch_combine_type != BRANCH_NOC, |
961 | !(branch_config->branches_to_combine & (1 << branch)))); |
962 | |
963 | if (layer_config->branch_copy_type == BRANCH_INPUT) { |
964 | if (!copy_active_tensor_to_branches(&tensor1[branch], layer_config, |
965 | branch, tensor2)) { |
966 | goto Error; |
967 | } |
968 | } |
969 | // Check consistency of input and output channels |
970 | assert(tensor1[branch].channels == layer_config->in_channels); |
971 | assert(tensor2[branch].channels == layer_config->out_channels); |
972 | |
973 | // Convolve/Deconvolve |
974 | if (!cnn_config->layer_config[layer].deconvolve) { |
975 | if (thread_data->num_workers > 1) { |
976 | convolve_layer_mt((const float **)tensor1[branch].buf, |
977 | tensor1[branch].width, tensor1[branch].height, |
978 | tensor1[branch].stride, layer_config, thread_data, |
979 | tensor2[branch].buf, tensor2[branch].stride); |
980 | } else { |
981 | av1_cnn_convolve((const float **)tensor1[branch].buf, |
982 | tensor1[branch].width, tensor1[branch].height, |
983 | tensor1[branch].stride, layer_config, |
984 | tensor2[branch].buf, tensor2[branch].stride, 0, 1); |
985 | } |
986 | } else { |
987 | av1_cnn_deconvolve((const float **)tensor1[branch].buf, |
988 | tensor1[branch].width, tensor1[branch].height, |
989 | tensor1[branch].stride, layer_config, |
990 | tensor2[branch].buf, tensor2[branch].stride); |
991 | } |
992 | |
993 | if (layer_config->branch_copy_type == BRANCH_OUTPUT) { |
994 | if (!copy_active_tensor_to_branches(&tensor2[branch], layer_config, |
995 | branch, tensor2)) { |
996 | goto Error; |
997 | } |
998 | } |
999 | |
1000 | // Add tensors from other branches if needed |
1001 | if (layer_config->branch_combine_type == BRANCH_ADD) { |
1002 | for (int b = 0; b < CNN_MAX_BRANCHES; ++b) { |
1003 | if ((branch_config->branches_to_combine & (1 << b)) && b != branch) { |
1004 | assert(check_tensor_equal_size(&tensor2[b], &tensor2[branch])); |
1005 | av1_cnn_add(tensor2[branch].buf, tensor2[branch].channels, |
1006 | tensor2[branch].width, tensor2[branch].height, |
1007 | tensor2[branch].stride, (const float **)tensor2[b].buf); |
1008 | } |
1009 | } |
1010 | } |
1011 | |
1012 | // Non-linearity |
1013 | av1_cnn_activate(tensor2[branch].buf, tensor2[branch].channels, |
1014 | tensor2[branch].width, tensor2[branch].height, |
1015 | tensor2[branch].stride, layer_config->activation); |
1016 | |
1017 | if (layer_config->bn_params.bn_gamma) { |
1018 | av1_cnn_batchnorm( |
1019 | tensor2[branch].buf, tensor2[branch].channels, tensor2[branch].width, |
1020 | tensor2[branch].height, tensor2[branch].stride, |
1021 | layer_config->bn_params.bn_gamma, layer_config->bn_params.bn_beta, |
1022 | layer_config->bn_params.bn_mean, layer_config->bn_params.bn_std); |
1023 | } |
1024 | |
1025 | // Concatenate tensors |
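| // (BRANCH_CAT): for a non-output layer the other branches' channels are
| // appended to this branch with concat_tensor(). For an output layer the
| // combined channel count is computed first, tensor2[branch] is re-assigned
| // over the caller's output buffers with that count, and each combined
| // branch's channels are then copied in after the channels already present.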
1026 | if (layer_config->branch_combine_type == BRANCH_CAT) { |
1027 | if (output_num == -1) { // Non-output layer |
1028 | for (int b = 0; b < CNN_MAX_BRANCHES; ++b) {
1029 | if ((branch_config->branches_to_combine & (1 << b)) && b != branch) {
1030 | assert(check_tensor_equal_dims(&tensor2[b], &tensor2[branch]));
1031 | assert(tensor2[b].channels > 0);
1032 | if (!concat_tensor(&tensor2[b], &tensor2[branch])) goto Error; |
1033 | } |
1034 | } |
1035 | } else { // Output layer |
1036 | const int existing_channels = tensor2[branch].channels; |
1037 | int num_chs = existing_channels; |
1038 | for (int b = 0; b < CNN_MAX_BRANCHES; ++b) {
1039 | if ((branch_config->branches_to_combine & (1 << b)) && b != branch) {
1040 | assert(check_tensor_equal_dims(&tensor2[b], &tensor2[branch]));
1041 | // Needed only to assign the new channel buffers |
1042 | num_chs += tensor2[b].channels; |
1043 | } |
1044 | } |
1045 | assign_tensor(&tensor2[branch], output[output_num], num_chs, o_width, |
1046 | o_height, out_stride[output_num]); |
1047 | |
1048 | num_chs = existing_channels; |
1049 | for (int b = 0; b < CNN_MAX_BRANCHES; ++b) {
1050 | if ((branch_config->branches_to_combine & (1 << b)) && b != branch) {
1051 | assert(check_tensor_equal_dims(&tensor2[b], &tensor2[branch]));
1052 | // Copy this branch's channels in after those already in place
1053 | copy_tensor(&tensor2[b], tensor2[b].channels, num_chs, |
1054 | &tensor2[branch]); |
1055 | num_chs += tensor2[b].channels; |
1056 | } |
1057 | } |
1058 | } |
1059 | } |
1060 | |
1061 | if (layer_config->branch_copy_type == BRANCH_COMBINED) { |
1062 | if (!copy_active_tensor_to_branches(&tensor2[branch], layer_config, |
1063 | branch, tensor2)) { |
1064 | goto Error; |
1065 | } |
1066 | } |
1067 | } |
1068 | |
1069 | success = true;
1070 | Error:
1071 | for (int b = 0; b < CNN_MAX_BRANCHES; ++b) {
1072 | free_tensor(&tensor1[b]); |
1073 | free_tensor(&tensor2[b]); |
1074 | } |
1075 | return success; |
1076 | } |
1077 | |
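| // The two image wrappers below convert pixels to floats in [0, 1]. When
| // strict_bounds is set they synthesize an ext_width / ext_height border by
| // replicating the frame edge: with ext_width == 2, a row [a b c] becomes
| // [a a a b c c c], and the padded top/bottom rows are whole-row copies of
| // the first/last extended row. Otherwise the border samples are read
| // directly from dgd outside the visible frame.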
1078 | // Assume output already has proper allocation |
1079 | // Assume input image buffers all have same resolution and strides |
1080 | bool av1_cnn_predict_img_multi_out(uint8_t **dgd, int width, int height,
1081 | int stride, const CNN_CONFIG *cnn_config, |
1082 | const CNN_THREAD_DATA *thread_data, |
1083 | CNN_MULTI_OUT *output) { |
1084 | const float max_val = 255.0; |
1085 | |
1086 | const int in_width = width + 2 * cnn_config->ext_width; |
1087 | const int in_height = height + 2 * cnn_config->ext_height; |
1088 | const int in_channels = cnn_config->layer_config[0].in_channels; |
1089 | float *inputs[CNN_MAX_CHANNELS];
1090 | float *input_ = |
1091 | (float *)aom_malloc(in_width * in_height * in_channels * sizeof(*input_)); |
1092 | if (!input_) return false;
1093 | const int in_stride = in_width; |
1094 | |
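| // A single allocation holds all input channels back to back; inputs[c]
| // points at channel c's plane, and `input` is offset past the top and left
| // padding so that input[0] is the first visible pixel of the frame.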
1095 | for (int c = 0; c < in_channels; ++c) { |
1096 | inputs[c] = input_ + c * in_stride * in_height; |
1097 | float *input = |
1098 | inputs[c] + cnn_config->ext_height * in_stride + cnn_config->ext_width; |
1099 | |
1100 | if (cnn_config->strict_bounds) { |
1101 | for (int i = 0; i < height; ++i) |
1102 | for (int j = 0; j < width; ++j) |
1103 | input[i * in_stride + j] = (float)dgd[c][i * stride + j] / max_val; |
1104 | // extend left and right |
1105 | for (int i = 0; i < height; ++i) { |
1106 | for (int j = -cnn_config->ext_width; j < 0; ++j) |
1107 | input[i * in_stride + j] = input[i * in_stride]; |
1108 | for (int j = width; j < width + cnn_config->ext_width; ++j) |
1109 | input[i * in_stride + j] = input[i * in_stride + width - 1]; |
1110 | } |
1111 | // extend top and bottom |
1112 | for (int i = -cnn_config->ext_height; i < 0; ++i) |
1113 | memcpy(&input[i * in_stride - cnn_config->ext_width], |
1114 | &input[-cnn_config->ext_width], in_width * sizeof(*input)); |
1115 | for (int i = height; i < height + cnn_config->ext_height; ++i) |
1116 | memcpy(&input[i * in_stride - cnn_config->ext_width], |
1117 | &input[(height - 1) * in_stride - cnn_config->ext_width], |
1118 | in_width * sizeof(*input)); |
1119 | } else { |
1120 | for (int i = -cnn_config->ext_height; i < height + cnn_config->ext_height; |
1121 | ++i) |
1122 | for (int j = -cnn_config->ext_width; j < width + cnn_config->ext_width; |
1123 | ++j) |
1124 | input[i * in_stride + j] = (float)dgd[c][i * stride + j] / max_val; |
1125 | } |
1126 | } |
1127 | bool success = av1_cnn_predict((const float **)inputs, in_width, in_height,
1128 | in_stride, cnn_config, thread_data, output); |
1129 | |
1130 | aom_free(input_); |
1131 | return success; |
1132 | } |
1133 | |
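| // The high-bit-depth variant below mirrors the 8-bit path; only the sample
| // type (uint16_t) and the normalization divisor differ: (1 << bit_depth) - 1,
| // e.g. 1023 for 10-bit input instead of 255.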
1134 | // Assume output already has proper allocation |
1135 | // Assume input image buffers all have same resolution and strides |
1136 | bool av1_cnn_predict_img_multi_out_highbd(uint16_t **dgd, int width, int height,
1137 | int stride, |
1138 | const CNN_CONFIG *cnn_config, |
1139 | const CNN_THREAD_DATA *thread_data, |
1140 | int bit_depth, |
1141 | CNN_MULTI_OUT *output) { |
1142 | const float max_val = (float)((1 << bit_depth) - 1); |
1143 | |
1144 | const int in_width = width + 2 * cnn_config->ext_width; |
1145 | const int in_height = height + 2 * cnn_config->ext_height; |
1146 | const int in_channels = cnn_config->layer_config[0].in_channels; |
1147 | float *inputs[CNN_MAX_CHANNELS];
1148 | float *input_ = |
1149 | (float *)aom_malloc(in_width * in_height * in_channels * sizeof(*input_)); |
1150 | if (!input_) return false;
1151 | const int in_stride = in_width; |
1152 | |
1153 | for (int c = 0; c < in_channels; ++c) { |
1154 | inputs[c] = input_ + c * in_stride * in_height; |
1155 | float *input = |
1156 | inputs[c] + cnn_config->ext_height * in_stride + cnn_config->ext_width; |
1157 | |
1158 | if (cnn_config->strict_bounds) { |
1159 | for (int i = 0; i < height; ++i) |
1160 | for (int j = 0; j < width; ++j) |
1161 | input[i * in_stride + j] = (float)dgd[c][i * stride + j] / max_val; |
1162 | // extend left and right |
1163 | for (int i = 0; i < height; ++i) { |
1164 | for (int j = -cnn_config->ext_width; j < 0; ++j) |
1165 | input[i * in_stride + j] = input[i * in_stride]; |
1166 | for (int j = width; j < width + cnn_config->ext_width; ++j) |
1167 | input[i * in_stride + j] = input[i * in_stride + width - 1]; |
1168 | } |
1169 | // extend top and bottom |
1170 | for (int i = -cnn_config->ext_height; i < 0; ++i) |
1171 | memcpy(&input[i * in_stride - cnn_config->ext_width], |
1172 | &input[-cnn_config->ext_width], in_width * sizeof(*input)); |
1173 | for (int i = height; i < height + cnn_config->ext_height; ++i) |
1174 | memcpy(&input[i * in_stride - cnn_config->ext_width], |
1175 | &input[(height - 1) * in_stride - cnn_config->ext_width], |
1176 | in_width * sizeof(*input)); |
1177 | } else { |
1178 | for (int i = -cnn_config->ext_height; i < height + cnn_config->ext_height; |
1179 | ++i) |
1180 | for (int j = -cnn_config->ext_width; j < width + cnn_config->ext_width; |
1181 | ++j) |
1182 | input[i * in_stride + j] = (float)dgd[c][i * stride + j] / max_val; |
1183 | } |
1184 | } |
1185 | |
1186 | bool success = av1_cnn_predict((const float **)inputs, in_width, in_height,
1187 | in_stride, cnn_config, thread_data, output); |
1188 | |
1189 | aom_free(input_); |
1190 | return success; |
1191 | } |