File: | root/firefox-clang/third_party/aom/av1/encoder/cnn.c |
Warning: | line 22, column 9 Excessive padding in 'CONVOLVE_OPS' (8 padding bytes, where 0 is optimal). Optimal fields order: input, layer_config, output, in_width, in_height, in_stride, out_stride, start_idx, th_step, consider reordering the fields or adding explicit padding members |
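On an LP64 target (8-byte pointers, 4-byte ints; an assumption about the build this report analyzed), the declared field order leaves a 4-byte hole after in_stride and another after th_step, which accounts for the 8 padding bytes reported above. A minimal sketch of the reordering the analyzer suggests, shown for illustration only and not applied to the listing below, groups the three pointers ahead of the six ints so the 48 bytes of members pack with no padding:

typedef struct {
  const float **input;                   // 8-byte pointers first
  const CNN_LAYER_CONFIG *layer_config;
  float **output;
  int in_width;                          // six 4-byte ints follow: 24 + 24 = 48
  int in_height;                         // bytes, a multiple of the 8-byte
  int in_stride;                         // alignment, so no tail padding
  int out_stride;
  int start_idx;
  int th_step;
} CONVOLVE_OPS;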
1 | /* |
2 | * Copyright (c) 2019, Alliance for Open Media. All rights reserved. |
3 | * |
4 | * This source code is subject to the terms of the BSD 2 Clause License and |
5 | * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
6 | * was not distributed with this source code in the LICENSE file, you can |
7 | * obtain it at www.aomedia.org/license/software. If the Alliance for Open |
8 | * Media Patent License 1.0 was not distributed with this source code in the |
9 | * PATENTS file, you can obtain it at www.aomedia.org/license/patent. |
10 | */ |
11 | |
12 | #include <assert.h> |
13 | #include <math.h> |
14 | #include <stdbool.h> |
15 | |
16 | #include "aom_dsp/aom_dsp_common.h" |
17 | #include "av1/common/av1_common_int.h" |
18 | #include "av1/encoder/cnn.h" |
19 | |
20 | #define CLAMPINDEX(a, hi) ((a) < 0 ? 0 : ((a) >= (hi) ? ((hi)-1) : (a))) |
21 | |
22 | typedef struct { |
23 | const float **input; |
24 | int in_width; |
25 | int in_height; |
26 | int in_stride; |
27 | const CNN_LAYER_CONFIG *layer_config; |
28 | float **output; |
29 | int out_stride; |
30 | int start_idx; |
31 | int th_step; |
32 | } CONVOLVE_OPS; |
33 | |
34 | static inline float softsign(float x) { return x / (fabsf(x) + 1.0f); } |
35 | |
36 | static inline float relu(float x) { return (x < 0) ? 0 : x; } |
37 | |
38 | typedef struct { |
39 | int allocsize; |
40 | int channels; |
41 | int width, height, stride; |
42 | float *buf[CNN_MAX_CHANNELS]; |
43 | } TENSOR; |
44 | |
45 | static void init_tensor(TENSOR *tensor) { memset(tensor, 0, sizeof(*tensor)); } |
46 | |
47 | static void free_tensor(TENSOR *tensor) { |
48 | if (tensor->allocsize) { |
49 | aom_free(tensor->buf[0]); |
50 | tensor->buf[0] = NULL; |
51 | tensor->allocsize = 0; |
52 | } |
53 | } |
54 | |
55 | static bool realloc_tensor(TENSOR *tensor, int channels, int width, |
56 | int height) { |
57 | const int newallocsize = channels * width * height; |
58 | if (tensor->allocsize < newallocsize) { |
59 | free_tensor(tensor); |
60 | tensor->buf[0] = |
61 | (float *)aom_malloc(sizeof(*tensor->buf[0]) * newallocsize); |
62 | if (!tensor->buf[0]) return false; |
63 | tensor->allocsize = newallocsize; |
64 | } |
65 | tensor->width = width; |
66 | tensor->height = height; |
67 | tensor->stride = width; |
68 | tensor->channels = channels; |
69 | for (int c = 1; c < channels; ++c) |
70 | tensor->buf[c] = &tensor->buf[0][c * width * height]; |
71 | return true; |
72 | } |
73 | |
74 | static void copy_tensor(const TENSOR *src, int copy_channels, int dst_offset, |
75 | TENSOR *dst) { |
76 | assert(src->width == dst->width); |
77 | assert(src->height == dst->height); |
78 | assert(copy_channels <= src->channels); |
79 | if (src->stride == dst->width && dst->stride == dst->width) { |
80 | for (int c = 0; c < copy_channels; ++c) { |
81 | memcpy(dst->buf[dst_offset + c], src->buf[c], |
82 | sizeof(*dst->buf[0]) * src->width * src->height); |
83 | } |
84 | } else { |
85 | for (int c = 0; c < copy_channels; ++c) { |
86 | for (int r = 0; r < dst->height; ++r) { |
87 | memcpy(&dst->buf[dst_offset + c][r * dst->stride], |
88 | &src->buf[c][r * src->stride], |
89 | dst->width * sizeof(*dst->buf[c])); |
90 | } |
91 | } |
92 | } |
93 | } |
94 | |
95 | static void assign_tensor(TENSOR *tensor, float *buf[CNN_MAX_CHANNELS], |
96 | int channels, int width, int height, int stride) { |
97 | tensor->allocsize = 0; |
98 | tensor->channels = channels; |
99 | tensor->width = width; |
100 | tensor->height = height; |
101 | tensor->stride = stride; |
102 | if (buf) { |
103 | for (int c = 0; c < channels; ++c) tensor->buf[c] = buf[c]; |
104 | } else { |
105 | for (int c = 0; c < channels; ++c) tensor->buf[c] = NULL; |
106 | } |
107 | } |
108 | |
109 | static void swap_tensor(TENSOR *t1, TENSOR *t2) { |
110 | TENSOR t = *t1; |
111 | *t1 = *t2; |
112 | *t2 = t; |
113 | } |
114 | |
115 | // The concatenated tensor goes into dst with first the channels in |
116 | // original dst followed by the channels in the src |
117 | static bool concat_tensor(const TENSOR *src, TENSOR *dst) { |
118 | assert(src->width == dst->width); |
119 | assert(src->height == dst->height); |
120 | |
121 | const int dst_channels = dst->channels; |
122 | const int channels = dst->channels + src->channels; |
123 | const int newallocsize = channels * dst->width * dst->height; |
124 | if (dst->allocsize < newallocsize) { |
125 | TENSOR t; |
126 | init_tensor(&t); |
127 | // allocate new buffers and copy first the dst channels |
128 | if (!realloc_tensor(&t, channels, dst->width, dst->height)) return false; |
129 | copy_tensor(dst, dst->channels, 0, &t); |
130 | // Swap the tensors and free the old buffers |
131 | swap_tensor(dst, &t); |
132 | free_tensor(&t); |
133 | } |
134 | for (int c = 1; c < channels; ++c) |
135 | dst->buf[c] = &dst->buf[0][c * dst->width * dst->height]; |
136 | // Copy the channels in src after the first dst_channels channels. |
137 | copy_tensor(src, src->channels, dst_channels, dst); |
138 | return true; |
139 | } |
140 | |
141 | #ifndef NDEBUG |
142 | static int check_tensor_equal_dims(TENSOR *t1, TENSOR *t2) { |
143 | return (t1->width == t2->width && t1->height == t2->height); |
144 | } |
145 | |
146 | static int check_tensor_equal_size(TENSOR *t1, TENSOR *t2) { |
147 | return (t1->channels == t2->channels && t1->width == t2->width && |
148 | t1->height == t2->height); |
149 | } |
150 | #endif // NDEBUG |
151 | |
152 | void av1_find_cnn_layer_output_size(int in_width, int in_height, |
153 | const CNN_LAYER_CONFIG *layer_config, |
154 | int *out_width, int *out_height) { |
155 | assert(layer_config->skip_width > 0); |
156 | assert(layer_config->skip_height > 0); |
157 | if (!layer_config->deconvolve) { |
158 | switch (layer_config->pad) { |
159 | case PADDING_SAME_ZERO: |
160 | case PADDING_SAME_REPLICATE: |
161 | *out_width = (in_width + layer_config->skip_width - 1) / |
162 | layer_config->skip_width; |
163 | *out_height = (in_height + layer_config->skip_height - 1) / |
164 | layer_config->skip_height; |
165 | break; |
166 | case PADDING_VALID: |
167 | *out_width = |
168 | (in_width - layer_config->filter_width + layer_config->skip_width) / |
169 | layer_config->skip_width; |
170 | *out_height = (in_height - layer_config->filter_height + |
171 | layer_config->skip_height) / |
172 | layer_config->skip_height; |
173 | break; |
174 | default: assert(0 && "Unknown padding type"); |
175 | } |
176 | } else { |
177 | switch (layer_config->pad) { |
178 | case PADDING_SAME_ZERO: |
179 | case PADDING_SAME_REPLICATE: |
180 | *out_width = in_width * layer_config->skip_width; |
181 | *out_height = in_height * layer_config->skip_height; |
182 | break; |
183 | case PADDING_VALID: |
184 | *out_width = (in_width - 1) * layer_config->skip_width + |
185 | layer_config->filter_width; |
186 | *out_height = (in_height - 1) * layer_config->skip_height + |
187 | layer_config->filter_height; |
188 | break; |
189 | default: assert(0 && "Unknown padding type"); |
190 | } |
191 | } |
192 | } |
193 | |
194 | static void find_cnn_out_channels(const CNN_LAYER_CONFIG *layer_config, |
195 | int channels_per_branch[]) { |
196 | int branch = layer_config->branch; |
197 | const CNN_BRANCH_CONFIG *branch_config = &layer_config->branch_config; |
198 | for (int b = 0; b < CNN_MAX_BRANCHES; ++b) { |
199 | if ((branch_config->input_to_branches & (1 << b)) && b != branch) { |
200 | if (layer_config->branch_copy_type == BRANCH_INPUT) { |
201 | channels_per_branch[b] = layer_config->in_channels; |
202 | } else if (layer_config->branch_copy_type == BRANCH_OUTPUT) { |
203 | channels_per_branch[b] = layer_config->out_channels; |
204 | } else if (layer_config->branch_copy_type == BRANCH_COMBINED) { |
205 | channels_per_branch[b] = layer_config->out_channels; |
206 | for (int c = 0; c < CNN_MAX_BRANCHES; ++c) { |
207 | if ((branch_config->branches_to_combine & (1 << c)) && c != branch) { |
208 | assert(channels_per_branch[c] > 0); |
209 | channels_per_branch[b] += channels_per_branch[c]; |
210 | } |
211 | } |
212 | } |
213 | } |
214 | } |
215 | channels_per_branch[branch] = layer_config->out_channels; |
216 | for (int c = 0; c < CNN_MAX_BRANCHES; ++c) { |
217 | if ((branch_config->branches_to_combine & (1 << c)) && c != branch) { |
218 | assert(channels_per_branch[c] > 0); |
219 | channels_per_branch[branch] += channels_per_branch[c]; |
220 | } |
221 | } |
222 | } |
223 | |
224 | #if CONFIG_DEBUG |
225 | static inline int cnn_has_at_least_one_output(const CNN_CONFIG *cnn_config) { |
226 | const int num_layers = cnn_config->num_layers; |
227 | const CNN_LAYER_CONFIG *layer_configs = cnn_config->layer_config; |
228 | |
229 | for (int idx = 0; idx < num_layers; idx++) { |
230 | if (layer_configs[idx].output_num != -1) { |
231 | return 1; |
232 | } |
233 | } |
234 | return 0; |
235 | } |
236 | #endif |
237 | |
238 | void av1_find_cnn_output_size(int in_width, int in_height, |
239 | const CNN_CONFIG *cnn_config, int *out_width, |
240 | int *out_height, int *out_channels) { |
241 | int channels_per_branch[CNN_MAX_BRANCHES] = { 0 }; |
242 | int i_width[CNN_MAX_BRANCHES] = { 0 }; |
243 | int i_height[CNN_MAX_BRANCHES] = { 0 }; |
244 | i_width[0] = in_width + cnn_config->ext_width * 2; |
245 | i_height[0] = in_height + cnn_config->ext_height * 2; |
246 | |
247 | #if CONFIG_DEBUG |
248 | assert(cnn_has_at_least_one_output(cnn_config)); |
249 | #endif |
250 | |
251 | for (int i = 0; i < cnn_config->num_layers; ++i) { |
252 | const CNN_LAYER_CONFIG *layer_config = &cnn_config->layer_config[i]; |
253 | const CNN_BRANCH_CONFIG *branch_config = &layer_config->branch_config; |
254 | const int branch = layer_config->branch; |
255 | int o_width = 0, o_height = 0; |
256 | |
257 | if (layer_config->branch_copy_type == BRANCH_INPUT) { |
258 | for (int b = 0; b < CNN_MAX_BRANCHES; ++b) { |
259 | if ((branch_config->input_to_branches & (1 << b)) && b != branch) { |
260 | assert(i_width[branch] > 0 && i_height[branch] > 0); |
261 | i_width[b] = i_width[branch]; |
262 | i_height[b] = i_height[branch]; |
263 | } |
264 | } |
265 | } |
266 | |
267 | av1_find_cnn_layer_output_size(i_width[branch], i_height[branch], |
268 | layer_config, &o_width, &o_height); |
269 | i_width[branch] = o_width; |
270 | i_height[branch] = o_height; |
271 | |
272 | if (layer_config->branch_copy_type == BRANCH_OUTPUT) { |
273 | for (int b = 0; b < CNN_MAX_BRANCHES; ++b) { |
274 | if ((branch_config->input_to_branches & (1 << b)) && b != branch) { |
275 | i_width[b] = o_width; |
276 | i_height[b] = o_height; |
277 | } |
278 | } |
279 | } |
280 | |
281 | find_cnn_out_channels(layer_config, channels_per_branch); |
282 | |
283 | const int output_num = layer_config->output_num; |
284 | if (output_num != -1) { // Current layer is an output layer |
285 | out_width[output_num] = o_width; |
286 | out_height[output_num] = o_height; |
287 | out_channels[output_num] = channels_per_branch[layer_config->branch]; |
288 | } |
289 | } |
290 | } |
291 | |
292 | static inline int get_start_shift_convolve(int width, int filt_width, |
293 | int stride) { |
294 | const int mod = (width % stride); |
295 | const int filt_off = (filt_width - 1) / 2; |
296 | const int dif = (mod ? mod - 1 : stride - 1); |
297 | return AOMMIN((dif + (filt_width % 2)) / 2, filt_off); |
298 | } |
299 | |
300 | void av1_cnn_add_c(float **output, int channels, int width, int height, |
301 | int stride, const float **add) { |
302 | for (int c = 0; c < channels; ++c) { |
303 | for (int i = 0; i < height; ++i) |
304 | for (int j = 0; j < width; ++j) |
305 | output[c][i * stride + j] += add[c][i * stride + j]; |
306 | } |
307 | } |
308 | |
309 | void av1_cnn_activate_c(float **output, int channels, int width, int height, |
310 | int stride, ACTIVATION layer_activation) { |
311 | if (layer_activation == RELU) { |
312 | for (int c = 0; c < channels; ++c) { |
313 | for (int i = 0; i < height; ++i) |
314 | for (int j = 0; j < width; ++j) |
315 | output[c][i * stride + j] = relu(output[c][i * stride + j]); |
316 | } |
317 | } else if (layer_activation == SOFTSIGN) { |
318 | for (int c = 0; c < channels; ++c) { |
319 | for (int i = 0; i < height; ++i) |
320 | for (int j = 0; j < width; ++j) |
321 | output[c][i * stride + j] = softsign(output[c][i * stride + j]); |
322 | } |
323 | } else if (layer_activation == SIGMOID) { |
324 | assert(0 && "Sigmoid has not been supported in CNN.")((void) sizeof ((0 && "Sigmoid has not been supported in CNN." ) ? 1 : 0), __extension__ ({ if (0 && "Sigmoid has not been supported in CNN." ) ; else __assert_fail ("0 && \"Sigmoid has not been supported in CNN.\"" , "/root/firefox-clang/third_party/aom/av1/encoder/cnn.c", 324 , __extension__ __PRETTY_FUNCTION__); })); // TO DO |
325 | } else if (layer_activation != NONE) { |
326 | assert(0 && "Unknown activation type")((void) sizeof ((0 && "Unknown activation type") ? 1 : 0), __extension__ ({ if (0 && "Unknown activation type" ) ; else __assert_fail ("0 && \"Unknown activation type\"" , "/root/firefox-clang/third_party/aom/av1/encoder/cnn.c", 326 , __extension__ __PRETTY_FUNCTION__); })); |
327 | } |
328 | } |
329 | |
330 | static bool copy_active_tensor_to_branches(const TENSOR *layer_active_tensor, |
331 | const CNN_LAYER_CONFIG *layer_config, |
332 | int branch, TENSOR branch_output[]) { |
333 | const CNN_BRANCH_CONFIG *branch_config = &layer_config->branch_config; |
334 | for (int b = 0; b < CNN_MAX_BRANCHES; ++b) { |
335 | if ((branch_config->input_to_branches & (1 << b)) && b != branch) { |
336 | // Copy layer's active tensor to output tensor of branch b if set in |
337 | // mask. The output becomes the input of the first layer of the branch |
338 | // because the layer of the branch is not the first layer. |
339 | int copy_channels = branch_config->channels_to_copy > 0 |
340 | ? branch_config->channels_to_copy |
341 | : layer_active_tensor->channels; |
342 | if (!realloc_tensor(&branch_output[b], copy_channels, |
343 | layer_active_tensor->width, |
344 | layer_active_tensor->height)) { |
345 | return false; |
346 | } |
347 | copy_tensor(layer_active_tensor, copy_channels, 0, &branch_output[b]); |
348 | } |
349 | } |
350 | return true; |
351 | } |
352 | |
353 | // CNNConvolve specific to maxpool set as 1, either skip_width or skip_height |
354 | // greater than 1 and padding equal to PADDING_SAME_ZERO. |
355 | static void convolve_maxpool_padding_zero( |
356 | const float **input, int in_width, int in_height, int in_stride, |
357 | const CNN_LAYER_CONFIG *const layer_config, float **output, int out_stride, |
358 | const int cstep, const int filter_width_half, |
359 | const int filter_height_half) { |
360 | for (int i = 0; i < layer_config->out_channels; ++i) { |
361 | for (int h = 0, u = 0; h < in_height; h += layer_config->skip_height, ++u) { |
362 | for (int w = 0, v = 0; w < in_width; w += layer_config->skip_width, ++v) { |
363 | for (int hh = h; hh < AOMMIN(in_height, h + layer_config->skip_height); |
364 | ++hh) { |
365 | for (int ww = w; ww < AOMMIN(in_width, w + layer_config->skip_width); |
366 | ++ww) { |
367 | float sum = layer_config->bias[i]; |
368 | for (int k = 0; k < layer_config->in_channels; ++k) { |
369 | int off = k * layer_config->out_channels + i; |
370 | for (int l = 0; l < layer_config->filter_height; ++l) { |
371 | const int ii = hh + l - filter_height_half; |
372 | for (int m = 0; m < layer_config->filter_width; |
373 | ++m, off += cstep) { |
374 | const int jj = ww + m - filter_width_half; |
375 | if (ii < 0 || ii >= in_height || jj < 0 || jj >= in_width) |
376 | continue; |
377 | sum += layer_config->weights[off] * |
378 | input[k][ii * in_stride + jj]; |
379 | } |
380 | } |
381 | } |
382 | const float a = sum; |
383 | if (h == hh && w == ww) |
384 | output[i][u * out_stride + v] = a; |
385 | else |
386 | output[i][u * out_stride + v] = |
387 | AOMMAX(output[i][u * out_stride + v], a); |
388 | } |
389 | } |
390 | } |
391 | } |
392 | } |
393 | } |
394 | |
395 | // CNNConvolve specific to maxpool set as 1, either skip_width or skip_height |
396 | // greater than 1 and padding equal to PADDING_SAME_REPLICATE. |
397 | static void convolve_maxpool_padding_replicate( |
398 | const float **input, int in_width, int in_height, int in_stride, |
399 | const CNN_LAYER_CONFIG *const layer_config, float **output, int out_stride, |
400 | const int cstep, const int filter_width_half, |
401 | const int filter_height_half) { |
402 | for (int i = 0; i < layer_config->out_channels; ++i) { |
403 | for (int h = 0, u = 0; h < in_height; h += layer_config->skip_height, ++u) { |
404 | for (int w = 0, v = 0; w < in_width; w += layer_config->skip_width, ++v) { |
405 | for (int hh = h; hh < AOMMIN(in_height, h + layer_config->skip_height); |
406 | ++hh) { |
407 | for (int ww = w; ww < AOMMIN(in_width, w + layer_config->skip_width); |
408 | ++ww) { |
409 | float sum = layer_config->bias[i]; |
410 | for (int k = 0; k < layer_config->in_channels; ++k) { |
411 | int off = k * layer_config->out_channels + i; |
412 | for (int l = 0; l < layer_config->filter_height; ++l) { |
413 | const int ii = |
414 | CLAMPINDEX(hh + l - filter_height_half, in_height); |
415 | for (int m = 0; m < layer_config->filter_width; |
416 | ++m, off += cstep) { |
417 | const int jj = |
418 | CLAMPINDEX(ww + m - filter_width_half, in_width); |
419 | assert(ii >= 0 && ii < in_height && jj >= 0 && jj < in_width); |
420 | sum += layer_config->weights[off] * |
421 | input[k][ii * in_stride + jj]; |
422 | } |
423 | } |
424 | } |
425 | const float a = sum; |
426 | if (h == hh && w == ww) |
427 | output[i][u * out_stride + v] = a; |
428 | else |
429 | output[i][u * out_stride + v] = |
430 | AOMMAX(output[i][u * out_stride + v], a); |
431 | } |
432 | } |
433 | } |
434 | } |
435 | } |
436 | } |
437 | |
438 | // CNNConvolve specific to maxpool set as 1, either skip_width or skip_height |
439 | // greater than 1 and padding equal to PADDING_VALID. |
440 | static void convolve_maxpool_padding_valid( |
441 | const float **input, int in_width, int in_height, int in_stride, |
442 | const CNN_LAYER_CONFIG *const layer_config, float **output, int out_stride, |
443 | const int cstep) { |
444 | for (int i = 0; i < layer_config->out_channels; ++i) { |
445 | for (int h = 0, u = 0; h < in_height - layer_config->filter_height + 1; |
446 | h += layer_config->skip_height, ++u) { |
447 | for (int w = 0, v = 0; w < in_width - layer_config->filter_width + 1; |
448 | w += layer_config->skip_width, ++v) { |
449 | for (int hh = h; hh < AOMMIN(in_height, h + layer_config->skip_height); |
450 | ++hh) { |
451 | for (int ww = w; ww < AOMMIN(in_width, w + layer_config->skip_width); |
452 | ++ww) { |
453 | float sum = layer_config->bias[i]; |
454 | for (int k = 0; k < layer_config->in_channels; ++k) { |
455 | int off = k * layer_config->out_channels + i; |
456 | for (int l = 0; l < layer_config->filter_height; ++l) { |
457 | const int ii = hh + l; |
458 | for (int m = 0; m < layer_config->filter_width; |
459 | ++m, off += cstep) { |
460 | const int jj = ww + m; |
461 | assert(ii >= 0 && ii < in_height && jj >= 0 && jj < in_width); |
462 | sum += layer_config->weights[off] * |
463 | input[k][ii * in_stride + jj]; |
464 | } |
465 | } |
466 | } |
467 | const float a = sum; |
468 | if (h == hh && w == ww) |
469 | output[i][u * out_stride + v] = a; |
470 | else |
471 | output[i][u * out_stride + v] = |
472 | AOMMAX(output[i][u * out_stride + v], a); |
473 | } |
474 | } |
475 | } |
476 | } |
477 | } |
478 | } |
479 | |
480 | // CNNConvolve specific to maxpool set as 0 with filter_height and filter_width |
481 | // equal to 1. |
482 | static void convolve_element_wise(const float **input, int in_width, |
483 | int in_height, int in_stride, |
484 | const CNN_LAYER_CONFIG *const layer_config, |
485 | float **output, int out_stride, int start_idx, |
486 | int step) { |
487 | const int start_h = get_start_shift_convolve( |
488 | in_height, layer_config->filter_height, layer_config->skip_height); |
489 | const int start_w = |
490 | get_start_shift_convolve(in_width, layer_config->filter_width, |
491 | layer_config->skip_width) + |
492 | start_idx * layer_config->skip_width; |
493 | const int out_w_step = AOMMAX(step, 1); |
494 | const int in_w_step = layer_config->skip_width * out_w_step; |
495 | for (int i = 0; i < layer_config->out_channels; ++i) { |
496 | for (int h = start_h, u = 0; h < in_height; |
497 | h += layer_config->skip_height, ++u) { |
498 | const int in_h = h * in_stride; |
499 | const int out_h = u * out_stride + start_idx; |
500 | for (int w = start_w, out_index = out_h; w < in_width; |
501 | w += in_w_step, out_index += out_w_step) { |
502 | float sum = layer_config->bias[i]; |
503 | for (int k = 0; k < layer_config->in_channels; ++k) { |
504 | sum += layer_config->weights[k * layer_config->out_channels + i] * |
505 | input[k][in_h + w]; |
506 | } |
507 | output[i][out_index] = sum; |
508 | } |
509 | } |
510 | } |
511 | } |
512 | |
513 | // CNNConvolve specific to maxpool set as 0 and padding equal to |
514 | // PADDING_SAME_ZERO. |
515 | static void convolve_no_maxpool_padding_zero( |
516 | const float **input, int in_width, int in_height, int in_stride, |
517 | const CNN_LAYER_CONFIG *const layer_config, float **output, int out_stride, |
518 | int start_idx, const int cstep, const int filter_width_half, |
519 | const int filter_height_half, const int ii_shift, const int jj_shift, |
520 | const int channel_step) { |
521 | const int start_h = get_start_shift_convolve( |
522 | in_height, layer_config->filter_height, layer_config->skip_height); |
523 | const int start_w = get_start_shift_convolve( |
524 | in_width, layer_config->filter_width, layer_config->skip_width); |
525 | const int end_ii_shift = filter_height_half + 1; |
526 | const int end_jj_shift = filter_width_half + 1; |
527 | // *_filter_margin stores the number of pixels along a dimension in the |
528 | // intersection of the complement of the image in the extended image |
529 | // and the filter. |
530 | const int top_filter_margin = layer_config->filter_width * ii_shift; |
531 | const int right_filter_margin = end_jj_shift - in_width; |
532 | for (int i = start_idx; i < layer_config->out_channels; i += channel_step) { |
533 | for (int h = start_h, u = 0; h < in_height; |
534 | h += layer_config->skip_height, ++u) { |
535 | const int out_h = u * out_stride; |
536 | const int top_cstep = |
537 | AOMMAX(0, top_filter_margin - h * layer_config->filter_width) * |
538 | cstep + |
539 | i; |
540 | const int start_ii = AOMMAX(0, h - ii_shift); |
541 | const int end_ii = AOMMIN(in_height, h + end_ii_shift); |
542 | for (int w = start_w, out_index = out_h; w < in_width; |
543 | w += layer_config->skip_width, ++out_index) { |
544 | const int left_cstep = AOMMAX(0, jj_shift - w) * cstep; |
545 | const int right_cstep = AOMMAX(0, right_filter_margin + w) * cstep; |
546 | const int start_jj = AOMMAX(0, w - jj_shift); |
547 | const int end_jj = AOMMIN(in_width, w + end_jj_shift); |
548 | float sum = layer_config->bias[i]; |
549 | for (int k = 0; k < layer_config->in_channels; ++k) { |
550 | int off = k * layer_config->out_channels + top_cstep; |
551 | for (int ii = start_ii; ii < end_ii; ++ii) { |
552 | off += left_cstep; |
553 | for (int jj = start_jj; jj < end_jj; ++jj, off += cstep) { |
554 | sum += layer_config->weights[off] * input[k][ii * in_stride + jj]; |
555 | } |
556 | off += right_cstep; |
557 | } |
558 | } |
559 | output[i][out_index] = sum; |
560 | } |
561 | } |
562 | } |
563 | } |
564 | |
565 | // CNNConvolve specific to maxpool set as 0 and padding equal to |
566 | // PADDING_SAME_REPLICATE. |
567 | static void convolve_no_maxpool_padding_replicate( |
568 | const float **input, int in_width, int in_height, int in_stride, |
569 | const CNN_LAYER_CONFIG *const layer_config, float **output, int out_stride, |
570 | int start_idx, const int cstep, const int ii_shift, const int jj_shift, |
571 | const int channel_step) { |
572 | // h and w are shifted to an offset coordinate system to reduce in-loop |
573 | // computation. |
574 | const int start_h = |
575 | get_start_shift_convolve(in_height, layer_config->filter_height, |
576 | layer_config->skip_height) - |
577 | ii_shift; |
578 | const int start_w = |
579 | get_start_shift_convolve(in_width, layer_config->filter_width, |
580 | layer_config->skip_width) - |
581 | jj_shift; |
582 | const int end_h = in_height - ii_shift; |
583 | const int end_w = in_width - jj_shift; |
584 | for (int i = start_idx; i < layer_config->out_channels; i += channel_step) { |
585 | for (int h = start_h, u = 0; h < end_h; |
586 | h += layer_config->skip_height, ++u) { |
587 | const int out_h = u * out_stride; |
588 | const int upper_ii_index = layer_config->filter_height + h; |
589 | for (int w = start_w, out_index = out_h; w < end_w; |
590 | w += layer_config->skip_width, ++out_index) { |
591 | const int upper_jj_index = layer_config->filter_width + w; |
592 | float sum = layer_config->bias[i]; |
593 | for (int k = 0; k < layer_config->in_channels; ++k) { |
594 | int off = k * layer_config->out_channels + i; |
595 | for (int ii = h; ii < upper_ii_index; ++ii) { |
596 | const int clamped_ii = CLAMPINDEX(ii, in_height); |
597 | for (int jj = w; jj < upper_jj_index; ++jj) { |
598 | const int clamped_jj = CLAMPINDEX(jj, in_width); |
599 | assert(clamped_ii >= 0 && clamped_ii < in_height && |
600 | clamped_jj >= 0 && clamped_jj < in_width); |
601 | sum += layer_config->weights[off] * |
602 | input[k][clamped_ii * in_stride + clamped_jj]; |
603 | off += cstep; |
604 | } |
605 | } |
606 | } |
607 | output[i][out_index] = sum; |
608 | } |
609 | } |
610 | } |
611 | } |
612 | |
613 | // CNNConvolve specific to maxpool set as 0 and padding equal to |
614 | // PADDING_VALID. |
615 | void av1_cnn_convolve_no_maxpool_padding_valid_c( |
616 | const float **input, int in_width, int in_height, int in_stride, |
617 | const CNN_LAYER_CONFIG *layer_config, float **output, int out_stride, |
618 | int start_idx, int cstep, int channel_step) { |
619 | assert((layer_config->skip_height == 1 && layer_config->skip_width == 1) || |
620 | !layer_config->maxpool); |
621 | assert(layer_config->filter_height > 1 || layer_config->filter_width > 1); |
622 | assert(layer_config->pad == PADDING_VALID); |
623 | for (int i = start_idx; i < layer_config->out_channels; i += channel_step) { |
624 | for (int h = 0, u = 0; h < in_height - layer_config->filter_height + 1; |
625 | h += layer_config->skip_height, ++u) { |
626 | const int out_h = u * out_stride; |
627 | const int upper_ii_index = layer_config->filter_height + h; |
628 | for (int w = 0, out_index = out_h; |
629 | w < in_width - layer_config->filter_width + 1; |
630 | w += layer_config->skip_width, ++out_index) { |
631 | const int upper_jj_index = layer_config->filter_width + w; |
632 | float sum = layer_config->bias[i]; |
633 | for (int k = 0; k < layer_config->in_channels; ++k) { |
634 | int off = k * layer_config->out_channels + i; |
635 | for (int ii = h; ii < upper_ii_index; ++ii) { |
636 | for (int jj = w; jj < upper_jj_index; ++jj) { |
637 | assert(ii >= 0 && ii < in_height && jj >= 0 && jj < in_width); |
638 | sum += layer_config->weights[off] * input[k][ii * in_stride + jj]; |
639 | off += cstep; |
640 | } |
641 | } |
642 | } |
643 | output[i][out_index] = sum; |
644 | } |
645 | } |
646 | } |
647 | } |
648 | |
649 | static void av1_cnn_convolve(const float **input, int in_width, int in_height, |
650 | int in_stride, |
651 | const CNN_LAYER_CONFIG *layer_config, |
652 | float **output, int out_stride, int start_idx, |
653 | int step) { |
654 | assert(!layer_config->deconvolve); |
655 | const int cstep = layer_config->in_channels * layer_config->out_channels; |
656 | const int filter_height_half = layer_config->filter_height >> 1; |
657 | const int filter_width_half = layer_config->filter_width >> 1; |
658 | const int channel_step = AOMMAX(step, 1); |
659 | |
660 | if (layer_config->maxpool && |
661 | (layer_config->skip_height > 1 || layer_config->skip_width > 1)) { |
662 | switch (layer_config->pad) { |
663 | case PADDING_SAME_ZERO: |
664 | convolve_maxpool_padding_zero(input, in_width, in_height, in_stride, |
665 | layer_config, output, out_stride, cstep, |
666 | filter_width_half, filter_height_half); |
667 | break; |
668 | case PADDING_SAME_REPLICATE: |
669 | convolve_maxpool_padding_replicate( |
670 | input, in_width, in_height, in_stride, layer_config, output, |
671 | out_stride, cstep, filter_width_half, filter_height_half); |
672 | break; |
673 | case PADDING_VALID: |
674 | convolve_maxpool_padding_valid(input, in_width, in_height, in_stride, |
675 | layer_config, output, out_stride, cstep); |
676 | break; |
677 | default: assert(0 && "Unknown padding type"); |
678 | } |
679 | } else { |
680 | // Results in element-wise matrix multiplication. |
681 | if (layer_config->filter_height == 1 && layer_config->filter_width == 1) { |
682 | convolve_element_wise(input, in_width, in_height, in_stride, layer_config, |
683 | output, out_stride, start_idx, step); |
684 | return; |
685 | } |
686 | const int ii_shift = |
687 | filter_height_half - (layer_config->filter_height - 1) % 2; |
688 | const int jj_shift = |
689 | filter_width_half - (layer_config->filter_width - 1) % 2; |
690 | switch (layer_config->pad) { |
691 | case PADDING_SAME_ZERO: |
692 | convolve_no_maxpool_padding_zero( |
693 | input, in_width, in_height, in_stride, layer_config, output, |
694 | out_stride, start_idx, cstep, filter_width_half, filter_height_half, |
695 | ii_shift, jj_shift, channel_step); |
696 | break; |
697 | case PADDING_SAME_REPLICATE: |
698 | convolve_no_maxpool_padding_replicate( |
699 | input, in_width, in_height, in_stride, layer_config, output, |
700 | out_stride, start_idx, cstep, ii_shift, jj_shift, channel_step); |
701 | break; |
702 | case PADDING_VALID: |
703 | av1_cnn_convolve_no_maxpool_padding_valid( |
704 | input, in_width, in_height, in_stride, layer_config, output, |
705 | out_stride, start_idx, cstep, channel_step); |
706 | break; |
707 | default: assert(0 && "Unknown padding type"); |
708 | } |
709 | } |
710 | } |
711 | |
712 | static int convolve_layer(void *arg1, void *arg2) { |
713 | const CONVOLVE_OPS *convolve_ops = arg1; |
714 | (void)arg2; |
715 | av1_cnn_convolve( |
716 | convolve_ops->input, convolve_ops->in_width, convolve_ops->in_height, |
717 | convolve_ops->in_stride, convolve_ops->layer_config, convolve_ops->output, |
718 | convolve_ops->out_stride, convolve_ops->start_idx, convolve_ops->th_step); |
719 | return 1; |
720 | } |
721 | |
722 | static void convolve_layer_mt(const float **input, int in_width, int in_height, |
723 | int in_stride, |
724 | const CNN_LAYER_CONFIG *layer_config, |
725 | const CNN_THREAD_DATA *thread_data, |
726 | float **output, int out_stride) { |
727 | const AVxWorkerInterface *const winterface = aom_get_worker_interface(); |
728 | const int num_workers = thread_data->num_workers; |
729 | assert(thread_data->workers); |
730 | |
731 | CONVOLVE_OPS convolve_ops[CNN_MAX_THREADS]; |
732 | for (int th = 0; th < AOMMIN(num_workers, CNN_MAX_THREADS); ++th) { |
733 | AVxWorker *const worker = &thread_data->workers[th]; |
734 | winterface->reset(worker); |
735 | |
736 | CONVOLVE_OPS convolve_op = { input, in_width, in_height, |
737 | in_stride, layer_config, output, |
738 | out_stride, th, num_workers }; |
739 | convolve_ops[th] = convolve_op; |
740 | worker->hook = convolve_layer; |
741 | worker->data1 = &(convolve_ops[th]); |
742 | worker->data2 = NULL; |
743 | |
744 | // Start convolving. |
745 | if (th == num_workers - 1) { |
746 | winterface->execute(worker); |
747 | } else { |
748 | winterface->launch(worker); |
749 | } |
750 | } |
751 | |
752 | // Wait until all workers have finished. |
753 | for (int th = 0; th < AOMMIN(num_workers, CNN_MAX_THREADS); ++th) { |
754 | winterface->sync(&thread_data->workers[th]); |
755 | } |
756 | } |
757 | |
758 | static inline int get_start_shift_deconvolve(int filt_width, int stride) { |
759 | const int dif = AOMMAX(filt_width - stride, 0); |
760 | return dif / 2; |
761 | } |
762 | |
763 | void av1_cnn_batchnorm_c(float **image, int channels, int width, int height, |
764 | int stride, const float *gamma, const float *beta, |
765 | const float *mean, const float *std) { |
766 | assert(gamma && beta && mean && std && "batchnorm has null parameter!"); |
767 | for (int ch = 0; ch < channels; ch++) { |
768 | const float ch_gamma = gamma[ch]; |
769 | const float ch_beta = beta[ch]; |
770 | const float ch_mean = mean[ch]; |
771 | const float ch_std = std[ch]; |
772 | float *image_row = image[ch]; |
773 | |
774 | for (int row = 0; row < height; row++) { |
775 | for (int col = 0; col < width; col++) { |
776 | image_row[col] = |
777 | ch_gamma * (image_row[col] - ch_mean) / ch_std + ch_beta; |
778 | } |
779 | image_row += stride; |
780 | } |
781 | } |
782 | } |
783 | |
784 | void av1_cnn_deconvolve_c(const float **input, int in_width, int in_height, |
785 | int in_stride, const CNN_LAYER_CONFIG *layer_config, |
786 | float **output, int out_stride) { |
787 | assert(layer_config->deconvolve); |
788 | |
789 | const int cstep = layer_config->in_channels * layer_config->out_channels; |
790 | |
791 | int out_width = 0; |
792 | int out_height = 0; |
793 | av1_find_cnn_layer_output_size(in_width, in_height, layer_config, &out_width, |
794 | &out_height); |
795 | switch (layer_config->pad) { |
796 | case PADDING_SAME_ZERO: |
797 | for (int i = 0; i < layer_config->out_channels; ++i) { |
798 | for (int u = 0; u < out_height; ++u) { |
799 | for (int v = 0; v < out_width; ++v) { |
800 | float sum = layer_config->bias[i]; |
801 | for (int k = 0; k < layer_config->in_channels; ++k) { |
802 | int off = k * layer_config->out_channels + i; |
803 | for (int l = 0; l < layer_config->filter_height; ++l) { |
804 | const int h = |
805 | u - l + |
806 | get_start_shift_deconvolve(layer_config->filter_height, |
807 | layer_config->skip_height); |
808 | for (int m = 0; m < layer_config->filter_width; |
809 | ++m, off += cstep) { |
810 | const int w = |
811 | v - m + |
812 | get_start_shift_deconvolve(layer_config->filter_width, |
813 | layer_config->skip_width); |
814 | if ((h % layer_config->skip_height) != 0 || |
815 | (w % layer_config->skip_width) != 0) |
816 | continue; |
817 | const int ii = h / layer_config->skip_height; |
818 | const int jj = w / layer_config->skip_width; |
819 | if (ii < 0 || ii >= in_height || jj < 0 || jj >= in_width) |
820 | continue; |
821 | sum += layer_config->weights[off] * |
822 | input[k][ii * in_stride + jj]; |
823 | } |
824 | } |
825 | } |
826 | output[i][u * out_stride + v] = sum; |
827 | } |
828 | } |
829 | } |
830 | break; |
831 | case PADDING_SAME_REPLICATE: |
832 | for (int i = 0; i < layer_config->out_channels; ++i) { |
833 | for (int u = 0; u < out_height; ++u) { |
834 | for (int v = 0; v < out_width; ++v) { |
835 | float sum = layer_config->bias[i]; |
836 | for (int k = 0; k < layer_config->in_channels; ++k) { |
837 | int off = k * layer_config->out_channels + i; |
838 | for (int l = 0; l < layer_config->filter_height; ++l) { |
839 | const int h = |
840 | u - l + |
841 | get_start_shift_deconvolve(layer_config->filter_height, |
842 | layer_config->skip_height); |
843 | for (int m = 0; m < layer_config->filter_width; |
844 | ++m, off += cstep) { |
845 | const int w = |
846 | v - m + |
847 | get_start_shift_deconvolve(layer_config->filter_width, |
848 | layer_config->skip_width); |
849 | if ((h % layer_config->skip_height) != 0 || |
850 | (w % layer_config->skip_width) != 0) |
851 | continue; |
852 | const int ii = |
853 | CLAMPINDEX(h / layer_config->skip_height, in_height); |
854 | const int jj = |
855 | CLAMPINDEX(w / layer_config->skip_width, in_width); |
856 | assert(ii >= 0 && ii < in_height && jj >= 0 && jj < in_width); |
857 | sum += layer_config->weights[off] * |
858 | input[k][ii * in_stride + jj]; |
859 | } |
860 | } |
861 | } |
862 | output[i][u * out_stride + v] = sum; |
863 | } |
864 | } |
865 | } |
866 | break; |
867 | case PADDING_VALID: |
868 | for (int i = 0; i < layer_config->out_channels; ++i) { |
869 | for (int u = 0; u < out_height; ++u) { |
870 | for (int v = 0; v < out_width; ++v) { |
871 | float sum = layer_config->bias[i]; |
872 | for (int k = 0; k < layer_config->in_channels; ++k) { |
873 | int off = k * layer_config->out_channels + i; |
874 | for (int l = 0; l < layer_config->filter_height; ++l) { |
875 | const int h = u - l; |
876 | for (int m = 0; m < layer_config->filter_width; |
877 | ++m, off += cstep) { |
878 | const int w = v - m; |
879 | if ((h % layer_config->skip_height) != 0 || |
880 | (w % layer_config->skip_width) != 0) |
881 | continue; |
882 | const int ii = h / layer_config->skip_height; |
883 | const int jj = w / layer_config->skip_width; |
884 | if (ii < 0 || ii >= in_height || jj < 0 || jj >= in_width) |
885 | continue; |
886 | sum += layer_config->weights[off] * |
887 | input[k][ii * in_stride + jj]; |
888 | } |
889 | } |
890 | } |
891 | output[i][u * out_stride + v] = sum; |
892 | } |
893 | } |
894 | } |
895 | break; |
896 | default: assert(0 && "Unknown padding type"); |
897 | } |
898 | } |
899 | |
900 | bool av1_cnn_predict_c(const float **input, int in_width, int in_height, |
901 | int in_stride, const CNN_CONFIG *cnn_config, |
902 | const CNN_THREAD_DATA *thread_data, |
903 | CNN_MULTI_OUT *output_struct) { |
904 | bool success = false; |
905 | TENSOR tensor1[CNN_MAX_BRANCHES] = { { 0 } }; |
906 | TENSOR tensor2[CNN_MAX_BRANCHES] = { { 0 } }; |
907 | |
908 | float **output[CNN_MAX_BRANCHES]; |
909 | const int *out_chs = output_struct->output_channels; |
910 | output[0] = output_struct->output_buffer; |
911 | for (int out_idx = 1; out_idx < output_struct->num_outputs; out_idx++) { |
912 | output[out_idx] = output[out_idx - 1] + out_chs[out_idx - 1]; |
913 | } |
914 | |
915 | int i_width = in_width; |
916 | int i_height = in_height; |
917 | int o_width = 0, o_height = 0; |
918 | for (int b = 0; b < CNN_MAX_BRANCHES; ++b) { |
919 | init_tensor(&tensor1[b]); |
920 | init_tensor(&tensor2[b]); |
921 | } |
922 | |
923 | const int *out_stride = output_struct->output_strides; |
924 | for (int layer = 0; layer < cnn_config->num_layers; ++layer) { |
925 | const CNN_LAYER_CONFIG *layer_config = &cnn_config->layer_config[layer]; |
926 | const int branch = layer_config->branch; |
927 | const CNN_BRANCH_CONFIG *branch_config = &layer_config->branch_config; |
928 | |
929 | // Allocate input tensor |
930 | if (layer == 0) { // First layer |
931 | assert(branch == 0);  // First layer must be primary branch |
932 | assign_tensor(&tensor1[branch], (float **)input, |
933 | layer_config->in_channels, in_width, in_height, in_stride); |
934 | } else { // Non-first layer |
935 | // Swap tensor1 and tensor2 |
936 | swap_tensor(&tensor1[branch], &tensor2[branch]); |
937 | |
938 | i_width = tensor1[branch].width; |
939 | i_height = tensor1[branch].height; |
940 | } |
941 | |
942 | // Allocate output tensor |
943 | av1_find_cnn_layer_output_size(i_width, i_height, layer_config, &o_width, |
944 | &o_height); |
945 | const int output_num = layer_config->output_num; |
946 | if (output_num == -1) { // Non-output layer |
947 | if (!realloc_tensor(&tensor2[branch], layer_config->out_channels, o_width, |
948 | o_height)) { |
949 | goto Error; |
950 | } |
951 | } else { // Output layer |
952 | free_tensor(&tensor2[branch]); |
953 | assign_tensor(&tensor2[branch], output[output_num], |
954 | layer_config->out_channels, o_width, o_height, |
955 | out_stride[output_num]); |
956 | } |
957 | |
958 | // If we are combining branches make sure that the branch to combine |
959 | // is different from the current branch. |
960 | assert(IMPLIES(layer_config->branch_combine_type != BRANCH_NOC, |
961 | !(branch_config->branches_to_combine & (1 << branch)))); |
962 | |
963 | if (layer_config->branch_copy_type == BRANCH_INPUT) { |
964 | if (!copy_active_tensor_to_branches(&tensor1[branch], layer_config, |
965 | branch, tensor2)) { |
966 | goto Error; |
967 | } |
968 | } |
969 | // Check consistency of input and output channels |
970 | assert(tensor1[branch].channels == layer_config->in_channels); |
971 | assert(tensor2[branch].channels == layer_config->out_channels); |
972 | |
973 | // Convolve/Deconvolve |
974 | if (!cnn_config->layer_config[layer].deconvolve) { |
975 | if (thread_data->num_workers > 1) { |
976 | convolve_layer_mt((const float **)tensor1[branch].buf, |
977 | tensor1[branch].width, tensor1[branch].height, |
978 | tensor1[branch].stride, layer_config, thread_data, |
979 | tensor2[branch].buf, tensor2[branch].stride); |
980 | } else { |
981 | av1_cnn_convolve((const float **)tensor1[branch].buf, |
982 | tensor1[branch].width, tensor1[branch].height, |
983 | tensor1[branch].stride, layer_config, |
984 | tensor2[branch].buf, tensor2[branch].stride, 0, 1); |
985 | } |
986 | } else { |
987 | av1_cnn_deconvolve((const float **)tensor1[branch].buf, |
988 | tensor1[branch].width, tensor1[branch].height, |
989 | tensor1[branch].stride, layer_config, |
990 | tensor2[branch].buf, tensor2[branch].stride); |
991 | } |
992 | |
993 | if (layer_config->branch_copy_type == BRANCH_OUTPUT) { |
994 | if (!copy_active_tensor_to_branches(&tensor2[branch], layer_config, |
995 | branch, tensor2)) { |
996 | goto Error; |
997 | } |
998 | } |
999 | |
1000 | // Add tensors from other branches if needed |
1001 | if (layer_config->branch_combine_type == BRANCH_ADD) { |
1002 | for (int b = 0; b < CNN_MAX_BRANCHES; ++b) { |
1003 | if ((branch_config->branches_to_combine & (1 << b)) && b != branch) { |
1004 | assert(check_tensor_equal_size(&tensor2[b], &tensor2[branch])); |
1005 | av1_cnn_add(tensor2[branch].buf, tensor2[branch].channels, |
1006 | tensor2[branch].width, tensor2[branch].height, |
1007 | tensor2[branch].stride, (const float **)tensor2[b].buf); |
1008 | } |
1009 | } |
1010 | } |
1011 | |
1012 | // Non-linearity |
1013 | av1_cnn_activate(tensor2[branch].buf, tensor2[branch].channels, |
1014 | tensor2[branch].width, tensor2[branch].height, |
1015 | tensor2[branch].stride, layer_config->activation); |
1016 | |
1017 | if (layer_config->bn_params.bn_gamma) { |
1018 | av1_cnn_batchnorm( |
1019 | tensor2[branch].buf, tensor2[branch].channels, tensor2[branch].width, |
1020 | tensor2[branch].height, tensor2[branch].stride, |
1021 | layer_config->bn_params.bn_gamma, layer_config->bn_params.bn_beta, |
1022 | layer_config->bn_params.bn_mean, layer_config->bn_params.bn_std); |
1023 | } |
1024 | |
1025 | // Concatenate tensors |
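| // (BRANCH_CAT): for a non-output layer the other branches' channels are
| // appended to this branch with concat_tensor(). For an output layer the
| // combined channel count is computed first, tensor2[branch] is re-assigned
| // over the caller's output buffers with that count, and each combined
| // branch's channels are then copied in after the channels already present.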
1026 | if (layer_config->branch_combine_type == BRANCH_CAT) { |
1027 | if (output_num == -1) { // Non-output layer |
1028 | for (int b = 0; b < CNN_MAX_BRANCHES; ++b) {
1029 | if ((branch_config->branches_to_combine & (1 << b)) && b != branch) {
1030 | assert(check_tensor_equal_dims(&tensor2[b], &tensor2[branch]));
1031 | assert(tensor2[b].channels > 0);
1032 | if (!concat_tensor(&tensor2[b], &tensor2[branch])) goto Error; |
1033 | } |
1034 | } |
1035 | } else { // Output layer |
1036 | const int existing_channels = tensor2[branch].channels; |
1037 | int num_chs = existing_channels; |
1038 | for (int b = 0; b < CNN_MAX_BRANCHES; ++b) {
1039 | if ((branch_config->branches_to_combine & (1 << b)) && b != branch) {
1040 | assert(check_tensor_equal_dims(&tensor2[b], &tensor2[branch]));
1041 | // Needed only to assign the new channel buffers |
1042 | num_chs += tensor2[b].channels; |
1043 | } |
1044 | } |
1045 | assign_tensor(&tensor2[branch], output[output_num], num_chs, o_width, |
1046 | o_height, out_stride[output_num]); |
1047 | |
1048 | num_chs = existing_channels; |
1049 | for (int b = 0; b < CNN_MAX_BRANCHES; ++b) {
1050 | if ((branch_config->branches_to_combine & (1 << b)) && b != branch) {
1051 | assert(check_tensor_equal_dims(&tensor2[b], &tensor2[branch]));
1052 | // Copy this branch's channels in after those already in place
1053 | copy_tensor(&tensor2[b], tensor2[b].channels, num_chs, |
1054 | &tensor2[branch]); |
1055 | num_chs += tensor2[b].channels; |
1056 | } |
1057 | } |
1058 | } |
1059 | } |
1060 | |
1061 | if (layer_config->branch_copy_type == BRANCH_COMBINED) { |
1062 | if (!copy_active_tensor_to_branches(&tensor2[branch], layer_config, |
1063 | branch, tensor2)) { |
1064 | goto Error; |
1065 | } |
1066 | } |
1067 | } |
1068 | |
1069 | success = true;
1070 | Error:
1071 | for (int b = 0; b < CNN_MAX_BRANCHES; ++b) {
1072 | free_tensor(&tensor1[b]); |
1073 | free_tensor(&tensor2[b]); |
1074 | } |
1075 | return success; |
1076 | } |
1077 | |
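| // The two image wrappers below convert pixels to floats in [0, 1]. When
| // strict_bounds is set they synthesize an ext_width / ext_height border by
| // replicating the frame edge: with ext_width == 2, a row [a b c] becomes
| // [a a a b c c c], and the padded top/bottom rows are whole-row copies of
| // the first/last extended row. Otherwise the border samples are read
| // directly from dgd outside the visible frame.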
1078 | // Assume output already has proper allocation |
1079 | // Assume input image buffers all have same resolution and strides |
1080 | bool av1_cnn_predict_img_multi_out(uint8_t **dgd, int width, int height,
1081 | int stride, const CNN_CONFIG *cnn_config, |
1082 | const CNN_THREAD_DATA *thread_data, |
1083 | CNN_MULTI_OUT *output) { |
1084 | const float max_val = 255.0; |
1085 | |
1086 | const int in_width = width + 2 * cnn_config->ext_width; |
1087 | const int in_height = height + 2 * cnn_config->ext_height; |
1088 | const int in_channels = cnn_config->layer_config[0].in_channels; |
1089 | float *inputs[CNN_MAX_CHANNELS];
1090 | float *input_ = |
1091 | (float *)aom_malloc(in_width * in_height * in_channels * sizeof(*input_)); |
1092 | if (!input_) return false;
1093 | const int in_stride = in_width; |
1094 | |
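| // A single allocation holds all input channels back to back; inputs[c]
| // points at channel c's plane, and `input` is offset past the top and left
| // padding so that input[0] is the first visible pixel of the frame.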
1095 | for (int c = 0; c < in_channels; ++c) { |
1096 | inputs[c] = input_ + c * in_stride * in_height; |
1097 | float *input = |
1098 | inputs[c] + cnn_config->ext_height * in_stride + cnn_config->ext_width; |
1099 | |
1100 | if (cnn_config->strict_bounds) { |
1101 | for (int i = 0; i < height; ++i) |
1102 | for (int j = 0; j < width; ++j) |
1103 | input[i * in_stride + j] = (float)dgd[c][i * stride + j] / max_val; |
1104 | // extend left and right |
1105 | for (int i = 0; i < height; ++i) { |
1106 | for (int j = -cnn_config->ext_width; j < 0; ++j) |
1107 | input[i * in_stride + j] = input[i * in_stride]; |
1108 | for (int j = width; j < width + cnn_config->ext_width; ++j) |
1109 | input[i * in_stride + j] = input[i * in_stride + width - 1]; |
1110 | } |
1111 | // extend top and bottom |
1112 | for (int i = -cnn_config->ext_height; i < 0; ++i) |
1113 | memcpy(&input[i * in_stride - cnn_config->ext_width], |
1114 | &input[-cnn_config->ext_width], in_width * sizeof(*input)); |
1115 | for (int i = height; i < height + cnn_config->ext_height; ++i) |
1116 | memcpy(&input[i * in_stride - cnn_config->ext_width], |
1117 | &input[(height - 1) * in_stride - cnn_config->ext_width], |
1118 | in_width * sizeof(*input)); |
1119 | } else { |
1120 | for (int i = -cnn_config->ext_height; i < height + cnn_config->ext_height; |
1121 | ++i) |
1122 | for (int j = -cnn_config->ext_width; j < width + cnn_config->ext_width; |
1123 | ++j) |
1124 | input[i * in_stride + j] = (float)dgd[c][i * stride + j] / max_val; |
1125 | } |
1126 | } |
1127 | bool success = av1_cnn_predict((const float **)inputs, in_width, in_height,
1128 | in_stride, cnn_config, thread_data, output); |
1129 | |
1130 | aom_free(input_); |
1131 | return success; |
1132 | } |
1133 | |
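| // The high-bit-depth variant below mirrors the 8-bit path; only the sample
| // type (uint16_t) and the normalization divisor differ: (1 << bit_depth) - 1,
| // e.g. 1023 for 10-bit input instead of 255.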
1134 | // Assume output already has proper allocation |
1135 | // Assume input image buffers all have same resolution and strides |
1136 | bool av1_cnn_predict_img_multi_out_highbd(uint16_t **dgd, int width, int height,
1137 | int stride, |
1138 | const CNN_CONFIG *cnn_config, |
1139 | const CNN_THREAD_DATA *thread_data, |
1140 | int bit_depth, |
1141 | CNN_MULTI_OUT *output) { |
1142 | const float max_val = (float)((1 << bit_depth) - 1); |
1143 | |
1144 | const int in_width = width + 2 * cnn_config->ext_width; |
1145 | const int in_height = height + 2 * cnn_config->ext_height; |
1146 | const int in_channels = cnn_config->layer_config[0].in_channels; |
1147 | float *inputs[CNN_MAX_CHANNELS];
1148 | float *input_ = |
1149 | (float *)aom_malloc(in_width * in_height * in_channels * sizeof(*input_)); |
1150 | if (!input_) return false;
1151 | const int in_stride = in_width; |
1152 | |
1153 | for (int c = 0; c < in_channels; ++c) { |
1154 | inputs[c] = input_ + c * in_stride * in_height; |
1155 | float *input = |
1156 | inputs[c] + cnn_config->ext_height * in_stride + cnn_config->ext_width; |
1157 | |
1158 | if (cnn_config->strict_bounds) { |
1159 | for (int i = 0; i < height; ++i) |
1160 | for (int j = 0; j < width; ++j) |
1161 | input[i * in_stride + j] = (float)dgd[c][i * stride + j] / max_val; |
1162 | // extend left and right |
1163 | for (int i = 0; i < height; ++i) { |
1164 | for (int j = -cnn_config->ext_width; j < 0; ++j) |
1165 | input[i * in_stride + j] = input[i * in_stride]; |
1166 | for (int j = width; j < width + cnn_config->ext_width; ++j) |
1167 | input[i * in_stride + j] = input[i * in_stride + width - 1]; |
1168 | } |
1169 | // extend top and bottom |
1170 | for (int i = -cnn_config->ext_height; i < 0; ++i) |
1171 | memcpy(&input[i * in_stride - cnn_config->ext_width], |
1172 | &input[-cnn_config->ext_width], in_width * sizeof(*input)); |
1173 | for (int i = height; i < height + cnn_config->ext_height; ++i) |
1174 | memcpy(&input[i * in_stride - cnn_config->ext_width], |
1175 | &input[(height - 1) * in_stride - cnn_config->ext_width], |
1176 | in_width * sizeof(*input)); |
1177 | } else { |
1178 | for (int i = -cnn_config->ext_height; i < height + cnn_config->ext_height; |
1179 | ++i) |
1180 | for (int j = -cnn_config->ext_width; j < width + cnn_config->ext_width; |
1181 | ++j) |
1182 | input[i * in_stride + j] = (float)dgd[c][i * stride + j] / max_val; |
1183 | } |
1184 | } |
1185 | |
1186 | bool success = av1_cnn_predict((const float **)inputs, in_width, in_height,
1187 | in_stride, cnn_config, thread_data, output); |
1188 | |
1189 | aom_free(input_); |
1190 | return success; |
1191 | } |