File: | root/firefox-clang/third_party/rust/glslopt/glsl-optimizer/src/compiler/glsl/link_varyings.cpp |
Warning: | line 2777, column 7 Access to field 'ir' results in a dereference of a null pointer (loaded from variable 'consumer') |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | /* | |||
2 | * Copyright © 2012 Intel Corporation | |||
3 | * | |||
4 | * Permission is hereby granted, free of charge, to any person obtaining a | |||
5 | * copy of this software and associated documentation files (the "Software"), | |||
6 | * to deal in the Software without restriction, including without limitation | |||
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | |||
8 | * and/or sell copies of the Software, and to permit persons to whom the | |||
9 | * Software is furnished to do so, subject to the following conditions: | |||
10 | * | |||
11 | * The above copyright notice and this permission notice (including the next | |||
12 | * paragraph) shall be included in all copies or substantial portions of the | |||
13 | * Software. | |||
14 | * | |||
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |||
18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |||
20 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | |||
21 | * DEALINGS IN THE SOFTWARE. | |||
22 | */ | |||
23 | ||||
24 | /** | |||
25 | * \file link_varyings.cpp | |||
26 | * | |||
27 | * Linker functions related specifically to linking varyings between shader | |||
28 | * stages. | |||
29 | */ | |||
30 | ||||
31 | ||||
32 | #include "main/errors.h" | |||
33 | #include "main/mtypes.h" | |||
34 | #include "glsl_symbol_table.h" | |||
35 | #include "glsl_parser_extras.h" | |||
36 | #include "ir_optimization.h" | |||
37 | #include "linker.h" | |||
38 | #include "link_varyings.h" | |||
39 | #include "main/macros.h" | |||
40 | #include "util/hash_table.h" | |||
41 | #include "util/u_math.h" | |||
42 | #include "program.h" | |||
43 | ||||
44 | ||||
45 | /** | |||
46 | * Get the varying type stripped of the outermost array if we're processing | |||
47 | * a stage whose varyings are arrays indexed by a vertex number (such as | |||
48 | * geometry shader inputs). | |||
49 | */ | |||
50 | static const glsl_type * | |||
51 | get_varying_type(const ir_variable *var, gl_shader_stage stage) | |||
52 | { | |||
53 | const glsl_type *type = var->type; | |||
54 | ||||
55 | if (!var->data.patch && | |||
56 | ((var->data.mode == ir_var_shader_out && | |||
57 | stage == MESA_SHADER_TESS_CTRL) || | |||
58 | (var->data.mode == ir_var_shader_in && | |||
59 | (stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_TESS_EVAL || | |||
60 | stage == MESA_SHADER_GEOMETRY)))) { | |||
61 | assert(type->is_array())(static_cast <bool> (type->is_array()) ? void (0) : __assert_fail ("type->is_array()", __builtin_FILE (), __builtin_LINE () , __extension__ __PRETTY_FUNCTION__)); | |||
62 | type = type->fields.array; | |||
63 | } | |||
64 | ||||
65 | return type; | |||
66 | } | |||
67 | ||||
68 | static void | |||
69 | create_xfb_varying_names(void *mem_ctx, const glsl_type *t, char **name, | |||
70 | size_t name_length, unsigned *count, | |||
71 | const char *ifc_member_name, | |||
72 | const glsl_type *ifc_member_t, char ***varying_names) | |||
73 | { | |||
74 | if (t->is_interface()) { | |||
75 | size_t new_length = name_length; | |||
76 | ||||
77 | assert(ifc_member_name && ifc_member_t)(static_cast <bool> (ifc_member_name && ifc_member_t ) ? void (0) : __assert_fail ("ifc_member_name && ifc_member_t" , __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__ )); | |||
78 | ralloc_asprintf_rewrite_tail(name, &new_length, ".%s", ifc_member_name); | |||
79 | ||||
80 | create_xfb_varying_names(mem_ctx, ifc_member_t, name, new_length, count, | |||
81 | NULL__null, NULL__null, varying_names); | |||
82 | } else if (t->is_struct()) { | |||
83 | for (unsigned i = 0; i < t->length; i++) { | |||
84 | const char *field = t->fields.structure[i].name; | |||
85 | size_t new_length = name_length; | |||
86 | ||||
87 | ralloc_asprintf_rewrite_tail(name, &new_length, ".%s", field); | |||
88 | ||||
89 | create_xfb_varying_names(mem_ctx, t->fields.structure[i].type, name, | |||
90 | new_length, count, NULL__null, NULL__null, | |||
91 | varying_names); | |||
92 | } | |||
93 | } else if (t->without_array()->is_struct() || | |||
94 | t->without_array()->is_interface() || | |||
95 | (t->is_array() && t->fields.array->is_array())) { | |||
96 | for (unsigned i = 0; i < t->length; i++) { | |||
97 | size_t new_length = name_length; | |||
98 | ||||
99 | /* Append the subscript to the current variable name */ | |||
100 | ralloc_asprintf_rewrite_tail(name, &new_length, "[%u]", i); | |||
101 | ||||
102 | create_xfb_varying_names(mem_ctx, t->fields.array, name, new_length, | |||
103 | count, ifc_member_name, ifc_member_t, | |||
104 | varying_names); | |||
105 | } | |||
106 | } else { | |||
107 | (*varying_names)[(*count)++] = ralloc_strdup(mem_ctx, *name); | |||
108 | } | |||
109 | } | |||
110 | ||||
111 | static bool | |||
112 | process_xfb_layout_qualifiers(void *mem_ctx, const gl_linked_shader *sh, | |||
113 | struct gl_shader_program *prog, | |||
114 | unsigned *num_tfeedback_decls, | |||
115 | char ***varying_names) | |||
116 | { | |||
117 | bool has_xfb_qualifiers = false; | |||
118 | ||||
119 | /* We still need to enable transform feedback mode even if xfb_stride is | |||
120 | * only applied to a global out. Also we don't bother to propagate | |||
121 | * xfb_stride to interface block members so this will catch that case also. | |||
122 | */ | |||
123 | for (unsigned j = 0; j < MAX_FEEDBACK_BUFFERS4; j++) { | |||
124 | if (prog->TransformFeedback.BufferStride[j]) { | |||
125 | has_xfb_qualifiers = true; | |||
126 | break; | |||
127 | } | |||
128 | } | |||
129 | ||||
130 | foreach_in_list(ir_instruction, node, sh->ir)for (ir_instruction *node = (!exec_node_is_tail_sentinel((sh-> ir)->head_sentinel.next) ? (ir_instruction *) ((sh->ir) ->head_sentinel.next) : __null); (node) != __null; (node) = (!exec_node_is_tail_sentinel((node)->next) ? (ir_instruction *) ((node)->next) : __null)) { | |||
131 | ir_variable *var = node->as_variable(); | |||
132 | if (!var || var->data.mode != ir_var_shader_out) | |||
133 | continue; | |||
134 | ||||
135 | /* From the ARB_enhanced_layouts spec: | |||
136 | * | |||
137 | * "Any shader making any static use (after preprocessing) of any of | |||
138 | * these *xfb_* qualifiers will cause the shader to be in a | |||
139 | * transform feedback capturing mode and hence responsible for | |||
140 | * describing the transform feedback setup. This mode will capture | |||
141 | * any output selected by *xfb_offset*, directly or indirectly, to | |||
142 | * a transform feedback buffer." | |||
143 | */ | |||
144 | if (var->data.explicit_xfb_buffer || var->data.explicit_xfb_stride) { | |||
145 | has_xfb_qualifiers = true; | |||
146 | } | |||
147 | ||||
148 | if (var->data.explicit_xfb_offset) { | |||
149 | *num_tfeedback_decls += var->type->varying_count(); | |||
150 | has_xfb_qualifiers = true; | |||
151 | } | |||
152 | } | |||
153 | ||||
154 | if (*num_tfeedback_decls == 0) | |||
155 | return has_xfb_qualifiers; | |||
156 | ||||
157 | unsigned i = 0; | |||
158 | *varying_names = ralloc_array(mem_ctx, char *, *num_tfeedback_decls)((char * *) ralloc_array_size(mem_ctx, sizeof(char *), *num_tfeedback_decls )); | |||
159 | foreach_in_list(ir_instruction, node, sh->ir)for (ir_instruction *node = (!exec_node_is_tail_sentinel((sh-> ir)->head_sentinel.next) ? (ir_instruction *) ((sh->ir) ->head_sentinel.next) : __null); (node) != __null; (node) = (!exec_node_is_tail_sentinel((node)->next) ? (ir_instruction *) ((node)->next) : __null)) { | |||
160 | ir_variable *var = node->as_variable(); | |||
161 | if (!var || var->data.mode != ir_var_shader_out) | |||
162 | continue; | |||
163 | ||||
164 | if (var->data.explicit_xfb_offset) { | |||
165 | char *name; | |||
166 | const glsl_type *type, *member_type; | |||
167 | ||||
168 | if (var->data.from_named_ifc_block) { | |||
169 | type = var->get_interface_type(); | |||
170 | ||||
171 | /* Find the member type before it was altered by lowering */ | |||
172 | const glsl_type *type_wa = type->without_array(); | |||
173 | member_type = | |||
174 | type_wa->fields.structure[type_wa->field_index(var->name)].type; | |||
175 | name = ralloc_strdup(NULL__null, type_wa->name); | |||
176 | } else { | |||
177 | type = var->type; | |||
178 | member_type = NULL__null; | |||
179 | name = ralloc_strdup(NULL__null, var->name); | |||
180 | } | |||
181 | create_xfb_varying_names(mem_ctx, type, &name, strlen(name), &i, | |||
182 | var->name, member_type, varying_names); | |||
183 | ralloc_free(name); | |||
184 | } | |||
185 | } | |||
186 | ||||
187 | assert(i == *num_tfeedback_decls)(static_cast <bool> (i == *num_tfeedback_decls) ? void ( 0) : __assert_fail ("i == *num_tfeedback_decls", __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__)); | |||
188 | return has_xfb_qualifiers; | |||
189 | } | |||
190 | ||||
191 | /** | |||
192 | * Validate the types and qualifiers of an output from one stage against the | |||
193 | * matching input to another stage. | |||
194 | */ | |||
195 | static void | |||
196 | cross_validate_types_and_qualifiers(struct gl_context *ctx, | |||
197 | struct gl_shader_program *prog, | |||
198 | const ir_variable *input, | |||
199 | const ir_variable *output, | |||
200 | gl_shader_stage consumer_stage, | |||
201 | gl_shader_stage producer_stage) | |||
202 | { | |||
203 | /* Check that the types match between stages. | |||
204 | */ | |||
205 | const glsl_type *type_to_match = input->type; | |||
206 | ||||
207 | /* VS -> GS, VS -> TCS, VS -> TES, TES -> GS */ | |||
208 | const bool extra_array_level = (producer_stage == MESA_SHADER_VERTEX && | |||
209 | consumer_stage != MESA_SHADER_FRAGMENT) || | |||
210 | consumer_stage == MESA_SHADER_GEOMETRY; | |||
211 | if (extra_array_level) { | |||
212 | assert(type_to_match->is_array())(static_cast <bool> (type_to_match->is_array()) ? void (0) : __assert_fail ("type_to_match->is_array()", __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__)); | |||
213 | type_to_match = type_to_match->fields.array; | |||
214 | } | |||
215 | ||||
216 | if (type_to_match != output->type) { | |||
217 | if (output->type->is_struct()) { | |||
218 | /* Structures across shader stages can have different name | |||
219 | * and considered to match in type if and only if structure | |||
220 | * members match in name, type, qualification, and declaration | |||
221 | * order. The precision doesn’t need to match. | |||
222 | */ | |||
223 | if (!output->type->record_compare(type_to_match, | |||
224 | false, /* match_name */ | |||
225 | true, /* match_locations */ | |||
226 | false /* match_precision */)) { | |||
227 | linker_error(prog, | |||
228 | "%s shader output `%s' declared as struct `%s', " | |||
229 | "doesn't match in type with %s shader input " | |||
230 | "declared as struct `%s'\n", | |||
231 | _mesa_shader_stage_to_string(producer_stage), | |||
232 | output->name, | |||
233 | output->type->name, | |||
234 | _mesa_shader_stage_to_string(consumer_stage), | |||
235 | input->type->name); | |||
236 | } | |||
237 | } else if (!output->type->is_array() || !is_gl_identifier(output->name)) { | |||
238 | /* There is a bit of a special case for gl_TexCoord. This | |||
239 | * built-in is unsized by default. Applications that variable | |||
240 | * access it must redeclare it with a size. There is some | |||
241 | * language in the GLSL spec that implies the fragment shader | |||
242 | * and vertex shader do not have to agree on this size. Other | |||
243 | * driver behave this way, and one or two applications seem to | |||
244 | * rely on it. | |||
245 | * | |||
246 | * Neither declaration needs to be modified here because the array | |||
247 | * sizes are fixed later when update_array_sizes is called. | |||
248 | * | |||
249 | * From page 48 (page 54 of the PDF) of the GLSL 1.10 spec: | |||
250 | * | |||
251 | * "Unlike user-defined varying variables, the built-in | |||
252 | * varying variables don't have a strict one-to-one | |||
253 | * correspondence between the vertex language and the | |||
254 | * fragment language." | |||
255 | */ | |||
256 | linker_error(prog, | |||
257 | "%s shader output `%s' declared as type `%s', " | |||
258 | "but %s shader input declared as type `%s'\n", | |||
259 | _mesa_shader_stage_to_string(producer_stage), | |||
260 | output->name, | |||
261 | output->type->name, | |||
262 | _mesa_shader_stage_to_string(consumer_stage), | |||
263 | input->type->name); | |||
264 | return; | |||
265 | } | |||
266 | } | |||
267 | ||||
268 | /* Check that all of the qualifiers match between stages. | |||
269 | */ | |||
270 | ||||
271 | /* According to the OpenGL and OpenGLES GLSL specs, the centroid qualifier | |||
272 | * should match until OpenGL 4.3 and OpenGLES 3.1. The OpenGLES 3.0 | |||
273 | * conformance test suite does not verify that the qualifiers must match. | |||
274 | * The deqp test suite expects the opposite (OpenGLES 3.1) behavior for | |||
275 | * OpenGLES 3.0 drivers, so we relax the checking in all cases. | |||
276 | */ | |||
277 | if (false /* always skip the centroid check */ && | |||
278 | prog->data->Version < (prog->IsES ? 310 : 430) && | |||
279 | input->data.centroid != output->data.centroid) { | |||
280 | linker_error(prog, | |||
281 | "%s shader output `%s' %s centroid qualifier, " | |||
282 | "but %s shader input %s centroid qualifier\n", | |||
283 | _mesa_shader_stage_to_string(producer_stage), | |||
284 | output->name, | |||
285 | (output->data.centroid) ? "has" : "lacks", | |||
286 | _mesa_shader_stage_to_string(consumer_stage), | |||
287 | (input->data.centroid) ? "has" : "lacks"); | |||
288 | return; | |||
289 | } | |||
290 | ||||
291 | if (input->data.sample != output->data.sample) { | |||
292 | linker_error(prog, | |||
293 | "%s shader output `%s' %s sample qualifier, " | |||
294 | "but %s shader input %s sample qualifier\n", | |||
295 | _mesa_shader_stage_to_string(producer_stage), | |||
296 | output->name, | |||
297 | (output->data.sample) ? "has" : "lacks", | |||
298 | _mesa_shader_stage_to_string(consumer_stage), | |||
299 | (input->data.sample) ? "has" : "lacks"); | |||
300 | return; | |||
301 | } | |||
302 | ||||
303 | if (input->data.patch != output->data.patch) { | |||
304 | linker_error(prog, | |||
305 | "%s shader output `%s' %s patch qualifier, " | |||
306 | "but %s shader input %s patch qualifier\n", | |||
307 | _mesa_shader_stage_to_string(producer_stage), | |||
308 | output->name, | |||
309 | (output->data.patch) ? "has" : "lacks", | |||
310 | _mesa_shader_stage_to_string(consumer_stage), | |||
311 | (input->data.patch) ? "has" : "lacks"); | |||
312 | return; | |||
313 | } | |||
314 | ||||
315 | /* The GLSL 4.30 and GLSL ES 3.00 specifications say: | |||
316 | * | |||
317 | * "As only outputs need be declared with invariant, an output from | |||
318 | * one shader stage will still match an input of a subsequent stage | |||
319 | * without the input being declared as invariant." | |||
320 | * | |||
321 | * while GLSL 4.20 says: | |||
322 | * | |||
323 | * "For variables leaving one shader and coming into another shader, | |||
324 | * the invariant keyword has to be used in both shaders, or a link | |||
325 | * error will result." | |||
326 | * | |||
327 | * and GLSL ES 1.00 section 4.6.4 "Invariance and Linking" says: | |||
328 | * | |||
329 | * "The invariance of varyings that are declared in both the vertex | |||
330 | * and fragment shaders must match." | |||
331 | */ | |||
332 | if (input->data.explicit_invariant != output->data.explicit_invariant && | |||
333 | prog->data->Version < (prog->IsES ? 300 : 430)) { | |||
334 | linker_error(prog, | |||
335 | "%s shader output `%s' %s invariant qualifier, " | |||
336 | "but %s shader input %s invariant qualifier\n", | |||
337 | _mesa_shader_stage_to_string(producer_stage), | |||
338 | output->name, | |||
339 | (output->data.explicit_invariant) ? "has" : "lacks", | |||
340 | _mesa_shader_stage_to_string(consumer_stage), | |||
341 | (input->data.explicit_invariant) ? "has" : "lacks"); | |||
342 | return; | |||
343 | } | |||
344 | ||||
345 | /* GLSL >= 4.40 removes text requiring interpolation qualifiers | |||
346 | * to match cross stage, they must only match within the same stage. | |||
347 | * | |||
348 | * From page 84 (page 90 of the PDF) of the GLSL 4.40 spec: | |||
349 | * | |||
350 | * "It is a link-time error if, within the same stage, the interpolation | |||
351 | * qualifiers of variables of the same name do not match. | |||
352 | * | |||
353 | * Section 4.3.9 (Interpolation) of the GLSL ES 3.00 spec says: | |||
354 | * | |||
355 | * "When no interpolation qualifier is present, smooth interpolation | |||
356 | * is used." | |||
357 | * | |||
358 | * So we match variables where one is smooth and the other has no explicit | |||
359 | * qualifier. | |||
360 | */ | |||
361 | unsigned input_interpolation = input->data.interpolation; | |||
362 | unsigned output_interpolation = output->data.interpolation; | |||
363 | if (prog->IsES) { | |||
364 | if (input_interpolation == INTERP_MODE_NONE) | |||
365 | input_interpolation = INTERP_MODE_SMOOTH; | |||
366 | if (output_interpolation == INTERP_MODE_NONE) | |||
367 | output_interpolation = INTERP_MODE_SMOOTH; | |||
368 | } | |||
369 | if (input_interpolation != output_interpolation && | |||
370 | prog->data->Version < 440) { | |||
371 | if (!ctx->Const.AllowGLSLCrossStageInterpolationMismatch) { | |||
372 | linker_error(prog, | |||
373 | "%s shader output `%s' specifies %s " | |||
374 | "interpolation qualifier, " | |||
375 | "but %s shader input specifies %s " | |||
376 | "interpolation qualifier\n", | |||
377 | _mesa_shader_stage_to_string(producer_stage), | |||
378 | output->name, | |||
379 | interpolation_string(output->data.interpolation), | |||
380 | _mesa_shader_stage_to_string(consumer_stage), | |||
381 | interpolation_string(input->data.interpolation)); | |||
382 | return; | |||
383 | } else { | |||
384 | linker_warning(prog, | |||
385 | "%s shader output `%s' specifies %s " | |||
386 | "interpolation qualifier, " | |||
387 | "but %s shader input specifies %s " | |||
388 | "interpolation qualifier\n", | |||
389 | _mesa_shader_stage_to_string(producer_stage), | |||
390 | output->name, | |||
391 | interpolation_string(output->data.interpolation), | |||
392 | _mesa_shader_stage_to_string(consumer_stage), | |||
393 | interpolation_string(input->data.interpolation)); | |||
394 | } | |||
395 | } | |||
396 | } | |||
397 | ||||
398 | /** | |||
399 | * Validate front and back color outputs against single color input | |||
400 | */ | |||
401 | static void | |||
402 | cross_validate_front_and_back_color(struct gl_context *ctx, | |||
403 | struct gl_shader_program *prog, | |||
404 | const ir_variable *input, | |||
405 | const ir_variable *front_color, | |||
406 | const ir_variable *back_color, | |||
407 | gl_shader_stage consumer_stage, | |||
408 | gl_shader_stage producer_stage) | |||
409 | { | |||
410 | if (front_color != NULL__null && front_color->data.assigned) | |||
411 | cross_validate_types_and_qualifiers(ctx, prog, input, front_color, | |||
412 | consumer_stage, producer_stage); | |||
413 | ||||
414 | if (back_color != NULL__null && back_color->data.assigned) | |||
415 | cross_validate_types_and_qualifiers(ctx, prog, input, back_color, | |||
416 | consumer_stage, producer_stage); | |||
417 | } | |||
418 | ||||
419 | static unsigned | |||
420 | compute_variable_location_slot(ir_variable *var, gl_shader_stage stage) | |||
421 | { | |||
422 | unsigned location_start = VARYING_SLOT_VAR0; | |||
423 | ||||
424 | switch (stage) { | |||
425 | case MESA_SHADER_VERTEX: | |||
426 | if (var->data.mode == ir_var_shader_in) | |||
427 | location_start = VERT_ATTRIB_GENERIC0; | |||
428 | break; | |||
429 | case MESA_SHADER_TESS_CTRL: | |||
430 | case MESA_SHADER_TESS_EVAL: | |||
431 | if (var->data.patch) | |||
432 | location_start = VARYING_SLOT_PATCH0((VARYING_SLOT_VAR0 + 32)); | |||
433 | break; | |||
434 | case MESA_SHADER_FRAGMENT: | |||
435 | if (var->data.mode == ir_var_shader_out) | |||
436 | location_start = FRAG_RESULT_DATA0; | |||
437 | break; | |||
438 | default: | |||
439 | break; | |||
440 | } | |||
441 | ||||
442 | return var->data.location - location_start; | |||
443 | } | |||
444 | ||||
445 | struct explicit_location_info { | |||
446 | ir_variable *var; | |||
447 | bool base_type_is_integer; | |||
448 | unsigned base_type_bit_size; | |||
449 | unsigned interpolation; | |||
450 | bool centroid; | |||
451 | bool sample; | |||
452 | bool patch; | |||
453 | }; | |||
454 | ||||
455 | static bool | |||
456 | check_location_aliasing(struct explicit_location_info explicit_locations[][4], | |||
457 | ir_variable *var, | |||
458 | unsigned location, | |||
459 | unsigned component, | |||
460 | unsigned location_limit, | |||
461 | const glsl_type *type, | |||
462 | unsigned interpolation, | |||
463 | bool centroid, | |||
464 | bool sample, | |||
465 | bool patch, | |||
466 | gl_shader_program *prog, | |||
467 | gl_shader_stage stage) | |||
468 | { | |||
469 | unsigned last_comp; | |||
470 | unsigned base_type_bit_size; | |||
471 | const glsl_type *type_without_array = type->without_array(); | |||
472 | const bool base_type_is_integer = | |||
473 | glsl_base_type_is_integer(type_without_array->base_type); | |||
474 | const bool is_struct = type_without_array->is_struct(); | |||
475 | if (is_struct) { | |||
476 | /* structs don't have a defined underlying base type so just treat all | |||
477 | * component slots as used and set the bit size to 0. If there is | |||
478 | * location aliasing, we'll fail anyway later. | |||
479 | */ | |||
480 | last_comp = 4; | |||
481 | base_type_bit_size = 0; | |||
482 | } else { | |||
483 | unsigned dmul = type_without_array->is_64bit() ? 2 : 1; | |||
484 | last_comp = component + type_without_array->vector_elements * dmul; | |||
485 | base_type_bit_size = | |||
486 | glsl_base_type_get_bit_size(type_without_array->base_type); | |||
487 | } | |||
488 | ||||
489 | while (location < location_limit) { | |||
490 | unsigned comp = 0; | |||
491 | while (comp < 4) { | |||
492 | struct explicit_location_info *info = | |||
493 | &explicit_locations[location][comp]; | |||
494 | ||||
495 | if (info->var) { | |||
496 | if (info->var->type->without_array()->is_struct() || is_struct) { | |||
497 | /* Structs cannot share location since they are incompatible | |||
498 | * with any other underlying numerical type. | |||
499 | */ | |||
500 | linker_error(prog, | |||
501 | "%s shader has multiple %sputs sharing the " | |||
502 | "same location that don't have the same " | |||
503 | "underlying numerical type. Struct variable '%s', " | |||
504 | "location %u\n", | |||
505 | _mesa_shader_stage_to_string(stage), | |||
506 | var->data.mode == ir_var_shader_in ? "in" : "out", | |||
507 | is_struct ? var->name : info->var->name, | |||
508 | location); | |||
509 | return false; | |||
510 | } else if (comp >= component && comp < last_comp) { | |||
511 | /* Component aliasing is not allowed */ | |||
512 | linker_error(prog, | |||
513 | "%s shader has multiple %sputs explicitly " | |||
514 | "assigned to location %d and component %d\n", | |||
515 | _mesa_shader_stage_to_string(stage), | |||
516 | var->data.mode == ir_var_shader_in ? "in" : "out", | |||
517 | location, comp); | |||
518 | return false; | |||
519 | } else { | |||
520 | /* From the OpenGL 4.60.5 spec, section 4.4.1 Input Layout | |||
521 | * Qualifiers, Page 67, (Location aliasing): | |||
522 | * | |||
523 | * " Further, when location aliasing, the aliases sharing the | |||
524 | * location must have the same underlying numerical type | |||
525 | * and bit width (floating-point or integer, 32-bit versus | |||
526 | * 64-bit, etc.) and the same auxiliary storage and | |||
527 | * interpolation qualification." | |||
528 | */ | |||
529 | ||||
530 | /* If the underlying numerical type isn't integer, implicitly | |||
531 | * it will be float or else we would have failed by now. | |||
532 | */ | |||
533 | if (info->base_type_is_integer != base_type_is_integer) { | |||
534 | linker_error(prog, | |||
535 | "%s shader has multiple %sputs sharing the " | |||
536 | "same location that don't have the same " | |||
537 | "underlying numerical type. Location %u " | |||
538 | "component %u.\n", | |||
539 | _mesa_shader_stage_to_string(stage), | |||
540 | var->data.mode == ir_var_shader_in ? | |||
541 | "in" : "out", location, comp); | |||
542 | return false; | |||
543 | } | |||
544 | ||||
545 | if (info->base_type_bit_size != base_type_bit_size) { | |||
546 | linker_error(prog, | |||
547 | "%s shader has multiple %sputs sharing the " | |||
548 | "same location that don't have the same " | |||
549 | "underlying numerical bit size. Location %u " | |||
550 | "component %u.\n", | |||
551 | _mesa_shader_stage_to_string(stage), | |||
552 | var->data.mode == ir_var_shader_in ? | |||
553 | "in" : "out", location, comp); | |||
554 | return false; | |||
555 | } | |||
556 | ||||
557 | if (info->interpolation != interpolation) { | |||
558 | linker_error(prog, | |||
559 | "%s shader has multiple %sputs sharing the " | |||
560 | "same location that don't have the same " | |||
561 | "interpolation qualification. Location %u " | |||
562 | "component %u.\n", | |||
563 | _mesa_shader_stage_to_string(stage), | |||
564 | var->data.mode == ir_var_shader_in ? | |||
565 | "in" : "out", location, comp); | |||
566 | return false; | |||
567 | } | |||
568 | ||||
569 | if (info->centroid != centroid || | |||
570 | info->sample != sample || | |||
571 | info->patch != patch) { | |||
572 | linker_error(prog, | |||
573 | "%s shader has multiple %sputs sharing the " | |||
574 | "same location that don't have the same " | |||
575 | "auxiliary storage qualification. Location %u " | |||
576 | "component %u.\n", | |||
577 | _mesa_shader_stage_to_string(stage), | |||
578 | var->data.mode == ir_var_shader_in ? | |||
579 | "in" : "out", location, comp); | |||
580 | return false; | |||
581 | } | |||
582 | } | |||
583 | } else if (comp >= component && comp < last_comp) { | |||
584 | info->var = var; | |||
585 | info->base_type_is_integer = base_type_is_integer; | |||
586 | info->base_type_bit_size = base_type_bit_size; | |||
587 | info->interpolation = interpolation; | |||
588 | info->centroid = centroid; | |||
589 | info->sample = sample; | |||
590 | info->patch = patch; | |||
591 | } | |||
592 | ||||
593 | comp++; | |||
594 | ||||
595 | /* We need to do some special handling for doubles as dvec3 and | |||
596 | * dvec4 consume two consecutive locations. We don't need to | |||
597 | * worry about components beginning at anything other than 0 as | |||
598 | * the spec does not allow this for dvec3 and dvec4. | |||
599 | */ | |||
600 | if (comp == 4 && last_comp > 4) { | |||
601 | last_comp = last_comp - 4; | |||
602 | /* Bump location index and reset the component index */ | |||
603 | location++; | |||
604 | comp = 0; | |||
605 | component = 0; | |||
606 | } | |||
607 | } | |||
608 | ||||
609 | location++; | |||
610 | } | |||
611 | ||||
612 | return true; | |||
613 | } | |||
614 | ||||
615 | static bool | |||
616 | validate_explicit_variable_location(struct gl_context *ctx, | |||
617 | struct explicit_location_info explicit_locations[][4], | |||
618 | ir_variable *var, | |||
619 | gl_shader_program *prog, | |||
620 | gl_linked_shader *sh) | |||
621 | { | |||
622 | const glsl_type *type = get_varying_type(var, sh->Stage); | |||
623 | unsigned num_elements = type->count_attribute_slots(false); | |||
624 | unsigned idx = compute_variable_location_slot(var, sh->Stage); | |||
625 | unsigned slot_limit = idx + num_elements; | |||
626 | ||||
627 | /* Vertex shader inputs and fragment shader outputs are validated in | |||
628 | * assign_attribute_or_color_locations() so we should not attempt to | |||
629 | * validate them again here. | |||
630 | */ | |||
631 | unsigned slot_max; | |||
632 | if (var->data.mode == ir_var_shader_out) { | |||
633 | assert(sh->Stage != MESA_SHADER_FRAGMENT)(static_cast <bool> (sh->Stage != MESA_SHADER_FRAGMENT ) ? void (0) : __assert_fail ("sh->Stage != MESA_SHADER_FRAGMENT" , __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__ )); | |||
634 | slot_max = | |||
635 | ctx->Const.Program[sh->Stage].MaxOutputComponents / 4; | |||
636 | } else { | |||
637 | assert(var->data.mode == ir_var_shader_in)(static_cast <bool> (var->data.mode == ir_var_shader_in ) ? void (0) : __assert_fail ("var->data.mode == ir_var_shader_in" , __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__ )); | |||
638 | assert(sh->Stage != MESA_SHADER_VERTEX)(static_cast <bool> (sh->Stage != MESA_SHADER_VERTEX ) ? void (0) : __assert_fail ("sh->Stage != MESA_SHADER_VERTEX" , __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__ )); | |||
639 | slot_max = | |||
640 | ctx->Const.Program[sh->Stage].MaxInputComponents / 4; | |||
641 | } | |||
642 | ||||
643 | if (slot_limit > slot_max) { | |||
644 | linker_error(prog, | |||
645 | "Invalid location %u in %s shader\n", | |||
646 | idx, _mesa_shader_stage_to_string(sh->Stage)); | |||
647 | return false; | |||
648 | } | |||
649 | ||||
650 | const glsl_type *type_without_array = type->without_array(); | |||
651 | if (type_without_array->is_interface()) { | |||
652 | for (unsigned i = 0; i < type_without_array->length; i++) { | |||
653 | glsl_struct_field *field = &type_without_array->fields.structure[i]; | |||
654 | unsigned field_location = field->location - | |||
655 | (field->patch ? VARYING_SLOT_PATCH0((VARYING_SLOT_VAR0 + 32)) : VARYING_SLOT_VAR0); | |||
656 | if (!check_location_aliasing(explicit_locations, var, | |||
657 | field_location, | |||
658 | 0, field_location + 1, | |||
659 | field->type, | |||
660 | field->interpolation, | |||
661 | field->centroid, | |||
662 | field->sample, | |||
663 | field->patch, | |||
664 | prog, sh->Stage)) { | |||
665 | return false; | |||
666 | } | |||
667 | } | |||
668 | } else if (!check_location_aliasing(explicit_locations, var, | |||
669 | idx, var->data.location_frac, | |||
670 | slot_limit, type, | |||
671 | var->data.interpolation, | |||
672 | var->data.centroid, | |||
673 | var->data.sample, | |||
674 | var->data.patch, | |||
675 | prog, sh->Stage)) { | |||
676 | return false; | |||
677 | } | |||
678 | ||||
679 | return true; | |||
680 | } | |||
681 | ||||
682 | /** | |||
683 | * Validate explicit locations for the inputs to the first stage and the | |||
684 | * outputs of the last stage in a program, if those are not the VS and FS | |||
685 | * shaders. | |||
686 | */ | |||
687 | void | |||
688 | validate_first_and_last_interface_explicit_locations(struct gl_context *ctx, | |||
689 | struct gl_shader_program *prog, | |||
690 | gl_shader_stage first_stage, | |||
691 | gl_shader_stage last_stage) | |||
692 | { | |||
693 | /* VS inputs and FS outputs are validated in | |||
694 | * assign_attribute_or_color_locations() | |||
695 | */ | |||
696 | bool validate_first_stage = first_stage != MESA_SHADER_VERTEX; | |||
697 | bool validate_last_stage = last_stage != MESA_SHADER_FRAGMENT; | |||
698 | if (!validate_first_stage && !validate_last_stage) | |||
699 | return; | |||
700 | ||||
701 | struct explicit_location_info explicit_locations[MAX_VARYING32][4]; | |||
702 | ||||
703 | gl_shader_stage stages[2] = { first_stage, last_stage }; | |||
704 | bool validate_stage[2] = { validate_first_stage, validate_last_stage }; | |||
705 | ir_variable_mode var_direction[2] = { ir_var_shader_in, ir_var_shader_out }; | |||
706 | ||||
707 | for (unsigned i = 0; i < 2; i++) { | |||
708 | if (!validate_stage[i]) | |||
709 | continue; | |||
710 | ||||
711 | gl_shader_stage stage = stages[i]; | |||
712 | ||||
713 | gl_linked_shader *sh = prog->_LinkedShaders[stage]; | |||
714 | assert(sh)(static_cast <bool> (sh) ? void (0) : __assert_fail ("sh" , __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__ )); | |||
715 | ||||
716 | memset(explicit_locations, 0, sizeof(explicit_locations)); | |||
717 | ||||
718 | foreach_in_list(ir_instruction, node, sh->ir)for (ir_instruction *node = (!exec_node_is_tail_sentinel((sh-> ir)->head_sentinel.next) ? (ir_instruction *) ((sh->ir) ->head_sentinel.next) : __null); (node) != __null; (node) = (!exec_node_is_tail_sentinel((node)->next) ? (ir_instruction *) ((node)->next) : __null)) { | |||
719 | ir_variable *const var = node->as_variable(); | |||
720 | ||||
721 | if (var == NULL__null || | |||
722 | !var->data.explicit_location || | |||
723 | var->data.location < VARYING_SLOT_VAR0 || | |||
724 | var->data.mode != var_direction[i]) | |||
725 | continue; | |||
726 | ||||
727 | if (!validate_explicit_variable_location( | |||
728 | ctx, explicit_locations, var, prog, sh)) { | |||
729 | return; | |||
730 | } | |||
731 | } | |||
732 | } | |||
733 | } | |||
734 | ||||
735 | /** | |||
736 | * Validate that outputs from one stage match inputs of another | |||
737 | */ | |||
738 | void | |||
739 | cross_validate_outputs_to_inputs(struct gl_context *ctx, | |||
740 | struct gl_shader_program *prog, | |||
741 | gl_linked_shader *producer, | |||
742 | gl_linked_shader *consumer) | |||
743 | { | |||
744 | glsl_symbol_table parameters; | |||
745 | struct explicit_location_info output_explicit_locations[MAX_VARYING32][4] = {}; | |||
746 | struct explicit_location_info input_explicit_locations[MAX_VARYING32][4] = {}; | |||
747 | ||||
748 | /* Find all shader outputs in the "producer" stage. | |||
749 | */ | |||
750 | foreach_in_list(ir_instruction, node, producer->ir)for (ir_instruction *node = (!exec_node_is_tail_sentinel((producer ->ir)->head_sentinel.next) ? (ir_instruction *) ((producer ->ir)->head_sentinel.next) : __null); (node) != __null; (node) = (!exec_node_is_tail_sentinel((node)->next) ? (ir_instruction *) ((node)->next) : __null)) { | |||
751 | ir_variable *const var = node->as_variable(); | |||
752 | ||||
753 | if (var == NULL__null || var->data.mode != ir_var_shader_out) | |||
754 | continue; | |||
755 | ||||
756 | if (!var->data.explicit_location | |||
757 | || var->data.location < VARYING_SLOT_VAR0) | |||
758 | parameters.add_variable(var); | |||
759 | else { | |||
760 | /* User-defined varyings with explicit locations are handled | |||
761 | * differently because they do not need to have matching names. | |||
762 | */ | |||
763 | if (!validate_explicit_variable_location(ctx, | |||
764 | output_explicit_locations, | |||
765 | var, prog, producer)) { | |||
766 | return; | |||
767 | } | |||
768 | } | |||
769 | } | |||
770 | ||||
771 | ||||
772 | /* Find all shader inputs in the "consumer" stage. Any variables that have | |||
773 | * matching outputs already in the symbol table must have the same type and | |||
774 | * qualifiers. | |||
775 | * | |||
776 | * Exception: if the consumer is the geometry shader, then the inputs | |||
777 | * should be arrays and the type of the array element should match the type | |||
778 | * of the corresponding producer output. | |||
779 | */ | |||
780 | foreach_in_list(ir_instruction, node, consumer->ir)for (ir_instruction *node = (!exec_node_is_tail_sentinel((consumer ->ir)->head_sentinel.next) ? (ir_instruction *) ((consumer ->ir)->head_sentinel.next) : __null); (node) != __null; (node) = (!exec_node_is_tail_sentinel((node)->next) ? (ir_instruction *) ((node)->next) : __null)) { | |||
781 | ir_variable *const input = node->as_variable(); | |||
782 | ||||
783 | if (input == NULL__null || input->data.mode != ir_var_shader_in) | |||
784 | continue; | |||
785 | ||||
786 | if (strcmp(input->name, "gl_Color") == 0 && input->data.used) { | |||
787 | const ir_variable *const front_color = | |||
788 | parameters.get_variable("gl_FrontColor"); | |||
789 | ||||
790 | const ir_variable *const back_color = | |||
791 | parameters.get_variable("gl_BackColor"); | |||
792 | ||||
793 | cross_validate_front_and_back_color(ctx, prog, input, | |||
794 | front_color, back_color, | |||
795 | consumer->Stage, producer->Stage); | |||
796 | } else if (strcmp(input->name, "gl_SecondaryColor") == 0 && input->data.used) { | |||
797 | const ir_variable *const front_color = | |||
798 | parameters.get_variable("gl_FrontSecondaryColor"); | |||
799 | ||||
800 | const ir_variable *const back_color = | |||
801 | parameters.get_variable("gl_BackSecondaryColor"); | |||
802 | ||||
803 | cross_validate_front_and_back_color(ctx, prog, input, | |||
804 | front_color, back_color, | |||
805 | consumer->Stage, producer->Stage); | |||
806 | } else { | |||
807 | /* The rules for connecting inputs and outputs change in the presence | |||
808 | * of explicit locations. In this case, we no longer care about the | |||
809 | * names of the variables. Instead, we care only about the | |||
810 | * explicitly assigned location. | |||
811 | */ | |||
812 | ir_variable *output = NULL__null; | |||
813 | if (input->data.explicit_location | |||
814 | && input->data.location >= VARYING_SLOT_VAR0) { | |||
815 | ||||
816 | const glsl_type *type = get_varying_type(input, consumer->Stage); | |||
817 | unsigned num_elements = type->count_attribute_slots(false); | |||
818 | unsigned idx = | |||
819 | compute_variable_location_slot(input, consumer->Stage); | |||
820 | unsigned slot_limit = idx + num_elements; | |||
821 | ||||
822 | if (!validate_explicit_variable_location(ctx, | |||
823 | input_explicit_locations, | |||
824 | input, prog, consumer)) { | |||
825 | return; | |||
826 | } | |||
827 | ||||
828 | while (idx < slot_limit) { | |||
829 | if (idx >= MAX_VARYING32) { | |||
830 | linker_error(prog, | |||
831 | "Invalid location %u in %s shader\n", idx, | |||
832 | _mesa_shader_stage_to_string(consumer->Stage)); | |||
833 | return; | |||
834 | } | |||
835 | ||||
836 | output = output_explicit_locations[idx][input->data.location_frac].var; | |||
837 | ||||
838 | if (output == NULL__null) { | |||
839 | /* A linker failure should only happen when there is no | |||
840 | * output declaration and there is Static Use of the | |||
841 | * declared input. | |||
842 | */ | |||
843 | if (input->data.used) { | |||
844 | linker_error(prog, | |||
845 | "%s shader input `%s' with explicit location " | |||
846 | "has no matching output\n", | |||
847 | _mesa_shader_stage_to_string(consumer->Stage), | |||
848 | input->name); | |||
849 | break; | |||
850 | } | |||
851 | } else if (input->data.location != output->data.location) { | |||
852 | linker_error(prog, | |||
853 | "%s shader input `%s' with explicit location " | |||
854 | "has no matching output\n", | |||
855 | _mesa_shader_stage_to_string(consumer->Stage), | |||
856 | input->name); | |||
857 | break; | |||
858 | } | |||
859 | idx++; | |||
860 | } | |||
861 | } else { | |||
862 | output = parameters.get_variable(input->name); | |||
863 | } | |||
864 | ||||
865 | if (output != NULL__null) { | |||
866 | /* Interface blocks have their own validation elsewhere so don't | |||
867 | * try validating them here. | |||
868 | */ | |||
869 | if (!(input->get_interface_type() && | |||
870 | output->get_interface_type())) | |||
871 | cross_validate_types_and_qualifiers(ctx, prog, input, output, | |||
872 | consumer->Stage, | |||
873 | producer->Stage); | |||
874 | } else { | |||
875 | /* Check for input vars with unmatched output vars in prev stage | |||
876 | * taking into account that interface blocks could have a matching | |||
877 | * output but with different name, so we ignore them. | |||
878 | */ | |||
879 | assert(!input->data.assigned)(static_cast <bool> (!input->data.assigned) ? void ( 0) : __assert_fail ("!input->data.assigned", __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__)); | |||
880 | if (input->data.used && !input->get_interface_type() && | |||
881 | !input->data.explicit_location) | |||
882 | linker_error(prog, | |||
883 | "%s shader input `%s' " | |||
884 | "has no matching output in the previous stage\n", | |||
885 | _mesa_shader_stage_to_string(consumer->Stage), | |||
886 | input->name); | |||
887 | } | |||
888 | } | |||
889 | } | |||
890 | } | |||
891 | ||||
892 | /** | |||
893 | * Demote shader inputs and outputs that are not used in other stages, and | |||
894 | * remove them via dead code elimination. | |||
895 | */ | |||
896 | static void | |||
897 | remove_unused_shader_inputs_and_outputs(bool is_separate_shader_object, | |||
898 | gl_linked_shader *sh, | |||
899 | enum ir_variable_mode mode) | |||
900 | { | |||
901 | if (is_separate_shader_object) | |||
902 | return; | |||
903 | ||||
904 | foreach_in_list(ir_instruction, node, sh->ir)for (ir_instruction *node = (!exec_node_is_tail_sentinel((sh-> ir)->head_sentinel.next) ? (ir_instruction *) ((sh->ir) ->head_sentinel.next) : __null); (node) != __null; (node) = (!exec_node_is_tail_sentinel((node)->next) ? (ir_instruction *) ((node)->next) : __null)) { | |||
905 | ir_variable *const var = node->as_variable(); | |||
906 | ||||
907 | if (var == NULL__null || var->data.mode != int(mode)) | |||
908 | continue; | |||
909 | ||||
910 | /* A shader 'in' or 'out' variable is only really an input or output if | |||
911 | * its value is used by other shader stages. This will cause the | |||
912 | * variable to have a location assigned. | |||
913 | */ | |||
914 | if (var->data.is_unmatched_generic_inout && !var->data.is_xfb_only) { | |||
915 | assert(var->data.mode != ir_var_temporary)(static_cast <bool> (var->data.mode != ir_var_temporary ) ? void (0) : __assert_fail ("var->data.mode != ir_var_temporary" , __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__ )); | |||
916 | ||||
917 | /* Assign zeros to demoted inputs to allow more optimizations. */ | |||
918 | if (var->data.mode == ir_var_shader_in && !var->constant_value) | |||
919 | var->constant_value = ir_constant::zero(var, var->type); | |||
920 | ||||
921 | var->data.mode = ir_var_auto; | |||
922 | } | |||
923 | } | |||
924 | ||||
925 | /* Eliminate code that is now dead due to unused inputs/outputs being | |||
926 | * demoted. | |||
927 | */ | |||
928 | while (do_dead_code(sh->ir, false)) | |||
929 | ; | |||
930 | ||||
931 | } | |||
932 | ||||
933 | /** | |||
934 | * Initialize this object based on a string that was passed to | |||
935 | * glTransformFeedbackVaryings. | |||
936 | * | |||
937 | * If the input is mal-formed, this call still succeeds, but it sets | |||
938 | * this->var_name to a mal-formed input, so tfeedback_decl::find_output_var() | |||
939 | * will fail to find any matching variable. | |||
940 | */ | |||
941 | void | |||
942 | tfeedback_decl::init(struct gl_context *ctx, const void *mem_ctx, | |||
943 | const char *input) | |||
944 | { | |||
945 | /* We don't have to be pedantic about what is a valid GLSL variable name, | |||
946 | * because any variable with an invalid name can't exist in the IR anyway. | |||
947 | */ | |||
948 | ||||
949 | this->location = -1; | |||
950 | this->orig_name = input; | |||
951 | this->lowered_builtin_array_variable = none; | |||
952 | this->skip_components = 0; | |||
953 | this->next_buffer_separator = false; | |||
954 | this->matched_candidate = NULL__null; | |||
955 | this->stream_id = 0; | |||
956 | this->buffer = 0; | |||
957 | this->offset = 0; | |||
958 | ||||
959 | if (ctx->Extensions.ARB_transform_feedback3) { | |||
960 | /* Parse gl_NextBuffer. */ | |||
961 | if (strcmp(input, "gl_NextBuffer") == 0) { | |||
962 | this->next_buffer_separator = true; | |||
963 | return; | |||
964 | } | |||
965 | ||||
966 | /* Parse gl_SkipComponents. */ | |||
967 | if (strcmp(input, "gl_SkipComponents1") == 0) | |||
968 | this->skip_components = 1; | |||
969 | else if (strcmp(input, "gl_SkipComponents2") == 0) | |||
970 | this->skip_components = 2; | |||
971 | else if (strcmp(input, "gl_SkipComponents3") == 0) | |||
972 | this->skip_components = 3; | |||
973 | else if (strcmp(input, "gl_SkipComponents4") == 0) | |||
974 | this->skip_components = 4; | |||
975 | ||||
976 | if (this->skip_components) | |||
977 | return; | |||
978 | } | |||
979 | ||||
980 | /* Parse a declaration. */ | |||
981 | const char *base_name_end; | |||
982 | long subscript = parse_program_resource_name(input, &base_name_end); | |||
983 | this->var_name = ralloc_strndup(mem_ctx, input, base_name_end - input); | |||
984 | if (this->var_name == NULL__null) { | |||
985 | _mesa_error_no_memory(__func____func__); | |||
986 | return; | |||
987 | } | |||
988 | ||||
989 | if (subscript >= 0) { | |||
990 | this->array_subscript = subscript; | |||
991 | this->is_subscripted = true; | |||
992 | } else { | |||
993 | this->is_subscripted = false; | |||
994 | } | |||
995 | ||||
996 | /* For drivers that lower gl_ClipDistance to gl_ClipDistanceMESA, this | |||
997 | * class must behave specially to account for the fact that gl_ClipDistance | |||
998 | * is converted from a float[8] to a vec4[2]. | |||
999 | */ | |||
1000 | if (ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].LowerCombinedClipCullDistance && | |||
1001 | strcmp(this->var_name, "gl_ClipDistance") == 0) { | |||
1002 | this->lowered_builtin_array_variable = clip_distance; | |||
1003 | } | |||
1004 | if (ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].LowerCombinedClipCullDistance && | |||
1005 | strcmp(this->var_name, "gl_CullDistance") == 0) { | |||
1006 | this->lowered_builtin_array_variable = cull_distance; | |||
1007 | } | |||
1008 | ||||
1009 | if (ctx->Const.LowerTessLevel && | |||
1010 | (strcmp(this->var_name, "gl_TessLevelOuter") == 0)) | |||
1011 | this->lowered_builtin_array_variable = tess_level_outer; | |||
1012 | if (ctx->Const.LowerTessLevel && | |||
1013 | (strcmp(this->var_name, "gl_TessLevelInner") == 0)) | |||
1014 | this->lowered_builtin_array_variable = tess_level_inner; | |||
1015 | } | |||
1016 | ||||
1017 | ||||
1018 | /** | |||
1019 | * Determine whether two tfeedback_decl objects refer to the same variable and | |||
1020 | * array index (if applicable). | |||
1021 | */ | |||
1022 | bool | |||
1023 | tfeedback_decl::is_same(const tfeedback_decl &x, const tfeedback_decl &y) | |||
1024 | { | |||
1025 | assert(x.is_varying() && y.is_varying())(static_cast <bool> (x.is_varying() && y.is_varying ()) ? void (0) : __assert_fail ("x.is_varying() && y.is_varying()" , __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__ )); | |||
1026 | ||||
1027 | if (strcmp(x.var_name, y.var_name) != 0) | |||
1028 | return false; | |||
1029 | if (x.is_subscripted != y.is_subscripted) | |||
1030 | return false; | |||
1031 | if (x.is_subscripted && x.array_subscript != y.array_subscript) | |||
1032 | return false; | |||
1033 | return true; | |||
1034 | } | |||
1035 | ||||
1036 | ||||
1037 | /** | |||
1038 | * Assign a location and stream ID for this tfeedback_decl object based on the | |||
1039 | * transform feedback candidate found by find_candidate. | |||
1040 | * | |||
1041 | * If an error occurs, the error is reported through linker_error() and false | |||
1042 | * is returned. | |||
1043 | */ | |||
1044 | bool | |||
1045 | tfeedback_decl::assign_location(struct gl_context *ctx, | |||
1046 | struct gl_shader_program *prog) | |||
1047 | { | |||
1048 | assert(this->is_varying())(static_cast <bool> (this->is_varying()) ? void (0) : __assert_fail ("this->is_varying()", __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__)); | |||
1049 | ||||
1050 | unsigned fine_location | |||
1051 | = this->matched_candidate->toplevel_var->data.location * 4 | |||
1052 | + this->matched_candidate->toplevel_var->data.location_frac | |||
1053 | + this->matched_candidate->offset; | |||
1054 | const unsigned dmul = | |||
1055 | this->matched_candidate->type->without_array()->is_64bit() ? 2 : 1; | |||
1056 | ||||
1057 | if (this->matched_candidate->type->is_array()) { | |||
1058 | /* Array variable */ | |||
1059 | const unsigned matrix_cols = | |||
1060 | this->matched_candidate->type->fields.array->matrix_columns; | |||
1061 | const unsigned vector_elements = | |||
1062 | this->matched_candidate->type->fields.array->vector_elements; | |||
1063 | unsigned actual_array_size; | |||
1064 | switch (this->lowered_builtin_array_variable) { | |||
1065 | case clip_distance: | |||
1066 | actual_array_size = prog->last_vert_prog ? | |||
1067 | prog->last_vert_prog->info.clip_distance_array_size : 0; | |||
1068 | break; | |||
1069 | case cull_distance: | |||
1070 | actual_array_size = prog->last_vert_prog ? | |||
1071 | prog->last_vert_prog->info.cull_distance_array_size : 0; | |||
1072 | break; | |||
1073 | case tess_level_outer: | |||
1074 | actual_array_size = 4; | |||
1075 | break; | |||
1076 | case tess_level_inner: | |||
1077 | actual_array_size = 2; | |||
1078 | break; | |||
1079 | case none: | |||
1080 | default: | |||
1081 | actual_array_size = this->matched_candidate->type->array_size(); | |||
1082 | break; | |||
1083 | } | |||
1084 | ||||
1085 | if (this->is_subscripted) { | |||
1086 | /* Check array bounds. */ | |||
1087 | if (this->array_subscript >= actual_array_size) { | |||
1088 | linker_error(prog, "Transform feedback varying %s has index " | |||
1089 | "%i, but the array size is %u.", | |||
1090 | this->orig_name, this->array_subscript, | |||
1091 | actual_array_size); | |||
1092 | return false; | |||
1093 | } | |||
1094 | unsigned array_elem_size = this->lowered_builtin_array_variable ? | |||
1095 | 1 : vector_elements * matrix_cols * dmul; | |||
1096 | fine_location += array_elem_size * this->array_subscript; | |||
1097 | this->size = 1; | |||
1098 | } else { | |||
1099 | this->size = actual_array_size; | |||
1100 | } | |||
1101 | this->vector_elements = vector_elements; | |||
1102 | this->matrix_columns = matrix_cols; | |||
1103 | if (this->lowered_builtin_array_variable) | |||
1104 | this->type = GL_FLOAT0x1406; | |||
1105 | else | |||
1106 | this->type = this->matched_candidate->type->fields.array->gl_type; | |||
1107 | } else { | |||
1108 | /* Regular variable (scalar, vector, or matrix) */ | |||
1109 | if (this->is_subscripted) { | |||
1110 | linker_error(prog, "Transform feedback varying %s requested, " | |||
1111 | "but %s is not an array.", | |||
1112 | this->orig_name, this->var_name); | |||
1113 | return false; | |||
1114 | } | |||
1115 | this->size = 1; | |||
1116 | this->vector_elements = this->matched_candidate->type->vector_elements; | |||
1117 | this->matrix_columns = this->matched_candidate->type->matrix_columns; | |||
1118 | this->type = this->matched_candidate->type->gl_type; | |||
1119 | } | |||
1120 | this->location = fine_location / 4; | |||
1121 | this->location_frac = fine_location % 4; | |||
1122 | ||||
1123 | /* From GL_EXT_transform_feedback: | |||
1124 | * A program will fail to link if: | |||
1125 | * | |||
1126 | * * the total number of components to capture in any varying | |||
1127 | * variable in <varyings> is greater than the constant | |||
1128 | * MAX_TRANSFORM_FEEDBACK_SEPARATE_COMPONENTS_EXT and the | |||
1129 | * buffer mode is SEPARATE_ATTRIBS_EXT; | |||
1130 | */ | |||
1131 | if (prog->TransformFeedback.BufferMode == GL_SEPARATE_ATTRIBS0x8C8D && | |||
1132 | this->num_components() > | |||
1133 | ctx->Const.MaxTransformFeedbackSeparateComponents) { | |||
1134 | linker_error(prog, "Transform feedback varying %s exceeds " | |||
1135 | "MAX_TRANSFORM_FEEDBACK_SEPARATE_COMPONENTS.", | |||
1136 | this->orig_name); | |||
1137 | return false; | |||
1138 | } | |||
1139 | ||||
1140 | /* Only transform feedback varyings can be assigned to non-zero streams, | |||
1141 | * so assign the stream id here. | |||
1142 | */ | |||
1143 | this->stream_id = this->matched_candidate->toplevel_var->data.stream; | |||
1144 | ||||
1145 | unsigned array_offset = this->array_subscript * 4 * dmul; | |||
1146 | unsigned struct_offset = this->matched_candidate->offset * 4 * dmul; | |||
1147 | this->buffer = this->matched_candidate->toplevel_var->data.xfb_buffer; | |||
1148 | this->offset = this->matched_candidate->toplevel_var->data.offset + | |||
1149 | array_offset + struct_offset; | |||
1150 | ||||
1151 | return true; | |||
1152 | } | |||
1153 | ||||
1154 | ||||
1155 | unsigned | |||
1156 | tfeedback_decl::get_num_outputs() const | |||
1157 | { | |||
1158 | if (!this->is_varying()) { | |||
1159 | return 0; | |||
1160 | } | |||
1161 | return (this->num_components() + this->location_frac + 3)/4; | |||
1162 | } | |||
1163 | ||||
1164 | ||||
1165 | /** | |||
1166 | * Update gl_transform_feedback_info to reflect this tfeedback_decl. | |||
1167 | * | |||
1168 | * If an error occurs, the error is reported through linker_error() and false | |||
1169 | * is returned. | |||
1170 | */ | |||
1171 | bool | |||
1172 | tfeedback_decl::store(struct gl_context *ctx, struct gl_shader_program *prog, | |||
1173 | struct gl_transform_feedback_info *info, | |||
1174 | unsigned buffer, unsigned buffer_index, | |||
1175 | const unsigned max_outputs, | |||
1176 | BITSET_WORDunsigned int *used_components[MAX_FEEDBACK_BUFFERS4], | |||
1177 | bool *explicit_stride, bool has_xfb_qualifiers, | |||
1178 | const void* mem_ctx) const | |||
1179 | { | |||
1180 | unsigned xfb_offset = 0; | |||
1181 | unsigned size = this->size; | |||
1182 | /* Handle gl_SkipComponents. */ | |||
1183 | if (this->skip_components) { | |||
1184 | info->Buffers[buffer].Stride += this->skip_components; | |||
1185 | size = this->skip_components; | |||
1186 | goto store_varying; | |||
1187 | } | |||
1188 | ||||
1189 | if (this->next_buffer_separator) { | |||
1190 | size = 0; | |||
1191 | goto store_varying; | |||
1192 | } | |||
1193 | ||||
1194 | if (has_xfb_qualifiers) { | |||
1195 | xfb_offset = this->offset / 4; | |||
1196 | } else { | |||
1197 | xfb_offset = info->Buffers[buffer].Stride; | |||
1198 | } | |||
1199 | info->Varyings[info->NumVarying].Offset = xfb_offset * 4; | |||
1200 | ||||
1201 | { | |||
1202 | unsigned location = this->location; | |||
1203 | unsigned location_frac = this->location_frac; | |||
1204 | unsigned num_components = this->num_components(); | |||
1205 | ||||
1206 | /* From GL_EXT_transform_feedback: | |||
1207 | * | |||
1208 | * " A program will fail to link if: | |||
1209 | * | |||
1210 | * * the total number of components to capture is greater than the | |||
1211 | * constant MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS_EXT | |||
1212 | * and the buffer mode is INTERLEAVED_ATTRIBS_EXT." | |||
1213 | * | |||
1214 | * From GL_ARB_enhanced_layouts: | |||
1215 | * | |||
1216 | * " The resulting stride (implicit or explicit) must be less than or | |||
1217 | * equal to the implementation-dependent constant | |||
1218 | * gl_MaxTransformFeedbackInterleavedComponents." | |||
1219 | */ | |||
1220 | if ((prog->TransformFeedback.BufferMode == GL_INTERLEAVED_ATTRIBS0x8C8C || | |||
1221 | has_xfb_qualifiers) && | |||
1222 | xfb_offset + num_components > | |||
1223 | ctx->Const.MaxTransformFeedbackInterleavedComponents) { | |||
1224 | linker_error(prog, | |||
1225 | "The MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS " | |||
1226 | "limit has been exceeded."); | |||
1227 | return false; | |||
1228 | } | |||
1229 | ||||
1230 | /* From the OpenGL 4.60.5 spec, section 4.4.2. Output Layout Qualifiers, | |||
1231 | * Page 76, (Transform Feedback Layout Qualifiers): | |||
1232 | * | |||
1233 | * " No aliasing in output buffers is allowed: It is a compile-time or | |||
1234 | * link-time error to specify variables with overlapping transform | |||
1235 | * feedback offsets." | |||
1236 | */ | |||
1237 | const unsigned max_components = | |||
1238 | ctx->Const.MaxTransformFeedbackInterleavedComponents; | |||
1239 | const unsigned first_component = xfb_offset; | |||
1240 | const unsigned last_component = xfb_offset + num_components - 1; | |||
1241 | const unsigned start_word = BITSET_BITWORD(first_component)((first_component) / (sizeof (unsigned int) * 8)); | |||
1242 | const unsigned end_word = BITSET_BITWORD(last_component)((last_component) / (sizeof (unsigned int) * 8)); | |||
1243 | BITSET_WORDunsigned int *used; | |||
1244 | assert(last_component < max_components)(static_cast <bool> (last_component < max_components ) ? void (0) : __assert_fail ("last_component < max_components" , __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__ )); | |||
1245 | ||||
1246 | if (!used_components[buffer]) { | |||
1247 | used_components[buffer] = | |||
1248 | rzalloc_array(mem_ctx, BITSET_WORD, BITSET_WORDS(max_components))((unsigned int *) rzalloc_array_size(mem_ctx, sizeof(unsigned int), (((max_components) + (sizeof (unsigned int) * 8) - 1) / (sizeof (unsigned int) * 8)))); | |||
1249 | } | |||
1250 | used = used_components[buffer]; | |||
1251 | ||||
1252 | for (unsigned word = start_word; word <= end_word; word++) { | |||
1253 | unsigned start_range = 0; | |||
1254 | unsigned end_range = BITSET_WORDBITS(sizeof (unsigned int) * 8) - 1; | |||
1255 | ||||
1256 | if (word == start_word) | |||
1257 | start_range = first_component % BITSET_WORDBITS(sizeof (unsigned int) * 8); | |||
1258 | ||||
1259 | if (word == end_word) | |||
1260 | end_range = last_component % BITSET_WORDBITS(sizeof (unsigned int) * 8); | |||
1261 | ||||
1262 | if (used[word] & BITSET_RANGE(start_range, end_range)((((((end_range) + 1) % (sizeof (unsigned int) * 8) == 0) ? ~ 0 : (1u << (((end_range) + 1) % (sizeof (unsigned int) * 8))) - 1)) & ~((1u << ((start_range) % (sizeof (unsigned int) * 8))) - 1))) { | |||
1263 | linker_error(prog, | |||
1264 | "variable '%s', xfb_offset (%d) is causing aliasing.", | |||
1265 | this->orig_name, xfb_offset * 4); | |||
1266 | return false; | |||
1267 | } | |||
1268 | used[word] |= BITSET_RANGE(start_range, end_range)((((((end_range) + 1) % (sizeof (unsigned int) * 8) == 0) ? ~ 0 : (1u << (((end_range) + 1) % (sizeof (unsigned int) * 8))) - 1)) & ~((1u << ((start_range) % (sizeof (unsigned int) * 8))) - 1)); | |||
1269 | } | |||
1270 | ||||
1271 | while (num_components > 0) { | |||
1272 | unsigned output_size = MIN2(num_components, 4 - location_frac)( (num_components)<(4 - location_frac) ? (num_components) : (4 - location_frac) ); | |||
1273 | assert((info->NumOutputs == 0 && max_outputs == 0) ||(static_cast <bool> ((info->NumOutputs == 0 && max_outputs == 0) || info->NumOutputs < max_outputs) ? void (0) : __assert_fail ("(info->NumOutputs == 0 && max_outputs == 0) || info->NumOutputs < max_outputs" , __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__ )) | |||
1274 | info->NumOutputs < max_outputs)(static_cast <bool> ((info->NumOutputs == 0 && max_outputs == 0) || info->NumOutputs < max_outputs) ? void (0) : __assert_fail ("(info->NumOutputs == 0 && max_outputs == 0) || info->NumOutputs < max_outputs" , __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__ )); | |||
1275 | ||||
1276 | /* From the ARB_enhanced_layouts spec: | |||
1277 | * | |||
1278 | * "If such a block member or variable is not written during a shader | |||
1279 | * invocation, the buffer contents at the assigned offset will be | |||
1280 | * undefined. Even if there are no static writes to a variable or | |||
1281 | * member that is assigned a transform feedback offset, the space is | |||
1282 | * still allocated in the buffer and still affects the stride." | |||
1283 | */ | |||
1284 | if (this->is_varying_written()) { | |||
1285 | info->Outputs[info->NumOutputs].ComponentOffset = location_frac; | |||
1286 | info->Outputs[info->NumOutputs].OutputRegister = location; | |||
1287 | info->Outputs[info->NumOutputs].NumComponents = output_size; | |||
1288 | info->Outputs[info->NumOutputs].StreamId = stream_id; | |||
1289 | info->Outputs[info->NumOutputs].OutputBuffer = buffer; | |||
1290 | info->Outputs[info->NumOutputs].DstOffset = xfb_offset; | |||
1291 | ++info->NumOutputs; | |||
1292 | } | |||
1293 | info->Buffers[buffer].Stream = this->stream_id; | |||
1294 | xfb_offset += output_size; | |||
1295 | ||||
1296 | num_components -= output_size; | |||
1297 | location++; | |||
1298 | location_frac = 0; | |||
1299 | } | |||
1300 | } | |||
1301 | ||||
1302 | if (explicit_stride && explicit_stride[buffer]) { | |||
1303 | if (this->is_64bit() && info->Buffers[buffer].Stride % 2) { | |||
1304 | linker_error(prog, "invalid qualifier xfb_stride=%d must be a " | |||
1305 | "multiple of 8 as its applied to a type that is or " | |||
1306 | "contains a double.", | |||
1307 | info->Buffers[buffer].Stride * 4); | |||
1308 | return false; | |||
1309 | } | |||
1310 | ||||
1311 | if (xfb_offset > info->Buffers[buffer].Stride) { | |||
1312 | linker_error(prog, "xfb_offset (%d) overflows xfb_stride (%d) for " | |||
1313 | "buffer (%d)", xfb_offset * 4, | |||
1314 | info->Buffers[buffer].Stride * 4, buffer); | |||
1315 | return false; | |||
1316 | } | |||
1317 | } else { | |||
1318 | info->Buffers[buffer].Stride = xfb_offset; | |||
1319 | } | |||
1320 | ||||
1321 | store_varying: | |||
1322 | info->Varyings[info->NumVarying].Name = ralloc_strdup(prog, | |||
1323 | this->orig_name); | |||
1324 | info->Varyings[info->NumVarying].Type = this->type; | |||
1325 | info->Varyings[info->NumVarying].Size = size; | |||
1326 | info->Varyings[info->NumVarying].BufferIndex = buffer_index; | |||
1327 | info->NumVarying++; | |||
1328 | info->Buffers[buffer].NumVaryings++; | |||
1329 | ||||
1330 | return true; | |||
1331 | } | |||
1332 | ||||
1333 | ||||
1334 | const tfeedback_candidate * | |||
1335 | tfeedback_decl::find_candidate(gl_shader_program *prog, | |||
1336 | hash_table *tfeedback_candidates) | |||
1337 | { | |||
1338 | const char *name = this->var_name; | |||
1339 | switch (this->lowered_builtin_array_variable) { | |||
1340 | case none: | |||
1341 | name = this->var_name; | |||
1342 | break; | |||
1343 | case clip_distance: | |||
1344 | name = "gl_ClipDistanceMESA"; | |||
1345 | break; | |||
1346 | case cull_distance: | |||
1347 | name = "gl_CullDistanceMESA"; | |||
1348 | break; | |||
1349 | case tess_level_outer: | |||
1350 | name = "gl_TessLevelOuterMESA"; | |||
1351 | break; | |||
1352 | case tess_level_inner: | |||
1353 | name = "gl_TessLevelInnerMESA"; | |||
1354 | break; | |||
1355 | } | |||
1356 | hash_entry *entry = _mesa_hash_table_search(tfeedback_candidates, name); | |||
1357 | ||||
1358 | this->matched_candidate = entry ? | |||
1359 | (const tfeedback_candidate *) entry->data : NULL__null; | |||
1360 | ||||
1361 | if (!this->matched_candidate) { | |||
1362 | /* From GL_EXT_transform_feedback: | |||
1363 | * A program will fail to link if: | |||
1364 | * | |||
1365 | * * any variable name specified in the <varyings> array is not | |||
1366 | * declared as an output in the geometry shader (if present) or | |||
1367 | * the vertex shader (if no geometry shader is present); | |||
1368 | */ | |||
1369 | linker_error(prog, "Transform feedback varying %s undeclared.", | |||
1370 | this->orig_name); | |||
1371 | } | |||
1372 | ||||
1373 | return this->matched_candidate; | |||
1374 | } | |||
1375 | ||||
1376 | /** | |||
1377 | * Force a candidate over the previously matched one. It happens when a new | |||
1378 | * varying needs to be created to match the xfb declaration, for example, | |||
1379 | * to fullfil an alignment criteria. | |||
1380 | */ | |||
1381 | void | |||
1382 | tfeedback_decl::set_lowered_candidate(const tfeedback_candidate *candidate) | |||
1383 | { | |||
1384 | this->matched_candidate = candidate; | |||
1385 | ||||
1386 | /* The subscript part is no longer relevant */ | |||
1387 | this->is_subscripted = false; | |||
1388 | this->array_subscript = 0; | |||
1389 | } | |||
1390 | ||||
1391 | ||||
1392 | /** | |||
1393 | * Parse all the transform feedback declarations that were passed to | |||
1394 | * glTransformFeedbackVaryings() and store them in tfeedback_decl objects. | |||
1395 | * | |||
1396 | * If an error occurs, the error is reported through linker_error() and false | |||
1397 | * is returned. | |||
1398 | */ | |||
1399 | static bool | |||
1400 | parse_tfeedback_decls(struct gl_context *ctx, struct gl_shader_program *prog, | |||
1401 | const void *mem_ctx, unsigned num_names, | |||
1402 | char **varying_names, tfeedback_decl *decls) | |||
1403 | { | |||
1404 | for (unsigned i = 0; i < num_names; ++i) { | |||
1405 | decls[i].init(ctx, mem_ctx, varying_names[i]); | |||
1406 | ||||
1407 | if (!decls[i].is_varying()) | |||
1408 | continue; | |||
1409 | ||||
1410 | /* From GL_EXT_transform_feedback: | |||
1411 | * A program will fail to link if: | |||
1412 | * | |||
1413 | * * any two entries in the <varyings> array specify the same varying | |||
1414 | * variable; | |||
1415 | * | |||
1416 | * We interpret this to mean "any two entries in the <varyings> array | |||
1417 | * specify the same varying variable and array index", since transform | |||
1418 | * feedback of arrays would be useless otherwise. | |||
1419 | */ | |||
1420 | for (unsigned j = 0; j < i; ++j) { | |||
1421 | if (decls[j].is_varying()) { | |||
1422 | if (tfeedback_decl::is_same(decls[i], decls[j])) { | |||
1423 | linker_error(prog, "Transform feedback varying %s specified " | |||
1424 | "more than once.", varying_names[i]); | |||
1425 | return false; | |||
1426 | } | |||
1427 | } | |||
1428 | } | |||
1429 | } | |||
1430 | return true; | |||
1431 | } | |||
1432 | ||||
1433 | ||||
1434 | static int | |||
1435 | cmp_xfb_offset(const void * x_generic, const void * y_generic) | |||
1436 | { | |||
1437 | tfeedback_decl *x = (tfeedback_decl *) x_generic; | |||
1438 | tfeedback_decl *y = (tfeedback_decl *) y_generic; | |||
1439 | ||||
1440 | if (x->get_buffer() != y->get_buffer()) | |||
1441 | return x->get_buffer() - y->get_buffer(); | |||
1442 | return x->get_offset() - y->get_offset(); | |||
1443 | } | |||
1444 | ||||
1445 | /** | |||
1446 | * Store transform feedback location assignments into | |||
1447 | * prog->sh.LinkedTransformFeedback based on the data stored in | |||
1448 | * tfeedback_decls. | |||
1449 | * | |||
1450 | * If an error occurs, the error is reported through linker_error() and false | |||
1451 | * is returned. | |||
1452 | */ | |||
1453 | static bool | |||
1454 | store_tfeedback_info(struct gl_context *ctx, struct gl_shader_program *prog, | |||
1455 | unsigned num_tfeedback_decls, | |||
1456 | tfeedback_decl *tfeedback_decls, bool has_xfb_qualifiers, | |||
1457 | const void *mem_ctx) | |||
1458 | { | |||
1459 | if (!prog->last_vert_prog) | |||
1460 | return true; | |||
1461 | ||||
1462 | /* Make sure MaxTransformFeedbackBuffers is less than 32 so the bitmask for | |||
1463 | * tracking the number of buffers doesn't overflow. | |||
1464 | */ | |||
1465 | assert(ctx->Const.MaxTransformFeedbackBuffers < 32)(static_cast <bool> (ctx->Const.MaxTransformFeedbackBuffers < 32) ? void (0) : __assert_fail ("ctx->Const.MaxTransformFeedbackBuffers < 32" , __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__ )); | |||
1466 | ||||
1467 | bool separate_attribs_mode = | |||
1468 | prog->TransformFeedback.BufferMode == GL_SEPARATE_ATTRIBS0x8C8D; | |||
1469 | ||||
1470 | struct gl_program *xfb_prog = prog->last_vert_prog; | |||
1471 | xfb_prog->sh.LinkedTransformFeedback = | |||
1472 | rzalloc(xfb_prog, struct gl_transform_feedback_info)((struct gl_transform_feedback_info *) rzalloc_size(xfb_prog, sizeof(struct gl_transform_feedback_info))); | |||
1473 | ||||
1474 | /* The xfb_offset qualifier does not have to be used in increasing order | |||
1475 | * however some drivers expect to receive the list of transform feedback | |||
1476 | * declarations in order so sort it now for convenience. | |||
1477 | */ | |||
1478 | if (has_xfb_qualifiers) { | |||
1479 | qsort(tfeedback_decls, num_tfeedback_decls, sizeof(*tfeedback_decls), | |||
1480 | cmp_xfb_offset); | |||
1481 | } | |||
1482 | ||||
1483 | xfb_prog->sh.LinkedTransformFeedback->Varyings = | |||
1484 | rzalloc_array(xfb_prog, struct gl_transform_feedback_varying_info,((struct gl_transform_feedback_varying_info *) rzalloc_array_size (xfb_prog, sizeof(struct gl_transform_feedback_varying_info), num_tfeedback_decls)) | |||
1485 | num_tfeedback_decls)((struct gl_transform_feedback_varying_info *) rzalloc_array_size (xfb_prog, sizeof(struct gl_transform_feedback_varying_info), num_tfeedback_decls)); | |||
1486 | ||||
1487 | unsigned num_outputs = 0; | |||
1488 | for (unsigned i = 0; i < num_tfeedback_decls; ++i) { | |||
1489 | if (tfeedback_decls[i].is_varying_written()) | |||
1490 | num_outputs += tfeedback_decls[i].get_num_outputs(); | |||
1491 | } | |||
1492 | ||||
1493 | xfb_prog->sh.LinkedTransformFeedback->Outputs = | |||
1494 | rzalloc_array(xfb_prog, struct gl_transform_feedback_output,((struct gl_transform_feedback_output *) rzalloc_array_size(xfb_prog , sizeof(struct gl_transform_feedback_output), num_outputs)) | |||
1495 | num_outputs)((struct gl_transform_feedback_output *) rzalloc_array_size(xfb_prog , sizeof(struct gl_transform_feedback_output), num_outputs)); | |||
1496 | ||||
1497 | unsigned num_buffers = 0; | |||
1498 | unsigned buffers = 0; | |||
1499 | BITSET_WORDunsigned int *used_components[MAX_FEEDBACK_BUFFERS4] = {}; | |||
1500 | ||||
1501 | if (!has_xfb_qualifiers && separate_attribs_mode) { | |||
1502 | /* GL_SEPARATE_ATTRIBS */ | |||
1503 | for (unsigned i = 0; i < num_tfeedback_decls; ++i) { | |||
1504 | if (!tfeedback_decls[i].store(ctx, prog, | |||
1505 | xfb_prog->sh.LinkedTransformFeedback, | |||
1506 | num_buffers, num_buffers, num_outputs, | |||
1507 | used_components, NULL__null, | |||
1508 | has_xfb_qualifiers, mem_ctx)) | |||
1509 | return false; | |||
1510 | ||||
1511 | buffers |= 1 << num_buffers; | |||
1512 | num_buffers++; | |||
1513 | } | |||
1514 | } | |||
1515 | else { | |||
1516 | /* GL_INVERLEAVED_ATTRIBS */ | |||
1517 | int buffer_stream_id = -1; | |||
1518 | unsigned buffer = | |||
1519 | num_tfeedback_decls ? tfeedback_decls[0].get_buffer() : 0; | |||
1520 | bool explicit_stride[MAX_FEEDBACK_BUFFERS4] = { false }; | |||
1521 | ||||
1522 | /* Apply any xfb_stride global qualifiers */ | |||
1523 | if (has_xfb_qualifiers) { | |||
1524 | for (unsigned j = 0; j < MAX_FEEDBACK_BUFFERS4; j++) { | |||
1525 | if (prog->TransformFeedback.BufferStride[j]) { | |||
1526 | explicit_stride[j] = true; | |||
1527 | xfb_prog->sh.LinkedTransformFeedback->Buffers[j].Stride = | |||
1528 | prog->TransformFeedback.BufferStride[j] / 4; | |||
1529 | } | |||
1530 | } | |||
1531 | } | |||
1532 | ||||
1533 | for (unsigned i = 0; i < num_tfeedback_decls; ++i) { | |||
1534 | if (has_xfb_qualifiers && | |||
1535 | buffer != tfeedback_decls[i].get_buffer()) { | |||
1536 | /* we have moved to the next buffer so reset stream id */ | |||
1537 | buffer_stream_id = -1; | |||
1538 | num_buffers++; | |||
1539 | } | |||
1540 | ||||
1541 | if (tfeedback_decls[i].is_next_buffer_separator()) { | |||
1542 | if (!tfeedback_decls[i].store(ctx, prog, | |||
1543 | xfb_prog->sh.LinkedTransformFeedback, | |||
1544 | buffer, num_buffers, num_outputs, | |||
1545 | used_components, explicit_stride, | |||
1546 | has_xfb_qualifiers, mem_ctx)) | |||
1547 | return false; | |||
1548 | num_buffers++; | |||
1549 | buffer_stream_id = -1; | |||
1550 | continue; | |||
1551 | } | |||
1552 | ||||
1553 | if (has_xfb_qualifiers) { | |||
1554 | buffer = tfeedback_decls[i].get_buffer(); | |||
1555 | } else { | |||
1556 | buffer = num_buffers; | |||
1557 | } | |||
1558 | ||||
1559 | if (tfeedback_decls[i].is_varying()) { | |||
1560 | if (buffer_stream_id == -1) { | |||
1561 | /* First varying writing to this buffer: remember its stream */ | |||
1562 | buffer_stream_id = (int) tfeedback_decls[i].get_stream_id(); | |||
1563 | ||||
1564 | /* Only mark a buffer as active when there is a varying | |||
1565 | * attached to it. This behaviour is based on a revised version | |||
1566 | * of section 13.2.2 of the GL 4.6 spec. | |||
1567 | */ | |||
1568 | buffers |= 1 << buffer; | |||
1569 | } else if (buffer_stream_id != | |||
1570 | (int) tfeedback_decls[i].get_stream_id()) { | |||
1571 | /* Varying writes to the same buffer from a different stream */ | |||
1572 | linker_error(prog, | |||
1573 | "Transform feedback can't capture varyings belonging " | |||
1574 | "to different vertex streams in a single buffer. " | |||
1575 | "Varying %s writes to buffer from stream %u, other " | |||
1576 | "varyings in the same buffer write from stream %u.", | |||
1577 | tfeedback_decls[i].name(), | |||
1578 | tfeedback_decls[i].get_stream_id(), | |||
1579 | buffer_stream_id); | |||
1580 | return false; | |||
1581 | } | |||
1582 | } | |||
1583 | ||||
1584 | if (!tfeedback_decls[i].store(ctx, prog, | |||
1585 | xfb_prog->sh.LinkedTransformFeedback, | |||
1586 | buffer, num_buffers, num_outputs, | |||
1587 | used_components, explicit_stride, | |||
1588 | has_xfb_qualifiers, mem_ctx)) | |||
1589 | return false; | |||
1590 | } | |||
1591 | } | |||
1592 | ||||
1593 | assert(xfb_prog->sh.LinkedTransformFeedback->NumOutputs == num_outputs)(static_cast <bool> (xfb_prog->sh.LinkedTransformFeedback ->NumOutputs == num_outputs) ? void (0) : __assert_fail ("xfb_prog->sh.LinkedTransformFeedback->NumOutputs == num_outputs" , __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__ )); | |||
1594 | ||||
1595 | xfb_prog->sh.LinkedTransformFeedback->ActiveBuffers = buffers; | |||
1596 | return true; | |||
1597 | } | |||
1598 | ||||
1599 | namespace { | |||
1600 | ||||
1601 | /** | |||
1602 | * Data structure recording the relationship between outputs of one shader | |||
1603 | * stage (the "producer") and inputs of another (the "consumer"). | |||
1604 | */ | |||
1605 | class varying_matches | |||
1606 | { | |||
1607 | public: | |||
1608 | varying_matches(bool disable_varying_packing, | |||
1609 | bool disable_xfb_packing, | |||
1610 | bool xfb_enabled, | |||
1611 | bool enhanced_layouts_enabled, | |||
1612 | gl_shader_stage producer_stage, | |||
1613 | gl_shader_stage consumer_stage); | |||
1614 | ~varying_matches(); | |||
1615 | void record(ir_variable *producer_var, ir_variable *consumer_var); | |||
1616 | unsigned assign_locations(struct gl_shader_program *prog, | |||
1617 | uint8_t components[], | |||
1618 | uint64_t reserved_slots); | |||
1619 | void store_locations() const; | |||
1620 | ||||
1621 | private: | |||
1622 | bool is_varying_packing_safe(const glsl_type *type, | |||
1623 | const ir_variable *var) const; | |||
1624 | ||||
1625 | /** | |||
1626 | * If true, this driver disables varying packing, so all varyings need to | |||
1627 | * be aligned on slot boundaries, and take up a number of slots equal to | |||
1628 | * their number of matrix columns times their array size. | |||
1629 | * | |||
1630 | * Packing may also be disabled because our current packing method is not | |||
1631 | * safe in SSO or versions of OpenGL where interpolation qualifiers are not | |||
1632 | * guaranteed to match across stages. | |||
1633 | */ | |||
1634 | const bool disable_varying_packing; | |||
1635 | ||||
1636 | /** | |||
1637 | * If true, this driver disables packing for varyings used by transform | |||
1638 | * feedback. | |||
1639 | */ | |||
1640 | const bool disable_xfb_packing; | |||
1641 | ||||
1642 | /** | |||
1643 | * If true, this driver has transform feedback enabled. The transform | |||
1644 | * feedback code usually requires at least some packing be done even | |||
1645 | * when varying packing is disabled, fortunately where transform feedback | |||
1646 | * requires packing it's safe to override the disabled setting. See | |||
1647 | * is_varying_packing_safe(). | |||
1648 | */ | |||
1649 | const bool xfb_enabled; | |||
1650 | ||||
1651 | const bool enhanced_layouts_enabled; | |||
1652 | ||||
1653 | /** | |||
1654 | * Enum representing the order in which varyings are packed within a | |||
1655 | * packing class. | |||
1656 | * | |||
1657 | * Currently we pack vec4's first, then vec2's, then scalar values, then | |||
1658 | * vec3's. This order ensures that the only vectors that are at risk of | |||
1659 | * having to be "double parked" (split between two adjacent varying slots) | |||
1660 | * are the vec3's. | |||
1661 | */ | |||
1662 | enum packing_order_enum { | |||
1663 | PACKING_ORDER_VEC4, | |||
1664 | PACKING_ORDER_VEC2, | |||
1665 | PACKING_ORDER_SCALAR, | |||
1666 | PACKING_ORDER_VEC3, | |||
1667 | }; | |||
1668 | ||||
1669 | static unsigned compute_packing_class(const ir_variable *var); | |||
1670 | static packing_order_enum compute_packing_order(const ir_variable *var); | |||
1671 | static int match_comparator(const void *x_generic, const void *y_generic); | |||
1672 | static int xfb_comparator(const void *x_generic, const void *y_generic); | |||
1673 | static int not_xfb_comparator(const void *x_generic, const void *y_generic); | |||
1674 | ||||
1675 | /** | |||
1676 | * Structure recording the relationship between a single producer output | |||
1677 | * and a single consumer input. | |||
1678 | */ | |||
1679 | struct match { | |||
1680 | /** | |||
1681 | * Packing class for this varying, computed by compute_packing_class(). | |||
1682 | */ | |||
1683 | unsigned packing_class; | |||
1684 | ||||
1685 | /** | |||
1686 | * Packing order for this varying, computed by compute_packing_order(). | |||
1687 | */ | |||
1688 | packing_order_enum packing_order; | |||
1689 | unsigned num_components; | |||
1690 | ||||
1691 | /** | |||
1692 | * The output variable in the producer stage. | |||
1693 | */ | |||
1694 | ir_variable *producer_var; | |||
1695 | ||||
1696 | /** | |||
1697 | * The input variable in the consumer stage. | |||
1698 | */ | |||
1699 | ir_variable *consumer_var; | |||
1700 | ||||
1701 | /** | |||
1702 | * The location which has been assigned for this varying. This is | |||
1703 | * expressed in multiples of a float, with the first generic varying | |||
1704 | * (i.e. the one referred to by VARYING_SLOT_VAR0) represented by the | |||
1705 | * value 0. | |||
1706 | */ | |||
1707 | unsigned generic_location; | |||
1708 | } *matches; | |||
1709 | ||||
1710 | /** | |||
1711 | * The number of elements in the \c matches array that are currently in | |||
1712 | * use. | |||
1713 | */ | |||
1714 | unsigned num_matches; | |||
1715 | ||||
1716 | /** | |||
1717 | * The number of elements that were set aside for the \c matches array when | |||
1718 | * it was allocated. | |||
1719 | */ | |||
1720 | unsigned matches_capacity; | |||
1721 | ||||
1722 | gl_shader_stage producer_stage; | |||
1723 | gl_shader_stage consumer_stage; | |||
1724 | }; | |||
1725 | ||||
1726 | } /* anonymous namespace */ | |||
1727 | ||||
1728 | varying_matches::varying_matches(bool disable_varying_packing, | |||
1729 | bool disable_xfb_packing, | |||
1730 | bool xfb_enabled, | |||
1731 | bool enhanced_layouts_enabled, | |||
1732 | gl_shader_stage producer_stage, | |||
1733 | gl_shader_stage consumer_stage) | |||
1734 | : disable_varying_packing(disable_varying_packing), | |||
1735 | disable_xfb_packing(disable_xfb_packing), | |||
1736 | xfb_enabled(xfb_enabled), | |||
1737 | enhanced_layouts_enabled(enhanced_layouts_enabled), | |||
1738 | producer_stage(producer_stage), | |||
1739 | consumer_stage(consumer_stage) | |||
1740 | { | |||
1741 | /* Note: this initial capacity is rather arbitrarily chosen to be large | |||
1742 | * enough for many cases without wasting an unreasonable amount of space. | |||
1743 | * varying_matches::record() will resize the array if there are more than | |||
1744 | * this number of varyings. | |||
1745 | */ | |||
1746 | this->matches_capacity = 8; | |||
1747 | this->matches = (match *) | |||
1748 | malloc(sizeof(*this->matches) * this->matches_capacity); | |||
1749 | this->num_matches = 0; | |||
1750 | } | |||
1751 | ||||
1752 | ||||
1753 | varying_matches::~varying_matches() | |||
1754 | { | |||
1755 | free(this->matches); | |||
1756 | } | |||
1757 | ||||
1758 | ||||
1759 | /** | |||
1760 | * Packing is always safe on individual arrays, structures, and matrices. It | |||
1761 | * is also safe if the varying is only used for transform feedback. | |||
1762 | */ | |||
1763 | bool | |||
1764 | varying_matches::is_varying_packing_safe(const glsl_type *type, | |||
1765 | const ir_variable *var) const | |||
1766 | { | |||
1767 | if (consumer_stage == MESA_SHADER_TESS_EVAL || | |||
1768 | consumer_stage == MESA_SHADER_TESS_CTRL || | |||
1769 | producer_stage == MESA_SHADER_TESS_CTRL) | |||
1770 | return false; | |||
1771 | ||||
1772 | return xfb_enabled && (type->is_array() || type->is_struct() || | |||
1773 | type->is_matrix() || var->data.is_xfb_only); | |||
1774 | } | |||
1775 | ||||
1776 | ||||
1777 | /** | |||
1778 | * Record the given producer/consumer variable pair in the list of variables | |||
1779 | * that should later be assigned locations. | |||
1780 | * | |||
1781 | * It is permissible for \c consumer_var to be NULL (this happens if a | |||
1782 | * variable is output by the producer and consumed by transform feedback, but | |||
1783 | * not consumed by the consumer). | |||
1784 | * | |||
1785 | * If \c producer_var has already been paired up with a consumer_var, or | |||
1786 | * producer_var is part of fixed pipeline functionality (and hence already has | |||
1787 | * a location assigned), this function has no effect. | |||
1788 | * | |||
1789 | * Note: as a side effect this function may change the interpolation type of | |||
1790 | * \c producer_var, but only when the change couldn't possibly affect | |||
1791 | * rendering. | |||
1792 | */ | |||
1793 | void | |||
1794 | varying_matches::record(ir_variable *producer_var, ir_variable *consumer_var) | |||
1795 | { | |||
1796 | assert(producer_var != NULL || consumer_var != NULL)(static_cast <bool> (producer_var != __null || consumer_var != __null) ? void (0) : __assert_fail ("producer_var != NULL || consumer_var != NULL" , __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__ )); | |||
1797 | ||||
1798 | if ((producer_var && (!producer_var->data.is_unmatched_generic_inout || | |||
1799 | producer_var->data.explicit_location)) || | |||
1800 | (consumer_var && (!consumer_var->data.is_unmatched_generic_inout || | |||
1801 | consumer_var->data.explicit_location))) { | |||
1802 | /* Either a location already exists for this variable (since it is part | |||
1803 | * of fixed functionality), or it has already been recorded as part of a | |||
1804 | * previous match. | |||
1805 | */ | |||
1806 | return; | |||
1807 | } | |||
1808 | ||||
1809 | bool needs_flat_qualifier = consumer_var == NULL__null && | |||
1810 | (producer_var->type->contains_integer() || | |||
1811 | producer_var->type->contains_double()); | |||
1812 | ||||
1813 | if (!disable_varying_packing && | |||
1814 | (!disable_xfb_packing || producer_var == NULL__null || !producer_var->data.is_xfb) && | |||
1815 | (needs_flat_qualifier || | |||
1816 | (consumer_stage != MESA_SHADER_NONE && consumer_stage != MESA_SHADER_FRAGMENT))) { | |||
1817 | /* Since this varying is not being consumed by the fragment shader, its | |||
1818 | * interpolation type varying cannot possibly affect rendering. | |||
1819 | * Also, this variable is non-flat and is (or contains) an integer | |||
1820 | * or a double. | |||
1821 | * If the consumer stage is unknown, don't modify the interpolation | |||
1822 | * type as it could affect rendering later with separate shaders. | |||
1823 | * | |||
1824 | * lower_packed_varyings requires all integer varyings to flat, | |||
1825 | * regardless of where they appear. We can trivially satisfy that | |||
1826 | * requirement by changing the interpolation type to flat here. | |||
1827 | */ | |||
1828 | if (producer_var) { | |||
1829 | producer_var->data.centroid = false; | |||
1830 | producer_var->data.sample = false; | |||
1831 | producer_var->data.interpolation = INTERP_MODE_FLAT; | |||
1832 | } | |||
1833 | ||||
1834 | if (consumer_var) { | |||
1835 | consumer_var->data.centroid = false; | |||
1836 | consumer_var->data.sample = false; | |||
1837 | consumer_var->data.interpolation = INTERP_MODE_FLAT; | |||
1838 | } | |||
1839 | } | |||
1840 | ||||
1841 | if (this->num_matches == this->matches_capacity) { | |||
1842 | this->matches_capacity *= 2; | |||
1843 | this->matches = (match *) | |||
1844 | realloc(this->matches, | |||
1845 | sizeof(*this->matches) * this->matches_capacity); | |||
1846 | } | |||
1847 | ||||
1848 | /* We must use the consumer to compute the packing class because in GL4.4+ | |||
1849 | * there is no guarantee interpolation qualifiers will match across stages. | |||
1850 | * | |||
1851 | * From Section 4.5 (Interpolation Qualifiers) of the GLSL 4.30 spec: | |||
1852 | * | |||
1853 | * "The type and presence of interpolation qualifiers of variables with | |||
1854 | * the same name declared in all linked shaders for the same cross-stage | |||
1855 | * interface must match, otherwise the link command will fail. | |||
1856 | * | |||
1857 | * When comparing an output from one stage to an input of a subsequent | |||
1858 | * stage, the input and output don't match if their interpolation | |||
1859 | * qualifiers (or lack thereof) are not the same." | |||
1860 | * | |||
1861 | * This text was also in at least revison 7 of the 4.40 spec but is no | |||
1862 | * longer in revision 9 and not in the 4.50 spec. | |||
1863 | */ | |||
1864 | const ir_variable *const var = (consumer_var != NULL__null) | |||
1865 | ? consumer_var : producer_var; | |||
1866 | const gl_shader_stage stage = (consumer_var != NULL__null) | |||
1867 | ? consumer_stage : producer_stage; | |||
1868 | const glsl_type *type = get_varying_type(var, stage); | |||
1869 | ||||
1870 | if (producer_var && consumer_var && | |||
1871 | consumer_var->data.must_be_shader_input) { | |||
1872 | producer_var->data.must_be_shader_input = 1; | |||
1873 | } | |||
1874 | ||||
1875 | this->matches[this->num_matches].packing_class | |||
1876 | = this->compute_packing_class(var); | |||
1877 | this->matches[this->num_matches].packing_order | |||
1878 | = this->compute_packing_order(var); | |||
1879 | if ((this->disable_varying_packing && !is_varying_packing_safe(type, var)) || | |||
1880 | (this->disable_xfb_packing && var->data.is_xfb) || | |||
1881 | var->data.must_be_shader_input) { | |||
1882 | unsigned slots = type->count_attribute_slots(false); | |||
1883 | this->matches[this->num_matches].num_components = slots * 4; | |||
1884 | } else { | |||
1885 | this->matches[this->num_matches].num_components | |||
1886 | = type->component_slots(); | |||
1887 | } | |||
1888 | ||||
1889 | this->matches[this->num_matches].producer_var = producer_var; | |||
1890 | this->matches[this->num_matches].consumer_var = consumer_var; | |||
1891 | this->num_matches++; | |||
1892 | if (producer_var) | |||
1893 | producer_var->data.is_unmatched_generic_inout = 0; | |||
1894 | if (consumer_var) | |||
1895 | consumer_var->data.is_unmatched_generic_inout = 0; | |||
1896 | } | |||
1897 | ||||
1898 | ||||
1899 | /** | |||
1900 | * Choose locations for all of the variable matches that were previously | |||
1901 | * passed to varying_matches::record(). | |||
1902 | * \param components returns array[slot] of number of components used | |||
1903 | * per slot (1, 2, 3 or 4) | |||
1904 | * \param reserved_slots bitmask indicating which varying slots are already | |||
1905 | * allocated | |||
1906 | * \return number of slots (4-element vectors) allocated | |||
1907 | */ | |||
1908 | unsigned | |||
1909 | varying_matches::assign_locations(struct gl_shader_program *prog, | |||
1910 | uint8_t components[], | |||
1911 | uint64_t reserved_slots) | |||
1912 | { | |||
1913 | /* If packing has been disabled then we cannot safely sort the varyings by | |||
1914 | * class as it may mean we are using a version of OpenGL where | |||
1915 | * interpolation qualifiers are not guaranteed to be matching across | |||
1916 | * shaders, sorting in this case could result in mismatching shader | |||
1917 | * interfaces. | |||
1918 | * When packing is disabled the sort orders varyings used by transform | |||
1919 | * feedback first, but also depends on *undefined behaviour* of qsort to | |||
1920 | * reverse the order of the varyings. See: xfb_comparator(). | |||
1921 | * | |||
1922 | * If packing is only disabled for xfb varyings (mutually exclusive with | |||
1923 | * disable_varying_packing), we then group varyings depending on if they | |||
1924 | * are captured for transform feedback. The same *undefined behaviour* is | |||
1925 | * taken advantage of. | |||
1926 | */ | |||
1927 | if (this->disable_varying_packing) { | |||
1928 | /* Only sort varyings that are only used by transform feedback. */ | |||
1929 | qsort(this->matches, this->num_matches, sizeof(*this->matches), | |||
1930 | &varying_matches::xfb_comparator); | |||
1931 | } else if (this->disable_xfb_packing) { | |||
1932 | /* Only sort varyings that are NOT used by transform feedback. */ | |||
1933 | qsort(this->matches, this->num_matches, sizeof(*this->matches), | |||
1934 | &varying_matches::not_xfb_comparator); | |||
1935 | } else { | |||
1936 | /* Sort varying matches into an order that makes them easy to pack. */ | |||
1937 | qsort(this->matches, this->num_matches, sizeof(*this->matches), | |||
1938 | &varying_matches::match_comparator); | |||
1939 | } | |||
1940 | ||||
1941 | unsigned generic_location = 0; | |||
1942 | unsigned generic_patch_location = MAX_VARYING32*4; | |||
1943 | bool previous_var_xfb = false; | |||
1944 | bool previous_var_xfb_only = false; | |||
1945 | unsigned previous_packing_class = ~0u; | |||
1946 | ||||
1947 | /* For tranform feedback separate mode, we know the number of attributes | |||
1948 | * is <= the number of buffers. So packing isn't critical. In fact, | |||
1949 | * packing vec3 attributes can cause trouble because splitting a vec3 | |||
1950 | * effectively creates an additional transform feedback output. The | |||
1951 | * extra TFB output may exceed device driver limits. | |||
1952 | */ | |||
1953 | const bool dont_pack_vec3 = | |||
1954 | (prog->TransformFeedback.BufferMode == GL_SEPARATE_ATTRIBS0x8C8D && | |||
1955 | prog->TransformFeedback.NumVarying > 0); | |||
1956 | ||||
1957 | for (unsigned i = 0; i < this->num_matches; i++) { | |||
1958 | unsigned *location = &generic_location; | |||
1959 | const ir_variable *var; | |||
1960 | const glsl_type *type; | |||
1961 | bool is_vertex_input = false; | |||
1962 | ||||
1963 | if (matches[i].consumer_var) { | |||
1964 | var = matches[i].consumer_var; | |||
1965 | type = get_varying_type(var, consumer_stage); | |||
1966 | if (consumer_stage == MESA_SHADER_VERTEX) | |||
1967 | is_vertex_input = true; | |||
1968 | } else { | |||
1969 | var = matches[i].producer_var; | |||
1970 | type = get_varying_type(var, producer_stage); | |||
1971 | } | |||
1972 | ||||
1973 | if (var->data.patch) | |||
1974 | location = &generic_patch_location; | |||
1975 | ||||
1976 | /* Advance to the next slot if this varying has a different packing | |||
1977 | * class than the previous one, and we're not already on a slot | |||
1978 | * boundary. | |||
1979 | * | |||
1980 | * Also advance if varying packing is disabled for transform feedback, | |||
1981 | * and previous or current varying is used for transform feedback. | |||
1982 | * | |||
1983 | * Also advance to the next slot if packing is disabled. This makes sure | |||
1984 | * we don't assign varyings the same locations which is possible | |||
1985 | * because we still pack individual arrays, records and matrices even | |||
1986 | * when packing is disabled. Note we don't advance to the next slot if | |||
1987 | * we can pack varyings together that are only used for transform | |||
1988 | * feedback. | |||
1989 | */ | |||
1990 | if (var->data.must_be_shader_input || | |||
1991 | (this->disable_xfb_packing && | |||
1992 | (previous_var_xfb || var->data.is_xfb)) || | |||
1993 | (this->disable_varying_packing && | |||
1994 | !(previous_var_xfb_only && var->data.is_xfb_only)) || | |||
1995 | (previous_packing_class != this->matches[i].packing_class) || | |||
1996 | (this->matches[i].packing_order == PACKING_ORDER_VEC3 && | |||
1997 | dont_pack_vec3)) { | |||
1998 | *location = ALIGN(*location, 4); | |||
1999 | } | |||
2000 | ||||
2001 | previous_var_xfb = var->data.is_xfb; | |||
2002 | previous_var_xfb_only = var->data.is_xfb_only; | |||
2003 | previous_packing_class = this->matches[i].packing_class; | |||
2004 | ||||
2005 | /* The number of components taken up by this variable. For vertex shader | |||
2006 | * inputs, we use the number of slots * 4, as they have different | |||
2007 | * counting rules. | |||
2008 | */ | |||
2009 | unsigned num_components = is_vertex_input ? | |||
2010 | type->count_attribute_slots(is_vertex_input) * 4 : | |||
2011 | this->matches[i].num_components; | |||
2012 | ||||
2013 | /* The last slot for this variable, inclusive. */ | |||
2014 | unsigned slot_end = *location + num_components - 1; | |||
2015 | ||||
2016 | /* FIXME: We could be smarter in the below code and loop back over | |||
2017 | * trying to fill any locations that we skipped because we couldn't pack | |||
2018 | * the varying between an explicit location. For now just let the user | |||
2019 | * hit the linking error if we run out of room and suggest they use | |||
2020 | * explicit locations. | |||
2021 | */ | |||
2022 | while (slot_end < MAX_VARYING32 * 4u) { | |||
2023 | const unsigned slots = (slot_end / 4u) - (*location / 4u) + 1; | |||
2024 | const uint64_t slot_mask = ((1ull << slots) - 1) << (*location / 4u); | |||
2025 | ||||
2026 | assert(slots > 0)(static_cast <bool> (slots > 0) ? void (0) : __assert_fail ("slots > 0", __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__)); | |||
2027 | ||||
2028 | if ((reserved_slots & slot_mask) == 0) { | |||
2029 | break; | |||
2030 | } | |||
2031 | ||||
2032 | *location = ALIGN(*location + 1, 4); | |||
2033 | slot_end = *location + num_components - 1; | |||
2034 | } | |||
2035 | ||||
2036 | if (!var->data.patch && slot_end >= MAX_VARYING32 * 4u) { | |||
2037 | linker_error(prog, "insufficient contiguous locations available for " | |||
2038 | "%s it is possible an array or struct could not be " | |||
2039 | "packed between varyings with explicit locations. Try " | |||
2040 | "using an explicit location for arrays and structs.", | |||
2041 | var->name); | |||
2042 | } | |||
2043 | ||||
2044 | if (slot_end < MAX_VARYINGS_INCL_PATCH((((VARYING_SLOT_VAR0 + 32)) + 32) - VARYING_SLOT_VAR0) * 4u) { | |||
2045 | for (unsigned j = *location / 4u; j < slot_end / 4u; j++) | |||
2046 | components[j] = 4; | |||
2047 | components[slot_end / 4u] = (slot_end & 3) + 1; | |||
2048 | } | |||
2049 | ||||
2050 | this->matches[i].generic_location = *location; | |||
2051 | ||||
2052 | *location = slot_end + 1; | |||
2053 | } | |||
2054 | ||||
2055 | return (generic_location + 3) / 4; | |||
2056 | } | |||
2057 | ||||
2058 | ||||
2059 | /** | |||
2060 | * Update the producer and consumer shaders to reflect the locations | |||
2061 | * assignments that were made by varying_matches::assign_locations(). | |||
2062 | */ | |||
2063 | void | |||
2064 | varying_matches::store_locations() const | |||
2065 | { | |||
2066 | /* Check whether the location needs to be packed with | |||
2067 | * lower_packed_varyings() or if we can just use ARB_enhanced_layouts packing. | |||
2068 | */ | |||
2069 | bool pack_loc[MAX_VARYINGS_INCL_PATCH((((VARYING_SLOT_VAR0 + 32)) + 32) - VARYING_SLOT_VAR0)] = { 0 }; | |||
2070 | const glsl_type *loc_type[MAX_VARYINGS_INCL_PATCH((((VARYING_SLOT_VAR0 + 32)) + 32) - VARYING_SLOT_VAR0)][4] = { {NULL__null, NULL__null} }; | |||
2071 | ||||
2072 | for (unsigned i = 0; i < this->num_matches; i++) { | |||
2073 | ir_variable *producer_var = this->matches[i].producer_var; | |||
2074 | ir_variable *consumer_var = this->matches[i].consumer_var; | |||
2075 | unsigned generic_location = this->matches[i].generic_location; | |||
2076 | unsigned slot = generic_location / 4; | |||
2077 | unsigned offset = generic_location % 4; | |||
2078 | ||||
2079 | if (producer_var) { | |||
2080 | producer_var->data.location = VARYING_SLOT_VAR0 + slot; | |||
2081 | producer_var->data.location_frac = offset; | |||
2082 | } | |||
2083 | ||||
2084 | if (consumer_var) { | |||
2085 | assert(consumer_var->data.location == -1)(static_cast <bool> (consumer_var->data.location == - 1) ? void (0) : __assert_fail ("consumer_var->data.location == -1" , __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__ )); | |||
2086 | consumer_var->data.location = VARYING_SLOT_VAR0 + slot; | |||
2087 | consumer_var->data.location_frac = offset; | |||
2088 | } | |||
2089 | ||||
2090 | /* Find locations suitable for native packing via | |||
2091 | * ARB_enhanced_layouts. | |||
2092 | */ | |||
2093 | if (producer_var && consumer_var) { | |||
2094 | if (enhanced_layouts_enabled) { | |||
2095 | const glsl_type *type = | |||
2096 | get_varying_type(producer_var, producer_stage); | |||
2097 | if (type->is_array() || type->is_matrix() || type->is_struct() || | |||
2098 | type->is_64bit()) { | |||
2099 | unsigned comp_slots = type->component_slots() + offset; | |||
2100 | unsigned slots = comp_slots / 4; | |||
2101 | if (comp_slots % 4) | |||
2102 | slots += 1; | |||
2103 | ||||
2104 | for (unsigned j = 0; j < slots; j++) { | |||
2105 | pack_loc[slot + j] = true; | |||
2106 | } | |||
2107 | } else if (offset + type->vector_elements > 4) { | |||
2108 | pack_loc[slot] = true; | |||
2109 | pack_loc[slot + 1] = true; | |||
2110 | } else { | |||
2111 | loc_type[slot][offset] = type; | |||
2112 | } | |||
2113 | } | |||
2114 | } | |||
2115 | } | |||
2116 | ||||
2117 | /* Attempt to use ARB_enhanced_layouts for more efficient packing if | |||
2118 | * suitable. | |||
2119 | */ | |||
2120 | if (enhanced_layouts_enabled) { | |||
2121 | for (unsigned i = 0; i < this->num_matches; i++) { | |||
2122 | ir_variable *producer_var = this->matches[i].producer_var; | |||
2123 | ir_variable *consumer_var = this->matches[i].consumer_var; | |||
2124 | unsigned generic_location = this->matches[i].generic_location; | |||
2125 | unsigned slot = generic_location / 4; | |||
2126 | ||||
2127 | if (pack_loc[slot] || !producer_var || !consumer_var) | |||
2128 | continue; | |||
2129 | ||||
2130 | const glsl_type *type = | |||
2131 | get_varying_type(producer_var, producer_stage); | |||
2132 | bool type_match = true; | |||
2133 | for (unsigned j = 0; j < 4; j++) { | |||
2134 | if (loc_type[slot][j]) { | |||
2135 | if (type->base_type != loc_type[slot][j]->base_type) | |||
2136 | type_match = false; | |||
2137 | } | |||
2138 | } | |||
2139 | ||||
2140 | if (type_match) { | |||
2141 | producer_var->data.explicit_location = 1; | |||
2142 | consumer_var->data.explicit_location = 1; | |||
2143 | producer_var->data.explicit_component = 1; | |||
2144 | consumer_var->data.explicit_component = 1; | |||
2145 | } | |||
2146 | } | |||
2147 | } | |||
2148 | } | |||
2149 | ||||
2150 | ||||
2151 | /** | |||
2152 | * Compute the "packing class" of the given varying. This is an unsigned | |||
2153 | * integer with the property that two variables in the same packing class can | |||
2154 | * be safely backed into the same vec4. | |||
2155 | */ | |||
2156 | unsigned | |||
2157 | varying_matches::compute_packing_class(const ir_variable *var) | |||
2158 | { | |||
2159 | /* Without help from the back-end, there is no way to pack together | |||
2160 | * variables with different interpolation types, because | |||
2161 | * lower_packed_varyings must choose exactly one interpolation type for | |||
2162 | * each packed varying it creates. | |||
2163 | * | |||
2164 | * However, we can safely pack together floats, ints, and uints, because: | |||
2165 | * | |||
2166 | * - varyings of base type "int" and "uint" must use the "flat" | |||
2167 | * interpolation type, which can only occur in GLSL 1.30 and above. | |||
2168 | * | |||
2169 | * - On platforms that support GLSL 1.30 and above, lower_packed_varyings | |||
2170 | * can store flat floats as ints without losing any information (using | |||
2171 | * the ir_unop_bitcast_* opcodes). | |||
2172 | * | |||
2173 | * Therefore, the packing class depends only on the interpolation type. | |||
2174 | */ | |||
2175 | const unsigned interp = var->is_interpolation_flat() | |||
2176 | ? unsigned(INTERP_MODE_FLAT) : var->data.interpolation; | |||
2177 | ||||
2178 | assert(interp < (1 << 3))(static_cast <bool> (interp < (1 << 3)) ? void (0) : __assert_fail ("interp < (1 << 3)", __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__)); | |||
2179 | ||||
2180 | const unsigned packing_class = (interp << 0) | | |||
2181 | (var->data.centroid << 3) | | |||
2182 | (var->data.sample << 4) | | |||
2183 | (var->data.patch << 5) | | |||
2184 | (var->data.must_be_shader_input << 6); | |||
2185 | ||||
2186 | return packing_class; | |||
2187 | } | |||
2188 | ||||
2189 | ||||
2190 | /** | |||
2191 | * Compute the "packing order" of the given varying. This is a sort key we | |||
2192 | * use to determine when to attempt to pack the given varying relative to | |||
2193 | * other varyings in the same packing class. | |||
2194 | */ | |||
2195 | varying_matches::packing_order_enum | |||
2196 | varying_matches::compute_packing_order(const ir_variable *var) | |||
2197 | { | |||
2198 | const glsl_type *element_type = var->type; | |||
2199 | ||||
2200 | while (element_type->is_array()) { | |||
2201 | element_type = element_type->fields.array; | |||
2202 | } | |||
2203 | ||||
2204 | switch (element_type->component_slots() % 4) { | |||
2205 | case 1: return PACKING_ORDER_SCALAR; | |||
2206 | case 2: return PACKING_ORDER_VEC2; | |||
2207 | case 3: return PACKING_ORDER_VEC3; | |||
2208 | case 0: return PACKING_ORDER_VEC4; | |||
2209 | default: | |||
2210 | assert(!"Unexpected value of vector_elements")(static_cast <bool> (!"Unexpected value of vector_elements" ) ? void (0) : __assert_fail ("!\"Unexpected value of vector_elements\"" , __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__ )); | |||
2211 | return PACKING_ORDER_VEC4; | |||
2212 | } | |||
2213 | } | |||
2214 | ||||
2215 | ||||
2216 | /** | |||
2217 | * Comparison function passed to qsort() to sort varyings by packing_class and | |||
2218 | * then by packing_order. | |||
2219 | */ | |||
2220 | int | |||
2221 | varying_matches::match_comparator(const void *x_generic, const void *y_generic) | |||
2222 | { | |||
2223 | const match *x = (const match *) x_generic; | |||
2224 | const match *y = (const match *) y_generic; | |||
2225 | ||||
2226 | if (x->packing_class != y->packing_class) | |||
2227 | return x->packing_class - y->packing_class; | |||
2228 | return x->packing_order - y->packing_order; | |||
2229 | } | |||
2230 | ||||
2231 | ||||
2232 | /** | |||
2233 | * Comparison function passed to qsort() to sort varyings used only by | |||
2234 | * transform feedback when packing of other varyings is disabled. | |||
2235 | */ | |||
2236 | int | |||
2237 | varying_matches::xfb_comparator(const void *x_generic, const void *y_generic) | |||
2238 | { | |||
2239 | const match *x = (const match *) x_generic; | |||
2240 | ||||
2241 | if (x->producer_var != NULL__null && x->producer_var->data.is_xfb_only) | |||
2242 | return match_comparator(x_generic, y_generic); | |||
2243 | ||||
2244 | /* FIXME: When the comparator returns 0 it means the elements being | |||
2245 | * compared are equivalent. However the qsort documentation says: | |||
2246 | * | |||
2247 | * "The order of equivalent elements is undefined." | |||
2248 | * | |||
2249 | * In practice the sort ends up reversing the order of the varyings which | |||
2250 | * means locations are also assigned in this reversed order and happens to | |||
2251 | * be what we want. This is also whats happening in | |||
2252 | * varying_matches::match_comparator(). | |||
2253 | */ | |||
2254 | return 0; | |||
2255 | } | |||
2256 | ||||
2257 | ||||
2258 | /** | |||
2259 | * Comparison function passed to qsort() to sort varyings NOT used by | |||
2260 | * transform feedback when packing of xfb varyings is disabled. | |||
2261 | */ | |||
2262 | int | |||
2263 | varying_matches::not_xfb_comparator(const void *x_generic, const void *y_generic) | |||
2264 | { | |||
2265 | const match *x = (const match *) x_generic; | |||
2266 | ||||
2267 | if (x->producer_var != NULL__null && !x->producer_var->data.is_xfb) | |||
2268 | return match_comparator(x_generic, y_generic); | |||
2269 | ||||
2270 | /* FIXME: When the comparator returns 0 it means the elements being | |||
2271 | * compared are equivalent. However the qsort documentation says: | |||
2272 | * | |||
2273 | * "The order of equivalent elements is undefined." | |||
2274 | * | |||
2275 | * In practice the sort ends up reversing the order of the varyings which | |||
2276 | * means locations are also assigned in this reversed order and happens to | |||
2277 | * be what we want. This is also whats happening in | |||
2278 | * varying_matches::match_comparator(). | |||
2279 | */ | |||
2280 | return 0; | |||
2281 | } | |||
2282 | ||||
2283 | ||||
2284 | /** | |||
2285 | * Is the given variable a varying variable to be counted against the | |||
2286 | * limit in ctx->Const.MaxVarying? | |||
2287 | * This includes variables such as texcoords, colors and generic | |||
2288 | * varyings, but excludes variables such as gl_FrontFacing and gl_FragCoord. | |||
2289 | */ | |||
2290 | static bool | |||
2291 | var_counts_against_varying_limit(gl_shader_stage stage, const ir_variable *var) | |||
2292 | { | |||
2293 | /* Only fragment shaders will take a varying variable as an input */ | |||
2294 | if (stage == MESA_SHADER_FRAGMENT && | |||
2295 | var->data.mode == ir_var_shader_in) { | |||
2296 | switch (var->data.location) { | |||
2297 | case VARYING_SLOT_POS: | |||
2298 | case VARYING_SLOT_FACE: | |||
2299 | case VARYING_SLOT_PNTC: | |||
2300 | return false; | |||
2301 | default: | |||
2302 | return true; | |||
2303 | } | |||
2304 | } | |||
2305 | return false; | |||
2306 | } | |||
2307 | ||||
2308 | ||||
2309 | /** | |||
2310 | * Visitor class that generates tfeedback_candidate structs describing all | |||
2311 | * possible targets of transform feedback. | |||
2312 | * | |||
2313 | * tfeedback_candidate structs are stored in the hash table | |||
2314 | * tfeedback_candidates, which is passed to the constructor. This hash table | |||
2315 | * maps varying names to instances of the tfeedback_candidate struct. | |||
2316 | */ | |||
2317 | class tfeedback_candidate_generator : public program_resource_visitor | |||
2318 | { | |||
2319 | public: | |||
2320 | tfeedback_candidate_generator(void *mem_ctx, | |||
2321 | hash_table *tfeedback_candidates, | |||
2322 | gl_shader_stage stage) | |||
2323 | : mem_ctx(mem_ctx), | |||
2324 | tfeedback_candidates(tfeedback_candidates), | |||
2325 | stage(stage), | |||
2326 | toplevel_var(NULL__null), | |||
2327 | varying_floats(0) | |||
2328 | { | |||
2329 | } | |||
2330 | ||||
2331 | void process(ir_variable *var) | |||
2332 | { | |||
2333 | /* All named varying interface blocks should be flattened by now */ | |||
2334 | assert(!var->is_interface_instance())(static_cast <bool> (!var->is_interface_instance()) ? void (0) : __assert_fail ("!var->is_interface_instance()" , __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__ )); | |||
2335 | assert(var->data.mode == ir_var_shader_out)(static_cast <bool> (var->data.mode == ir_var_shader_out ) ? void (0) : __assert_fail ("var->data.mode == ir_var_shader_out" , __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__ )); | |||
2336 | ||||
2337 | this->toplevel_var = var; | |||
2338 | this->varying_floats = 0; | |||
2339 | const glsl_type *t = | |||
2340 | var->data.from_named_ifc_block ? var->get_interface_type() : var->type; | |||
2341 | if (!var->data.patch && stage == MESA_SHADER_TESS_CTRL) { | |||
2342 | assert(t->is_array())(static_cast <bool> (t->is_array()) ? void (0) : __assert_fail ("t->is_array()", __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__)); | |||
2343 | t = t->fields.array; | |||
2344 | } | |||
2345 | program_resource_visitor::process(var, t, false); | |||
2346 | } | |||
2347 | ||||
2348 | private: | |||
2349 | virtual void visit_field(const glsl_type *type, const char *name, | |||
2350 | bool /* row_major */, | |||
2351 | const glsl_type * /* record_type */, | |||
2352 | const enum glsl_interface_packing, | |||
2353 | bool /* last_field */) | |||
2354 | { | |||
2355 | assert(!type->without_array()->is_struct())(static_cast <bool> (!type->without_array()->is_struct ()) ? void (0) : __assert_fail ("!type->without_array()->is_struct()" , __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__ )); | |||
2356 | assert(!type->without_array()->is_interface())(static_cast <bool> (!type->without_array()->is_interface ()) ? void (0) : __assert_fail ("!type->without_array()->is_interface()" , __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__ )); | |||
2357 | ||||
2358 | tfeedback_candidate *candidate | |||
2359 | = rzalloc(this->mem_ctx, tfeedback_candidate)((tfeedback_candidate *) rzalloc_size(this->mem_ctx, sizeof (tfeedback_candidate))); | |||
2360 | candidate->toplevel_var = this->toplevel_var; | |||
2361 | candidate->type = type; | |||
2362 | candidate->offset = this->varying_floats; | |||
2363 | _mesa_hash_table_insert(this->tfeedback_candidates, | |||
2364 | ralloc_strdup(this->mem_ctx, name), | |||
2365 | candidate); | |||
2366 | this->varying_floats += type->component_slots(); | |||
2367 | } | |||
2368 | ||||
2369 | /** | |||
2370 | * Memory context used to allocate hash table keys and values. | |||
2371 | */ | |||
2372 | void * const mem_ctx; | |||
2373 | ||||
2374 | /** | |||
2375 | * Hash table in which tfeedback_candidate objects should be stored. | |||
2376 | */ | |||
2377 | hash_table * const tfeedback_candidates; | |||
2378 | ||||
2379 | gl_shader_stage stage; | |||
2380 | ||||
2381 | /** | |||
2382 | * Pointer to the toplevel variable that is being traversed. | |||
2383 | */ | |||
2384 | ir_variable *toplevel_var; | |||
2385 | ||||
2386 | /** | |||
2387 | * Total number of varying floats that have been visited so far. This is | |||
2388 | * used to determine the offset to each varying within the toplevel | |||
2389 | * variable. | |||
2390 | */ | |||
2391 | unsigned varying_floats; | |||
2392 | }; | |||
2393 | ||||
2394 | ||||
2395 | namespace linker { | |||
2396 | ||||
2397 | void | |||
2398 | populate_consumer_input_sets(void *mem_ctx, exec_list *ir, | |||
2399 | hash_table *consumer_inputs, | |||
2400 | hash_table *consumer_interface_inputs, | |||
2401 | ir_variable *consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX(((VARYING_SLOT_VAR0 + 32)) + 32)]) | |||
2402 | { | |||
2403 | memset(consumer_inputs_with_locations, | |||
2404 | 0, | |||
2405 | sizeof(consumer_inputs_with_locations[0]) * VARYING_SLOT_TESS_MAX(((VARYING_SLOT_VAR0 + 32)) + 32)); | |||
2406 | ||||
2407 | foreach_in_list(ir_instruction, node, ir)for (ir_instruction *node = (!exec_node_is_tail_sentinel((ir) ->head_sentinel.next) ? (ir_instruction *) ((ir)->head_sentinel .next) : __null); (node) != __null; (node) = (!exec_node_is_tail_sentinel ((node)->next) ? (ir_instruction *) ((node)->next) : __null )) { | |||
2408 | ir_variable *const input_var = node->as_variable(); | |||
2409 | ||||
2410 | if (input_var != NULL__null && input_var->data.mode == ir_var_shader_in) { | |||
2411 | /* All interface blocks should have been lowered by this point */ | |||
2412 | assert(!input_var->type->is_interface())(static_cast <bool> (!input_var->type->is_interface ()) ? void (0) : __assert_fail ("!input_var->type->is_interface()" , __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__ )); | |||
2413 | ||||
2414 | if (input_var->data.explicit_location) { | |||
2415 | /* assign_varying_locations only cares about finding the | |||
2416 | * ir_variable at the start of a contiguous location block. | |||
2417 | * | |||
2418 | * - For !producer, consumer_inputs_with_locations isn't used. | |||
2419 | * | |||
2420 | * - For !consumer, consumer_inputs_with_locations is empty. | |||
2421 | * | |||
2422 | * For consumer && producer, if you were trying to set some | |||
2423 | * ir_variable to the middle of a location block on the other side | |||
2424 | * of producer/consumer, cross_validate_outputs_to_inputs() should | |||
2425 | * be link-erroring due to either type mismatch or location | |||
2426 | * overlaps. If the variables do match up, then they've got a | |||
2427 | * matching data.location and you only looked at | |||
2428 | * consumer_inputs_with_locations[var->data.location], not any | |||
2429 | * following entries for the array/structure. | |||
2430 | */ | |||
2431 | consumer_inputs_with_locations[input_var->data.location] = | |||
2432 | input_var; | |||
2433 | } else if (input_var->get_interface_type() != NULL__null) { | |||
2434 | char *const iface_field_name = | |||
2435 | ralloc_asprintf(mem_ctx, "%s.%s", | |||
2436 | input_var->get_interface_type()->without_array()->name, | |||
2437 | input_var->name); | |||
2438 | _mesa_hash_table_insert(consumer_interface_inputs, | |||
2439 | iface_field_name, input_var); | |||
2440 | } else { | |||
2441 | _mesa_hash_table_insert(consumer_inputs, | |||
2442 | ralloc_strdup(mem_ctx, input_var->name), | |||
2443 | input_var); | |||
2444 | } | |||
2445 | } | |||
2446 | } | |||
2447 | } | |||
2448 | ||||
2449 | /** | |||
2450 | * Find a variable from the consumer that "matches" the specified variable | |||
2451 | * | |||
2452 | * This function only finds inputs with names that match. There is no | |||
2453 | * validation (here) that the types, etc. are compatible. | |||
2454 | */ | |||
2455 | ir_variable * | |||
2456 | get_matching_input(void *mem_ctx, | |||
2457 | const ir_variable *output_var, | |||
2458 | hash_table *consumer_inputs, | |||
2459 | hash_table *consumer_interface_inputs, | |||
2460 | ir_variable *consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX(((VARYING_SLOT_VAR0 + 32)) + 32)]) | |||
2461 | { | |||
2462 | ir_variable *input_var; | |||
2463 | ||||
2464 | if (output_var->data.explicit_location) { | |||
2465 | input_var = consumer_inputs_with_locations[output_var->data.location]; | |||
2466 | } else if (output_var->get_interface_type() != NULL__null) { | |||
2467 | char *const iface_field_name = | |||
2468 | ralloc_asprintf(mem_ctx, "%s.%s", | |||
2469 | output_var->get_interface_type()->without_array()->name, | |||
2470 | output_var->name); | |||
2471 | hash_entry *entry = _mesa_hash_table_search(consumer_interface_inputs, iface_field_name); | |||
2472 | input_var = entry ? (ir_variable *) entry->data : NULL__null; | |||
2473 | } else { | |||
2474 | hash_entry *entry = _mesa_hash_table_search(consumer_inputs, output_var->name); | |||
2475 | input_var = entry ? (ir_variable *) entry->data : NULL__null; | |||
2476 | } | |||
2477 | ||||
2478 | return (input_var == NULL__null || input_var->data.mode != ir_var_shader_in) | |||
2479 | ? NULL__null : input_var; | |||
2480 | } | |||
2481 | ||||
2482 | } | |||
2483 | ||||
2484 | static int | |||
2485 | io_variable_cmp(const void *_a, const void *_b) | |||
2486 | { | |||
2487 | const ir_variable *const a = *(const ir_variable **) _a; | |||
2488 | const ir_variable *const b = *(const ir_variable **) _b; | |||
2489 | ||||
2490 | if (a->data.explicit_location && b->data.explicit_location) | |||
2491 | return b->data.location - a->data.location; | |||
2492 | ||||
2493 | if (a->data.explicit_location && !b->data.explicit_location) | |||
2494 | return 1; | |||
2495 | ||||
2496 | if (!a->data.explicit_location && b->data.explicit_location) | |||
2497 | return -1; | |||
2498 | ||||
2499 | return -strcmp(a->name, b->name); | |||
2500 | } | |||
2501 | ||||
2502 | /** | |||
2503 | * Sort the shader IO variables into canonical order | |||
2504 | */ | |||
2505 | static void | |||
2506 | canonicalize_shader_io(exec_list *ir, enum ir_variable_mode io_mode) | |||
2507 | { | |||
2508 | ir_variable *var_table[MAX_PROGRAM_OUTPUTS64 * 4]; | |||
2509 | unsigned num_variables = 0; | |||
2510 | ||||
2511 | foreach_in_list(ir_instruction, node, ir)for (ir_instruction *node = (!exec_node_is_tail_sentinel((ir) ->head_sentinel.next) ? (ir_instruction *) ((ir)->head_sentinel .next) : __null); (node) != __null; (node) = (!exec_node_is_tail_sentinel ((node)->next) ? (ir_instruction *) ((node)->next) : __null )) { | |||
2512 | ir_variable *const var = node->as_variable(); | |||
2513 | ||||
2514 | if (var == NULL__null || var->data.mode != io_mode) | |||
2515 | continue; | |||
2516 | ||||
2517 | /* If we have already encountered more I/O variables that could | |||
2518 | * successfully link, bail. | |||
2519 | */ | |||
2520 | if (num_variables == ARRAY_SIZE(var_table)(sizeof(var_table) / sizeof((var_table)[0]))) | |||
2521 | return; | |||
2522 | ||||
2523 | var_table[num_variables++] = var; | |||
2524 | } | |||
2525 | ||||
2526 | if (num_variables == 0) | |||
2527 | return; | |||
2528 | ||||
2529 | /* Sort the list in reverse order (io_variable_cmp handles this). Later | |||
2530 | * we're going to push the variables on to the IR list as a stack, so we | |||
2531 | * want the last variable (in canonical order) to be first in the list. | |||
2532 | */ | |||
2533 | qsort(var_table, num_variables, sizeof(var_table[0]), io_variable_cmp); | |||
2534 | ||||
2535 | /* Remove the variable from it's current location in the IR, and put it at | |||
2536 | * the front. | |||
2537 | */ | |||
2538 | for (unsigned i = 0; i < num_variables; i++) { | |||
2539 | var_table[i]->remove(); | |||
2540 | ir->push_head(var_table[i]); | |||
2541 | } | |||
2542 | } | |||
2543 | ||||
2544 | /** | |||
2545 | * Generate a bitfield map of the explicit locations for shader varyings. | |||
2546 | * | |||
2547 | * Note: For Tessellation shaders we are sitting right on the limits of the | |||
2548 | * 64 bit map. Per-vertex and per-patch both have separate location domains | |||
2549 | * with a max of MAX_VARYING. | |||
2550 | */ | |||
2551 | static uint64_t | |||
2552 | reserved_varying_slot(struct gl_linked_shader *stage, | |||
2553 | ir_variable_mode io_mode) | |||
2554 | { | |||
2555 | assert(io_mode == ir_var_shader_in || io_mode == ir_var_shader_out)(static_cast <bool> (io_mode == ir_var_shader_in || io_mode == ir_var_shader_out) ? void (0) : __assert_fail ("io_mode == ir_var_shader_in || io_mode == ir_var_shader_out" , __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__ )); | |||
2556 | /* Avoid an overflow of the returned value */ | |||
2557 | assert(MAX_VARYINGS_INCL_PATCH <= 64)(static_cast <bool> (((((VARYING_SLOT_VAR0 + 32)) + 32) - VARYING_SLOT_VAR0) <= 64) ? void (0) : __assert_fail ("MAX_VARYINGS_INCL_PATCH <= 64" , __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__ )); | |||
2558 | ||||
2559 | uint64_t slots = 0; | |||
2560 | int var_slot; | |||
2561 | ||||
2562 | if (!stage) | |||
2563 | return slots; | |||
2564 | ||||
2565 | foreach_in_list(ir_instruction, node, stage->ir)for (ir_instruction *node = (!exec_node_is_tail_sentinel((stage ->ir)->head_sentinel.next) ? (ir_instruction *) ((stage ->ir)->head_sentinel.next) : __null); (node) != __null; (node) = (!exec_node_is_tail_sentinel((node)->next) ? (ir_instruction *) ((node)->next) : __null)) { | |||
2566 | ir_variable *const var = node->as_variable(); | |||
2567 | ||||
2568 | if (var == NULL__null || var->data.mode != io_mode || | |||
2569 | !var->data.explicit_location || | |||
2570 | var->data.location < VARYING_SLOT_VAR0) | |||
2571 | continue; | |||
2572 | ||||
2573 | var_slot = var->data.location - VARYING_SLOT_VAR0; | |||
2574 | ||||
2575 | unsigned num_elements = get_varying_type(var, stage->Stage) | |||
2576 | ->count_attribute_slots(io_mode == ir_var_shader_in && | |||
2577 | stage->Stage == MESA_SHADER_VERTEX); | |||
2578 | for (unsigned i = 0; i < num_elements; i++) { | |||
2579 | if (var_slot >= 0 && var_slot < MAX_VARYINGS_INCL_PATCH((((VARYING_SLOT_VAR0 + 32)) + 32) - VARYING_SLOT_VAR0)) | |||
2580 | slots |= UINT64_C(1)1UL << var_slot; | |||
2581 | var_slot += 1; | |||
2582 | } | |||
2583 | } | |||
2584 | ||||
2585 | return slots; | |||
2586 | } | |||
2587 | ||||
2588 | ||||
2589 | /** | |||
2590 | * Assign locations for all variables that are produced in one pipeline stage | |||
2591 | * (the "producer") and consumed in the next stage (the "consumer"). | |||
2592 | * | |||
2593 | * Variables produced by the producer may also be consumed by transform | |||
2594 | * feedback. | |||
2595 | * | |||
2596 | * \param num_tfeedback_decls is the number of declarations indicating | |||
2597 | * variables that may be consumed by transform feedback. | |||
2598 | * | |||
2599 | * \param tfeedback_decls is a pointer to an array of tfeedback_decl objects | |||
2600 | * representing the result of parsing the strings passed to | |||
2601 | * glTransformFeedbackVaryings(). assign_location() will be called for | |||
2602 | * each of these objects that matches one of the outputs of the | |||
2603 | * producer. | |||
2604 | * | |||
2605 | * When num_tfeedback_decls is nonzero, it is permissible for the consumer to | |||
2606 | * be NULL. In this case, varying locations are assigned solely based on the | |||
2607 | * requirements of transform feedback. | |||
2608 | */ | |||
2609 | static bool | |||
2610 | assign_varying_locations(struct gl_context *ctx, | |||
2611 | void *mem_ctx, | |||
2612 | struct gl_shader_program *prog, | |||
2613 | gl_linked_shader *producer, | |||
2614 | gl_linked_shader *consumer, | |||
2615 | unsigned num_tfeedback_decls, | |||
2616 | tfeedback_decl *tfeedback_decls, | |||
2617 | const uint64_t reserved_slots) | |||
2618 | { | |||
2619 | /* Tessellation shaders treat inputs and outputs as shared memory and can | |||
2620 | * access inputs and outputs of other invocations. | |||
2621 | * Therefore, they can't be lowered to temps easily (and definitely not | |||
2622 | * efficiently). | |||
2623 | */ | |||
2624 | bool unpackable_tess = | |||
2625 | (consumer && consumer->Stage == MESA_SHADER_TESS_EVAL) || | |||
| ||||
2626 | (consumer
| |||
2627 | (producer && producer->Stage == MESA_SHADER_TESS_CTRL); | |||
2628 | ||||
2629 | /* Transform feedback code assumes varying arrays are packed, so if the | |||
2630 | * driver has disabled varying packing, make sure to at least enable | |||
2631 | * packing required by transform feedback. See below for exception. | |||
2632 | */ | |||
2633 | bool xfb_enabled = | |||
2634 | ctx->Extensions.EXT_transform_feedback && !unpackable_tess; | |||
2635 | ||||
2636 | /* Some drivers actually require packing to be explicitly disabled | |||
2637 | * for varyings used by transform feedback. | |||
2638 | */ | |||
2639 | bool disable_xfb_packing = | |||
2640 | ctx->Const.DisableTransformFeedbackPacking; | |||
2641 | ||||
2642 | /* Disable packing on outward facing interfaces for SSO because in ES we | |||
2643 | * need to retain the unpacked varying information for draw time | |||
2644 | * validation. | |||
2645 | * | |||
2646 | * Packing is still enabled on individual arrays, structs, and matrices as | |||
2647 | * these are required by the transform feedback code and it is still safe | |||
2648 | * to do so. We also enable packing when a varying is only used for | |||
2649 | * transform feedback and it's not an SSO. | |||
2650 | */ | |||
2651 | bool disable_varying_packing = | |||
2652 | ctx->Const.DisableVaryingPacking || unpackable_tess; | |||
2653 | if (prog->SeparateShader && (producer == NULL__null || consumer == NULL__null)) | |||
2654 | disable_varying_packing = true; | |||
2655 | ||||
2656 | varying_matches matches(disable_varying_packing, | |||
2657 | disable_xfb_packing, | |||
2658 | xfb_enabled, | |||
2659 | ctx->Extensions.ARB_enhanced_layouts, | |||
2660 | producer
| |||
2661 | consumer
| |||
2662 | void *hash_table_ctx = ralloc_context(NULL__null); | |||
2663 | hash_table *tfeedback_candidates = | |||
2664 | _mesa_hash_table_create(hash_table_ctx, _mesa_hash_string, | |||
2665 | _mesa_key_string_equal); | |||
2666 | hash_table *consumer_inputs = | |||
2667 | _mesa_hash_table_create(hash_table_ctx, _mesa_hash_string, | |||
2668 | _mesa_key_string_equal); | |||
2669 | hash_table *consumer_interface_inputs = | |||
2670 | _mesa_hash_table_create(hash_table_ctx, _mesa_hash_string, | |||
2671 | _mesa_key_string_equal); | |||
2672 | ir_variable *consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX(((VARYING_SLOT_VAR0 + 32)) + 32)] = { | |||
2673 | NULL__null, | |||
2674 | }; | |||
2675 | ||||
2676 | unsigned consumer_vertices = 0; | |||
2677 | if (consumer
| |||
2678 | consumer_vertices = prog->Geom.VerticesIn; | |||
2679 | ||||
2680 | /* Operate in a total of four passes. | |||
2681 | * | |||
2682 | * 1. Sort inputs / outputs into a canonical order. This is necessary so | |||
2683 | * that inputs / outputs of separable shaders will be assigned | |||
2684 | * predictable locations regardless of the order in which declarations | |||
2685 | * appeared in the shader source. | |||
2686 | * | |||
2687 | * 2. Assign locations for any matching inputs and outputs. | |||
2688 | * | |||
2689 | * 3. Mark output variables in the producer that do not have locations as | |||
2690 | * not being outputs. This lets the optimizer eliminate them. | |||
2691 | * | |||
2692 | * 4. Mark input variables in the consumer that do not have locations as | |||
2693 | * not being inputs. This lets the optimizer eliminate them. | |||
2694 | */ | |||
2695 | if (consumer
| |||
2696 | canonicalize_shader_io(consumer->ir, ir_var_shader_in); | |||
2697 | ||||
2698 | if (producer
| |||
2699 | canonicalize_shader_io(producer->ir, ir_var_shader_out); | |||
2700 | ||||
2701 | if (consumer
| |||
2702 | linker::populate_consumer_input_sets(mem_ctx, consumer->ir, | |||
2703 | consumer_inputs, | |||
2704 | consumer_interface_inputs, | |||
2705 | consumer_inputs_with_locations); | |||
2706 | ||||
2707 | if (producer
| |||
2708 | foreach_in_list(ir_instruction, node, producer->ir)for (ir_instruction *node = (!exec_node_is_tail_sentinel((producer ->ir)->head_sentinel.next) ? (ir_instruction *) ((producer ->ir)->head_sentinel.next) : __null); (node) != __null; (node) = (!exec_node_is_tail_sentinel((node)->next) ? (ir_instruction *) ((node)->next) : __null)) { | |||
2709 | ir_variable *const output_var = node->as_variable(); | |||
2710 | ||||
2711 | if (output_var == NULL__null || output_var->data.mode != ir_var_shader_out) | |||
2712 | continue; | |||
2713 | ||||
2714 | /* Only geometry shaders can use non-zero streams */ | |||
2715 | assert(output_var->data.stream == 0 ||(static_cast <bool> (output_var->data.stream == 0 || (output_var->data.stream < 4 && producer->Stage == MESA_SHADER_GEOMETRY)) ? void (0) : __assert_fail ("output_var->data.stream == 0 || (output_var->data.stream < MAX_VERTEX_STREAMS && producer->Stage == MESA_SHADER_GEOMETRY)" , __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__ )) | |||
2716 | (output_var->data.stream < MAX_VERTEX_STREAMS &&(static_cast <bool> (output_var->data.stream == 0 || (output_var->data.stream < 4 && producer->Stage == MESA_SHADER_GEOMETRY)) ? void (0) : __assert_fail ("output_var->data.stream == 0 || (output_var->data.stream < MAX_VERTEX_STREAMS && producer->Stage == MESA_SHADER_GEOMETRY)" , __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__ )) | |||
2717 | producer->Stage == MESA_SHADER_GEOMETRY))(static_cast <bool> (output_var->data.stream == 0 || (output_var->data.stream < 4 && producer->Stage == MESA_SHADER_GEOMETRY)) ? void (0) : __assert_fail ("output_var->data.stream == 0 || (output_var->data.stream < MAX_VERTEX_STREAMS && producer->Stage == MESA_SHADER_GEOMETRY)" , __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__ )); | |||
2718 | ||||
2719 | if (num_tfeedback_decls > 0) { | |||
2720 | tfeedback_candidate_generator g(mem_ctx, tfeedback_candidates, producer->Stage); | |||
2721 | /* From OpenGL 4.6 (Core Profile) spec, section 11.1.2.1 | |||
2722 | * ("Vertex Shader Variables / Output Variables") | |||
2723 | * | |||
2724 | * "Each program object can specify a set of output variables from | |||
2725 | * one shader to be recorded in transform feedback mode (see | |||
2726 | * section 13.3). The variables that can be recorded are those | |||
2727 | * emitted by the first active shader, in order, from the | |||
2728 | * following list: | |||
2729 | * | |||
2730 | * * geometry shader | |||
2731 | * * tessellation evaluation shader | |||
2732 | * * tessellation control shader | |||
2733 | * * vertex shader" | |||
2734 | * | |||
2735 | * But on OpenGL ES 3.2, section 11.1.2.1 ("Vertex Shader | |||
2736 | * Variables / Output Variables") tessellation control shader is | |||
2737 | * not included in the stages list. | |||
2738 | */ | |||
2739 | if (!prog->IsES || producer->Stage != MESA_SHADER_TESS_CTRL) { | |||
2740 | g.process(output_var); | |||
2741 | } | |||
2742 | } | |||
2743 | ||||
2744 | ir_variable *const input_var = | |||
2745 | linker::get_matching_input(mem_ctx, output_var, consumer_inputs, | |||
2746 | consumer_interface_inputs, | |||
2747 | consumer_inputs_with_locations); | |||
2748 | ||||
2749 | /* If a matching input variable was found, add this output (and the | |||
2750 | * input) to the set. If this is a separable program and there is no | |||
2751 | * consumer stage, add the output. | |||
2752 | * | |||
2753 | * Always add TCS outputs. They are shared by all invocations | |||
2754 | * within a patch and can be used as shared memory. | |||
2755 | */ | |||
2756 | if (input_var || (prog->SeparateShader && consumer == NULL__null) || | |||
2757 | producer->Stage == MESA_SHADER_TESS_CTRL) { | |||
2758 | matches.record(output_var, input_var); | |||
2759 | } | |||
2760 | ||||
2761 | /* Only stream 0 outputs can be consumed in the next stage */ | |||
2762 | if (input_var && output_var->data.stream != 0) { | |||
2763 | linker_error(prog, "output %s is assigned to stream=%d but " | |||
2764 | "is linked to an input, which requires stream=0", | |||
2765 | output_var->name, output_var->data.stream); | |||
2766 | ralloc_free(hash_table_ctx); | |||
2767 | return false; | |||
2768 | } | |||
2769 | } | |||
2770 | } else { | |||
2771 | /* If there's no producer stage, then this must be a separable program. | |||
2772 | * For example, we may have a program that has just a fragment shader. | |||
2773 | * Later this program will be used with some arbitrary vertex (or | |||
2774 | * geometry) shader program. This means that locations must be assigned | |||
2775 | * for all the inputs. | |||
2776 | */ | |||
2777 | foreach_in_list(ir_instruction, node, consumer->ir)for (ir_instruction *node = (!exec_node_is_tail_sentinel((consumer ->ir)->head_sentinel.next) ? (ir_instruction *) ((consumer ->ir)->head_sentinel.next) : __null); (node) != __null; (node) = (!exec_node_is_tail_sentinel((node)->next) ? (ir_instruction *) ((node)->next) : __null)) { | |||
| ||||
2778 | ir_variable *const input_var = node->as_variable(); | |||
2779 | if (input_var && input_var->data.mode == ir_var_shader_in) { | |||
2780 | matches.record(NULL__null, input_var); | |||
2781 | } | |||
2782 | } | |||
2783 | } | |||
2784 | ||||
2785 | for (unsigned i = 0; i < num_tfeedback_decls; ++i) { | |||
2786 | if (!tfeedback_decls[i].is_varying()) | |||
2787 | continue; | |||
2788 | ||||
2789 | const tfeedback_candidate *matched_candidate | |||
2790 | = tfeedback_decls[i].find_candidate(prog, tfeedback_candidates); | |||
2791 | ||||
2792 | if (matched_candidate == NULL__null) { | |||
2793 | ralloc_free(hash_table_ctx); | |||
2794 | return false; | |||
2795 | } | |||
2796 | ||||
2797 | /* There are two situations where a new output varying is needed: | |||
2798 | * | |||
2799 | * - If varying packing is disabled for xfb and the current declaration | |||
2800 | * is not aligned within the top level varying (e.g. vec3_arr[1]). | |||
2801 | * | |||
2802 | * - If a builtin variable needs to be copied to a new variable | |||
2803 | * before its content is modified by another lowering pass (e.g. | |||
2804 | * \c gl_Position is transformed by \c nir_lower_viewport_transform). | |||
2805 | */ | |||
2806 | const unsigned dmul = | |||
2807 | matched_candidate->type->without_array()->is_64bit() ? 2 : 1; | |||
2808 | const bool lowered = | |||
2809 | (disable_xfb_packing && | |||
2810 | !tfeedback_decls[i].is_aligned(dmul, matched_candidate->offset)) || | |||
2811 | (matched_candidate->toplevel_var->data.explicit_location && | |||
2812 | matched_candidate->toplevel_var->data.location < VARYING_SLOT_VAR0 && | |||
2813 | (ctx->Const.ShaderCompilerOptions[producer->Stage].LowerBuiltinVariablesXfb & | |||
2814 | BITFIELD_BIT(matched_candidate->toplevel_var->data.location)(1u << (matched_candidate->toplevel_var->data.location )))); | |||
2815 | ||||
2816 | if (lowered) { | |||
2817 | ir_variable *new_var; | |||
2818 | tfeedback_candidate *new_candidate = NULL__null; | |||
2819 | ||||
2820 | new_var = lower_xfb_varying(mem_ctx, producer, tfeedback_decls[i].name()); | |||
2821 | if (new_var == NULL__null) { | |||
2822 | ralloc_free(hash_table_ctx); | |||
2823 | return false; | |||
2824 | } | |||
2825 | ||||
2826 | /* Create new candidate and replace matched_candidate */ | |||
2827 | new_candidate = rzalloc(mem_ctx, tfeedback_candidate)((tfeedback_candidate *) rzalloc_size(mem_ctx, sizeof(tfeedback_candidate ))); | |||
2828 | new_candidate->toplevel_var = new_var; | |||
2829 | new_candidate->toplevel_var->data.is_unmatched_generic_inout = 1; | |||
2830 | new_candidate->type = new_var->type; | |||
2831 | new_candidate->offset = 0; | |||
2832 | _mesa_hash_table_insert(tfeedback_candidates, | |||
2833 | ralloc_strdup(mem_ctx, new_var->name), | |||
2834 | new_candidate); | |||
2835 | ||||
2836 | tfeedback_decls[i].set_lowered_candidate(new_candidate); | |||
2837 | matched_candidate = new_candidate; | |||
2838 | } | |||
2839 | ||||
2840 | /* Mark as xfb varying */ | |||
2841 | matched_candidate->toplevel_var->data.is_xfb = 1; | |||
2842 | ||||
2843 | /* Mark xfb varyings as always active */ | |||
2844 | matched_candidate->toplevel_var->data.always_active_io = 1; | |||
2845 | ||||
2846 | /* Mark any corresponding inputs as always active also. We must do this | |||
2847 | * because we have a NIR pass that lowers vectors to scalars and another | |||
2848 | * that removes unused varyings. | |||
2849 | * We don't split varyings marked as always active because there is no | |||
2850 | * point in doing so. This means we need to mark both sides of the | |||
2851 | * interface as always active otherwise we will have a mismatch and | |||
2852 | * start removing things we shouldn't. | |||
2853 | */ | |||
2854 | ir_variable *const input_var = | |||
2855 | linker::get_matching_input(mem_ctx, matched_candidate->toplevel_var, | |||
2856 | consumer_inputs, | |||
2857 | consumer_interface_inputs, | |||
2858 | consumer_inputs_with_locations); | |||
2859 | if (input_var) { | |||
2860 | input_var->data.is_xfb = 1; | |||
2861 | input_var->data.always_active_io = 1; | |||
2862 | } | |||
2863 | ||||
2864 | if (matched_candidate->toplevel_var->data.is_unmatched_generic_inout) { | |||
2865 | matched_candidate->toplevel_var->data.is_xfb_only = 1; | |||
2866 | matches.record(matched_candidate->toplevel_var, NULL__null); | |||
2867 | } | |||
2868 | } | |||
2869 | ||||
2870 | uint8_t components[MAX_VARYINGS_INCL_PATCH((((VARYING_SLOT_VAR0 + 32)) + 32) - VARYING_SLOT_VAR0)] = {0}; | |||
2871 | const unsigned slots_used = matches.assign_locations( | |||
2872 | prog, components, reserved_slots); | |||
2873 | matches.store_locations(); | |||
2874 | ||||
2875 | for (unsigned i = 0; i < num_tfeedback_decls; ++i) { | |||
2876 | if (tfeedback_decls[i].is_varying()) { | |||
2877 | if (!tfeedback_decls[i].assign_location(ctx, prog)) { | |||
2878 | ralloc_free(hash_table_ctx); | |||
2879 | return false; | |||
2880 | } | |||
2881 | } | |||
2882 | } | |||
2883 | ralloc_free(hash_table_ctx); | |||
2884 | ||||
2885 | if (consumer && producer) { | |||
2886 | foreach_in_list(ir_instruction, node, consumer->ir)for (ir_instruction *node = (!exec_node_is_tail_sentinel((consumer ->ir)->head_sentinel.next) ? (ir_instruction *) ((consumer ->ir)->head_sentinel.next) : __null); (node) != __null; (node) = (!exec_node_is_tail_sentinel((node)->next) ? (ir_instruction *) ((node)->next) : __null)) { | |||
2887 | ir_variable *const var = node->as_variable(); | |||
2888 | ||||
2889 | if (var && var->data.mode == ir_var_shader_in && | |||
2890 | var->data.is_unmatched_generic_inout) { | |||
2891 | if (!prog->IsES && prog->data->Version <= 120) { | |||
2892 | /* On page 25 (page 31 of the PDF) of the GLSL 1.20 spec: | |||
2893 | * | |||
2894 | * Only those varying variables used (i.e. read) in | |||
2895 | * the fragment shader executable must be written to | |||
2896 | * by the vertex shader executable; declaring | |||
2897 | * superfluous varying variables in a vertex shader is | |||
2898 | * permissible. | |||
2899 | * | |||
2900 | * We interpret this text as meaning that the VS must | |||
2901 | * write the variable for the FS to read it. See | |||
2902 | * "glsl1-varying read but not written" in piglit. | |||
2903 | */ | |||
2904 | linker_error(prog, "%s shader varying %s not written " | |||
2905 | "by %s shader\n.", | |||
2906 | _mesa_shader_stage_to_string(consumer->Stage), | |||
2907 | var->name, | |||
2908 | _mesa_shader_stage_to_string(producer->Stage)); | |||
2909 | } else { | |||
2910 | linker_warning(prog, "%s shader varying %s not written " | |||
2911 | "by %s shader\n.", | |||
2912 | _mesa_shader_stage_to_string(consumer->Stage), | |||
2913 | var->name, | |||
2914 | _mesa_shader_stage_to_string(producer->Stage)); | |||
2915 | } | |||
2916 | } | |||
2917 | } | |||
2918 | ||||
2919 | /* Now that validation is done its safe to remove unused varyings. As | |||
2920 | * we have both a producer and consumer its safe to remove unused | |||
2921 | * varyings even if the program is a SSO because the stages are being | |||
2922 | * linked together i.e. we have a multi-stage SSO. | |||
2923 | */ | |||
2924 | remove_unused_shader_inputs_and_outputs(false, producer, | |||
2925 | ir_var_shader_out); | |||
2926 | remove_unused_shader_inputs_and_outputs(false, consumer, | |||
2927 | ir_var_shader_in); | |||
2928 | } | |||
2929 | ||||
2930 | if (producer) { | |||
2931 | lower_packed_varyings(mem_ctx, slots_used, components, ir_var_shader_out, | |||
2932 | 0, producer, disable_varying_packing, | |||
2933 | disable_xfb_packing, xfb_enabled); | |||
2934 | } | |||
2935 | ||||
2936 | if (consumer) { | |||
2937 | lower_packed_varyings(mem_ctx, slots_used, components, ir_var_shader_in, | |||
2938 | consumer_vertices, consumer, disable_varying_packing, | |||
2939 | disable_xfb_packing, xfb_enabled); | |||
2940 | } | |||
2941 | ||||
2942 | return true; | |||
2943 | } | |||
2944 | ||||
2945 | static bool | |||
2946 | check_against_output_limit(struct gl_context *ctx, | |||
2947 | struct gl_shader_program *prog, | |||
2948 | gl_linked_shader *producer, | |||
2949 | unsigned num_explicit_locations) | |||
2950 | { | |||
2951 | unsigned output_vectors = num_explicit_locations; | |||
2952 | ||||
2953 | foreach_in_list(ir_instruction, node, producer->ir)for (ir_instruction *node = (!exec_node_is_tail_sentinel((producer ->ir)->head_sentinel.next) ? (ir_instruction *) ((producer ->ir)->head_sentinel.next) : __null); (node) != __null; (node) = (!exec_node_is_tail_sentinel((node)->next) ? (ir_instruction *) ((node)->next) : __null)) { | |||
2954 | ir_variable *const var = node->as_variable(); | |||
2955 | ||||
2956 | if (var && !var->data.explicit_location && | |||
2957 | var->data.mode == ir_var_shader_out && | |||
2958 | var_counts_against_varying_limit(producer->Stage, var)) { | |||
2959 | /* outputs for fragment shader can't be doubles */ | |||
2960 | output_vectors += var->type->count_attribute_slots(false); | |||
2961 | } | |||
2962 | } | |||
2963 | ||||
2964 | assert(producer->Stage != MESA_SHADER_FRAGMENT)(static_cast <bool> (producer->Stage != MESA_SHADER_FRAGMENT ) ? void (0) : __assert_fail ("producer->Stage != MESA_SHADER_FRAGMENT" , __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__ )); | |||
2965 | unsigned max_output_components = | |||
2966 | ctx->Const.Program[producer->Stage].MaxOutputComponents; | |||
2967 | ||||
2968 | const unsigned output_components = output_vectors * 4; | |||
2969 | if (output_components > max_output_components) { | |||
2970 | if (ctx->API == API_OPENGLES2 || prog->IsES) | |||
2971 | linker_error(prog, "%s shader uses too many output vectors " | |||
2972 | "(%u > %u)\n", | |||
2973 | _mesa_shader_stage_to_string(producer->Stage), | |||
2974 | output_vectors, | |||
2975 | max_output_components / 4); | |||
2976 | else | |||
2977 | linker_error(prog, "%s shader uses too many output components " | |||
2978 | "(%u > %u)\n", | |||
2979 | _mesa_shader_stage_to_string(producer->Stage), | |||
2980 | output_components, | |||
2981 | max_output_components); | |||
2982 | ||||
2983 | return false; | |||
2984 | } | |||
2985 | ||||
2986 | return true; | |||
2987 | } | |||
2988 | ||||
2989 | static bool | |||
2990 | check_against_input_limit(struct gl_context *ctx, | |||
2991 | struct gl_shader_program *prog, | |||
2992 | gl_linked_shader *consumer, | |||
2993 | unsigned num_explicit_locations) | |||
2994 | { | |||
2995 | unsigned input_vectors = num_explicit_locations; | |||
2996 | ||||
2997 | foreach_in_list(ir_instruction, node, consumer->ir)for (ir_instruction *node = (!exec_node_is_tail_sentinel((consumer ->ir)->head_sentinel.next) ? (ir_instruction *) ((consumer ->ir)->head_sentinel.next) : __null); (node) != __null; (node) = (!exec_node_is_tail_sentinel((node)->next) ? (ir_instruction *) ((node)->next) : __null)) { | |||
2998 | ir_variable *const var = node->as_variable(); | |||
2999 | ||||
3000 | if (var && !var->data.explicit_location && | |||
3001 | var->data.mode == ir_var_shader_in && | |||
3002 | var_counts_against_varying_limit(consumer->Stage, var)) { | |||
3003 | /* vertex inputs aren't varying counted */ | |||
3004 | input_vectors += var->type->count_attribute_slots(false); | |||
3005 | } | |||
3006 | } | |||
3007 | ||||
3008 | assert(consumer->Stage != MESA_SHADER_VERTEX)(static_cast <bool> (consumer->Stage != MESA_SHADER_VERTEX ) ? void (0) : __assert_fail ("consumer->Stage != MESA_SHADER_VERTEX" , __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__ )); | |||
3009 | unsigned max_input_components = | |||
3010 | ctx->Const.Program[consumer->Stage].MaxInputComponents; | |||
3011 | ||||
3012 | const unsigned input_components = input_vectors * 4; | |||
3013 | if (input_components > max_input_components) { | |||
3014 | if (ctx->API == API_OPENGLES2 || prog->IsES) | |||
3015 | linker_error(prog, "%s shader uses too many input vectors " | |||
3016 | "(%u > %u)\n", | |||
3017 | _mesa_shader_stage_to_string(consumer->Stage), | |||
3018 | input_vectors, | |||
3019 | max_input_components / 4); | |||
3020 | else | |||
3021 | linker_error(prog, "%s shader uses too many input components " | |||
3022 | "(%u > %u)\n", | |||
3023 | _mesa_shader_stage_to_string(consumer->Stage), | |||
3024 | input_components, | |||
3025 | max_input_components); | |||
3026 | ||||
3027 | return false; | |||
3028 | } | |||
3029 | ||||
3030 | return true; | |||
3031 | } | |||
3032 | ||||
3033 | bool | |||
3034 | link_varyings(struct gl_shader_program *prog, unsigned first, unsigned last, | |||
3035 | struct gl_context *ctx, void *mem_ctx) | |||
3036 | { | |||
3037 | bool has_xfb_qualifiers = false; | |||
3038 | unsigned num_tfeedback_decls = 0; | |||
3039 | char **varying_names = NULL__null; | |||
3040 | tfeedback_decl *tfeedback_decls = NULL__null; | |||
3041 | ||||
3042 | /* From the ARB_enhanced_layouts spec: | |||
3043 | * | |||
3044 | * "If the shader used to record output variables for transform feedback | |||
3045 | * varyings uses the "xfb_buffer", "xfb_offset", or "xfb_stride" layout | |||
3046 | * qualifiers, the values specified by TransformFeedbackVaryings are | |||
3047 | * ignored, and the set of variables captured for transform feedback is | |||
3048 | * instead derived from the specified layout qualifiers." | |||
3049 | */ | |||
3050 | for (int i = MESA_SHADER_FRAGMENT - 1; i >= 0; i--) { | |||
3051 | /* Find last stage before fragment shader */ | |||
3052 | if (prog->_LinkedShaders[i]) { | |||
3053 | has_xfb_qualifiers = | |||
3054 | process_xfb_layout_qualifiers(mem_ctx, prog->_LinkedShaders[i], | |||
3055 | prog, &num_tfeedback_decls, | |||
3056 | &varying_names); | |||
3057 | break; | |||
3058 | } | |||
3059 | } | |||
3060 | ||||
3061 | if (!has_xfb_qualifiers) { | |||
3062 | num_tfeedback_decls = prog->TransformFeedback.NumVarying; | |||
3063 | varying_names = prog->TransformFeedback.VaryingNames; | |||
3064 | } | |||
3065 | ||||
3066 | if (num_tfeedback_decls != 0) { | |||
3067 | /* From GL_EXT_transform_feedback: | |||
3068 | * A program will fail to link if: | |||
3069 | * | |||
3070 | * * the <count> specified by TransformFeedbackVaryingsEXT is | |||
3071 | * non-zero, but the program object has no vertex or geometry | |||
3072 | * shader; | |||
3073 | */ | |||
3074 | if (first >= MESA_SHADER_FRAGMENT) { | |||
3075 | linker_error(prog, "Transform feedback varyings specified, but " | |||
3076 | "no vertex, tessellation, or geometry shader is " | |||
3077 | "present.\n"); | |||
3078 | return false; | |||
3079 | } | |||
3080 | ||||
3081 | tfeedback_decls = rzalloc_array(mem_ctx, tfeedback_decl,((tfeedback_decl *) rzalloc_array_size(mem_ctx, sizeof(tfeedback_decl ), num_tfeedback_decls)) | |||
3082 | num_tfeedback_decls)((tfeedback_decl *) rzalloc_array_size(mem_ctx, sizeof(tfeedback_decl ), num_tfeedback_decls)); | |||
3083 | if (!parse_tfeedback_decls(ctx, prog, mem_ctx, num_tfeedback_decls, | |||
3084 | varying_names, tfeedback_decls)) | |||
3085 | return false; | |||
3086 | } | |||
3087 | ||||
3088 | /* If there is no fragment shader we need to set transform feedback. | |||
3089 | * | |||
3090 | * For SSO we also need to assign output locations. We assign them here | |||
3091 | * because we need to do it for both single stage programs and multi stage | |||
3092 | * programs. | |||
3093 | */ | |||
3094 | if (last < MESA_SHADER_FRAGMENT && | |||
3095 | (num_tfeedback_decls != 0 || prog->SeparateShader)) { | |||
3096 | const uint64_t reserved_out_slots = | |||
3097 | reserved_varying_slot(prog->_LinkedShaders[last], ir_var_shader_out); | |||
3098 | if (!assign_varying_locations(ctx, mem_ctx, prog, | |||
3099 | prog->_LinkedShaders[last], NULL__null, | |||
3100 | num_tfeedback_decls, tfeedback_decls, | |||
3101 | reserved_out_slots)) | |||
3102 | return false; | |||
3103 | } | |||
3104 | ||||
3105 | if (last <= MESA_SHADER_FRAGMENT) { | |||
3106 | /* Remove unused varyings from the first/last stage unless SSO */ | |||
3107 | remove_unused_shader_inputs_and_outputs(prog->SeparateShader, | |||
3108 | prog->_LinkedShaders[first], | |||
3109 | ir_var_shader_in); | |||
3110 | remove_unused_shader_inputs_and_outputs(prog->SeparateShader, | |||
3111 | prog->_LinkedShaders[last], | |||
3112 | ir_var_shader_out); | |||
3113 | ||||
3114 | /* If the program is made up of only a single stage */ | |||
3115 | if (first == last) { | |||
3116 | gl_linked_shader *const sh = prog->_LinkedShaders[last]; | |||
3117 | ||||
3118 | do_dead_builtin_varyings(ctx, NULL__null, sh, 0, NULL__null); | |||
3119 | do_dead_builtin_varyings(ctx, sh, NULL__null, num_tfeedback_decls, | |||
3120 | tfeedback_decls); | |||
3121 | ||||
3122 | if (prog->SeparateShader) { | |||
3123 | const uint64_t reserved_slots = | |||
3124 | reserved_varying_slot(sh, ir_var_shader_in); | |||
3125 | ||||
3126 | /* Assign input locations for SSO, output locations are already | |||
3127 | * assigned. | |||
3128 | */ | |||
3129 | if (!assign_varying_locations(ctx, mem_ctx, prog, | |||
3130 | NULL__null /* producer */, | |||
3131 | sh /* consumer */, | |||
3132 | 0 /* num_tfeedback_decls */, | |||
3133 | NULL__null /* tfeedback_decls */, | |||
3134 | reserved_slots)) | |||
3135 | return false; | |||
3136 | } | |||
3137 | } else { | |||
3138 | /* Linking the stages in the opposite order (from fragment to vertex) | |||
3139 | * ensures that inter-shader outputs written to in an earlier stage | |||
3140 | * are eliminated if they are (transitively) not used in a later | |||
3141 | * stage. | |||
3142 | */ | |||
3143 | int next = last; | |||
3144 | for (int i = next - 1; i >= 0; i--) { | |||
3145 | if (prog->_LinkedShaders[i] == NULL__null && i != 0) | |||
3146 | continue; | |||
3147 | ||||
3148 | gl_linked_shader *const sh_i = prog->_LinkedShaders[i]; | |||
3149 | gl_linked_shader *const sh_next = prog->_LinkedShaders[next]; | |||
3150 | ||||
3151 | const uint64_t reserved_out_slots = | |||
3152 | reserved_varying_slot(sh_i, ir_var_shader_out); | |||
3153 | const uint64_t reserved_in_slots = | |||
3154 | reserved_varying_slot(sh_next, ir_var_shader_in); | |||
3155 | ||||
3156 | do_dead_builtin_varyings(ctx, sh_i, sh_next, | |||
3157 | next == MESA_SHADER_FRAGMENT ? num_tfeedback_decls : 0, | |||
3158 | tfeedback_decls); | |||
3159 | ||||
3160 | if (!assign_varying_locations(ctx, mem_ctx, prog, sh_i, sh_next, | |||
3161 | next == MESA_SHADER_FRAGMENT ? num_tfeedback_decls : 0, | |||
3162 | tfeedback_decls, | |||
3163 | reserved_out_slots | reserved_in_slots)) | |||
3164 | return false; | |||
3165 | ||||
3166 | /* This must be done after all dead varyings are eliminated. */ | |||
3167 | if (sh_i != NULL__null) { | |||
3168 | unsigned slots_used = util_bitcount64(reserved_out_slots); | |||
3169 | if (!check_against_output_limit(ctx, prog, sh_i, slots_used)) { | |||
3170 | return false; | |||
3171 | } | |||
3172 | } | |||
3173 | ||||
3174 | unsigned slots_used = util_bitcount64(reserved_in_slots); | |||
3175 | if (!check_against_input_limit(ctx, prog, sh_next, slots_used)) | |||
3176 | return false; | |||
3177 | ||||
3178 | next = i; | |||
3179 | } | |||
3180 | } | |||
3181 | } | |||
3182 | ||||
3183 | if (!store_tfeedback_info(ctx, prog, num_tfeedback_decls, tfeedback_decls, | |||
3184 | has_xfb_qualifiers, mem_ctx)) | |||
3185 | return false; | |||
3186 | ||||
3187 | return true; | |||
3188 | } |