Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[3.x] Fix multiple ubershader bugs #64096

Merged
merged 3 commits into from
Aug 8, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 20 additions & 3 deletions drivers/gles3/rasterizer_scene_gles3.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1855,7 +1855,10 @@ void RasterizerSceneGLES3::_setup_light(RenderList::Element *e, const Transform
// Normally, lightmapping uses the same texturing units than the GI probes; however, in the case of the ubershader
// that's not a good idea because some hardware/drivers (Android/Intel) may fail to render if a single texturing unit
// is used through multiple kinds of samplers in the same shader.
if (state.scene_shader.is_version_ubershader()) {
// Moreover, since we don't know at this point if we are going to consume these textures from the ubershader or
// a conditioned one, the fact that async compilation is enabled is enough for us to switch to the alternative
// arrangement of texturing units.
if (storage->config.async_compilation_enabled) {
glActiveTexture(GL_TEXTURE0 + storage->config.max_texture_image_units - 12);
} else {
glActiveTexture(GL_TEXTURE0 + storage->config.max_texture_image_units - 10);
Expand All @@ -1871,7 +1874,7 @@ void RasterizerSceneGLES3::_setup_light(RenderList::Element *e, const Transform
if (gi_probe_count > 1) {
GIProbeInstance *gipi2 = gi_probe_instance_owner.getptr(ridp[1]);

if (state.scene_shader.is_version_ubershader()) {
if (storage->config.async_compilation_enabled) {
glActiveTexture(GL_TEXTURE0 + storage->config.max_texture_image_units - 13);
} else {
glActiveTexture(GL_TEXTURE0 + storage->config.max_texture_image_units - 11);
Expand Down Expand Up @@ -4114,6 +4117,13 @@ void RasterizerSceneGLES3::render_scene(const Transform &p_cam_transform, const
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_COMPARE_FUNC, GL_LESS);
state.ubo_data.shadow_atlas_pixel_size[0] = 1.0 / shadow_atlas->size;
state.ubo_data.shadow_atlas_pixel_size[1] = 1.0 / shadow_atlas->size;
} else {
if (storage->config.async_compilation_enabled) {
// Avoid GL UB message id 131222 caused by shadow samplers not properly set up in the ubershader
glActiveTexture(GL_TEXTURE0 + storage->config.max_texture_image_units - 6);
glBindTexture(GL_TEXTURE_2D, storage->resources.depth_tex);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_COMPARE_MODE, GL_COMPARE_REF_TO_TEXTURE);
}
}

if (reflection_atlas && reflection_atlas->size) {
Expand Down Expand Up @@ -4869,6 +4879,13 @@ void RasterizerSceneGLES3::render_shadow(RID p_light, RID p_shadow_atlas, int p_
state.ubo_data.shadow_dual_paraboloid_render_zfar = zfar;
state.ubo_data.opaque_prepass_threshold = 0.1;

if (storage->config.async_compilation_enabled) {
// Avoid GL UB message id 131222 caused by shadow samplers not properly set up in the ubershader
glActiveTexture(GL_TEXTURE0 + storage->config.max_texture_image_units - 6);
glBindTexture(GL_TEXTURE_2D, storage->resources.depth_tex);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_COMPARE_MODE, GL_COMPARE_REF_TO_TEXTURE);
}

_setup_environment(nullptr, light_projection, light_transform);

state.scene_shader.set_conditional(SceneShaderGLES3::RENDER_DEPTH, true);
Expand Down Expand Up @@ -5300,7 +5317,7 @@ void RasterizerSceneGLES3::initialize() {
glFrontFace(GL_CW);

if (storage->config.async_compilation_enabled) {
state.scene_shader.init_async_compilation(storage->resources.depth_tex);
state.scene_shader.init_async_compilation();
}
}

Expand Down
2 changes: 1 addition & 1 deletion drivers/gles3/rasterizer_storage_gles3.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8369,7 +8369,7 @@ void RasterizerStorageGLES3::initialize() {
shaders.cubemap_filter.set_conditional(CubemapFilterShaderGLES3::LOW_QUALITY, !ggx_hq);
shaders.particles.init();
if (config.async_compilation_enabled) {
shaders.particles.init_async_compilation(resources.depth_tex);
shaders.particles.init_async_compilation();
}

#ifdef GLES_OVER_GL
Expand Down
46 changes: 16 additions & 30 deletions drivers/gles3/shader_gles3.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -177,45 +177,29 @@ bool ShaderGLES3::is_custom_code_ready_for_render(uint32_t p_code_id) {
return true;
}

bool ShaderGLES3::_bind_ubershader() {
bool ShaderGLES3::_bind_ubershader(bool p_for_warmup) {
#ifdef DEBUG_ENABLED
ERR_FAIL_COND_V(!is_async_compilation_supported(), false);
ERR_FAIL_COND_V(get_ubershader_flags_uniform() == -1, false);
#endif
new_conditional_version.version |= VersionKey::UBERSHADER_FLAG;
bool bound = _bind(true);
new_conditional_version.version &= ~VersionKey::UBERSHADER_FLAG;
if (p_for_warmup) {
// Avoid GL UB message id 131222 caused by shadow samplers not properly set up yet
unbind();
return bound;
}
int conditionals_uniform = _get_uniform(get_ubershader_flags_uniform());
#ifdef DEBUG_ENABLED
ERR_FAIL_COND_V(conditionals_uniform == -1, false);
#endif
new_conditional_version.version &= ~VersionKey::UBERSHADER_FLAG;
#ifdef DEV_ENABLED
// So far we don't need bit 31 for conditionals. That allows us to use signed integers,
// which are more compatible across GL driver vendors.
CRASH_COND(new_conditional_version.version >= 0x80000000);
#endif
glUniform1i(conditionals_uniform, new_conditional_version.version);

// This is done to avoid running into the GL UB message id 131222. Long explanation:
// If an ubershader has shadow samplers, they are generally not used if the current material has shadowing disabled,
// but that also implies the rasterizer won't do any preparation to the relevant shadow samplers (which won't really exist,
// so that's the correct way in a conditioned shader).
// However, in the case of the ubershader those shadow samplers are unconditionally declared, although potentially unused and
// thus "uninitialized". Sampling in that situation (compare disabled, no depth texture bound) is undefined behavior for GL.
// And that's a problem for us because, even if dynamic branching will serve to avoid using the unprepared sampler when shadowing
// is not enabled, the GPU may still run the other branch. And it's not just that the results from the sampling are undefined
// (that wouldn't be a problem and we could just ignore the warning); the problem is that sampling in that state is fully UB.
for (int i = 0; i < shadow_texunit_count; i++) {
int unit = shadow_texunits[i];
if (unit >= 0) {
glActiveTexture(GL_TEXTURE0 + unit);
} else {
glActiveTexture(GL_TEXTURE0 + max_image_units + unit);
}
glBindTexture(GL_TEXTURE_2D, depth_tex);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_COMPARE_MODE, GL_COMPARE_REF_TO_TEXTURE);
}

return bound;
}

Expand Down Expand Up @@ -598,6 +582,11 @@ ShaderGLES3::Version *ShaderGLES3::get_current_version(bool &r_async_forbidden)
strings_common.push_back("\n");
}

if (is_async_compilation_supported() && get_ubershader_flags_uniform() != -1) {
// Indicate that this shader may be used both as ubershader and conditioned during the session
strings_common.push_back("#define UBERSHADER_COMPAT\n");
}

LocalVector<CharString> flag_macros;
bool build_ubershader = get_ubershader_flags_uniform() != -1 && (effective_version.version & VersionKey::UBERSHADER_FLAG);
if (build_ubershader) {
Expand Down Expand Up @@ -1128,7 +1117,7 @@ GLint ShaderGLES3::get_uniform_location(const String &p_name) const {
return glGetUniformLocation(version->ids.main, p_name.ascii().get_data());
}

void ShaderGLES3::setup(const char **p_conditional_defines, int p_conditional_count, const char **p_uniform_names, int p_uniform_count, const AttributePair *p_attribute_pairs, int p_attribute_count, const TexUnitPair *p_texunit_pairs, int p_texunit_pair_count, const int *p_shadow_texunits, int p_shadow_texunit_count, const UBOPair *p_ubo_pairs, int p_ubo_pair_count, const Feedback *p_feedback, int p_feedback_count, const char *p_vertex_code, const char *p_fragment_code, int p_vertex_code_start, int p_fragment_code_start) {
void ShaderGLES3::setup(const char **p_conditional_defines, int p_conditional_count, const char **p_uniform_names, int p_uniform_count, const AttributePair *p_attribute_pairs, int p_attribute_count, const TexUnitPair *p_texunit_pairs, int p_texunit_pair_count, const UBOPair *p_ubo_pairs, int p_ubo_pair_count, const Feedback *p_feedback, int p_feedback_count, const char *p_vertex_code, const char *p_fragment_code, int p_vertex_code_start, int p_fragment_code_start) {
ERR_FAIL_COND(version);
conditional_version.key = 0;
new_conditional_version.key = 0;
Expand All @@ -1140,8 +1129,6 @@ void ShaderGLES3::setup(const char **p_conditional_defines, int p_conditional_co
fragment_code = p_fragment_code;
texunit_pairs = p_texunit_pairs;
texunit_pair_count = p_texunit_pair_count;
shadow_texunits = p_shadow_texunits;
shadow_texunit_count = p_shadow_texunit_count;
vertex_code_start = p_vertex_code_start;
fragment_code_start = p_fragment_code_start;
attribute_pairs = p_attribute_pairs;
Expand Down Expand Up @@ -1232,12 +1219,11 @@ void ShaderGLES3::setup(const char **p_conditional_defines, int p_conditional_co
glGetIntegerv(GL_MAX_TEXTURE_IMAGE_UNITS, &max_image_units);
}

void ShaderGLES3::init_async_compilation(GLuint p_depth_tex) {
depth_tex = p_depth_tex;
void ShaderGLES3::init_async_compilation() {
if (is_async_compilation_supported() && get_ubershader_flags_uniform() != -1) {
// Warm up the ubershader for the case of no custom code
new_conditional_version.code_version = 0;
_bind_ubershader();
_bind_ubershader(true);
}
}

Expand Down Expand Up @@ -1297,7 +1283,7 @@ void ShaderGLES3::set_custom_shader_code(uint32_t p_code_id, const String &p_ver
if (p_async_mode == ASYNC_MODE_VISIBLE && is_async_compilation_supported() && get_ubershader_flags_uniform() != -1) {
// Warm up the ubershader for this custom code
new_conditional_version.code_version = p_code_id;
_bind_ubershader();
_bind_ubershader(true);
}
}

Expand Down
10 changes: 3 additions & 7 deletions drivers/gles3/shader_gles3.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,6 @@ class ShaderGLES3 {
//@TODO Optimize to a fixed set of shader pools and use a LRU
int uniform_count;
int texunit_pair_count;
int shadow_texunit_count;
int conditional_count;
int ubo_count;
int feedback_count;
Expand Down Expand Up @@ -246,7 +245,6 @@ class ShaderGLES3 {
const char **uniform_names;
const AttributePair *attribute_pairs;
const TexUnitPair *texunit_pairs;
const int *shadow_texunits;
const UBOPair *ubo_pairs;
const Feedback *feedbacks;
const char *vertex_code;
Expand Down Expand Up @@ -280,7 +278,6 @@ class ShaderGLES3 {
static ShaderGLES3 *active;

int max_image_units;
GLuint depth_tex = 0;

_FORCE_INLINE_ void _set_uniform_variant(GLint p_uniform, const Variant &p_value) {
if (p_uniform < 0) {
Expand Down Expand Up @@ -372,13 +369,13 @@ class ShaderGLES3 {
}

bool _bind(bool p_binding_fallback);
bool _bind_ubershader();
bool _bind_ubershader(bool p_for_warmrup = false);

protected:
_FORCE_INLINE_ int _get_uniform(int p_which) const;
_FORCE_INLINE_ void _set_conditional(int p_which, bool p_value);

void setup(const char **p_conditional_defines, int p_conditional_count, const char **p_uniform_names, int p_uniform_count, const AttributePair *p_attribute_pairs, int p_attribute_count, const TexUnitPair *p_texunit_pairs, int p_texunit_pair_count, const int *p_shadow_texunits, int p_shadow_texunit_count, const UBOPair *p_ubo_pairs, int p_ubo_pair_count, const Feedback *p_feedback, int p_feedback_count, const char *p_vertex_code, const char *p_fragment_code, int p_vertex_code_start, int p_fragment_code_start);
void setup(const char **p_conditional_defines, int p_conditional_count, const char **p_uniform_names, int p_uniform_count, const AttributePair *p_attribute_pairs, int p_attribute_count, const TexUnitPair *p_texunit_pairs, int p_texunit_pair_count, const UBOPair *p_ubo_pairs, int p_ubo_pair_count, const Feedback *p_feedback, int p_feedback_count, const char *p_vertex_code, const char *p_fragment_code, int p_vertex_code_start, int p_fragment_code_start);

ShaderGLES3();

Expand All @@ -403,11 +400,10 @@ class ShaderGLES3 {
bool is_custom_code_ready_for_render(uint32_t p_code_id);

uint32_t get_version() const { return new_conditional_version.version; }
bool is_version_ubershader() const { return (new_conditional_version.version & VersionKey::UBERSHADER_FLAG); }
_FORCE_INLINE_ bool is_version_valid() const { return version && version->compile_status == Version::COMPILE_STATUS_OK; }

virtual void init() = 0;
void init_async_compilation(GLuint p_depth_tex);
void init_async_compilation();
bool is_async_compilation_supported();
void finish();

Expand Down
16 changes: 8 additions & 8 deletions drivers/gles3/shaders/scene.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -1649,7 +1649,7 @@ uniform mediump vec4[12] lightmap_captures;

#ifdef USE_GI_PROBES //ubershader-skip

#if !defined(IS_UBERSHADER)
#if !defined(UBERSHADER_COMPAT)
uniform mediump sampler3D gi_probe1; //texunit:-10
#else
uniform mediump sampler3D gi_probe1_uber; //texunit:-12
Expand All @@ -1663,7 +1663,7 @@ uniform highp float gi_probe_bias1;
uniform highp float gi_probe_normal_bias1;
uniform bool gi_probe_blend_ambient1;

#if !defined(IS_UBERSHADER)
#if !defined(UBERSHADER_COMPAT)
uniform mediump sampler3D gi_probe2; //texunit:-11
#else
uniform mediump sampler3D gi_probe2_uber; //texunit:-13
Expand Down Expand Up @@ -2376,9 +2376,9 @@ FRAGMENT_SHADER_CODE
float max_ambient = max(ambient_light.r, max(ambient_light.g, ambient_light.b));
float max_diffuse = max(diffuse_light.r, max(diffuse_light.g, diffuse_light.b));
float total_ambient = max_ambient + max_diffuse;
#ifdef USE_FORWARD_LIGHTING
#ifdef USE_FORWARD_LIGHTING //ubershader-runtime
total_ambient += max_emission;
#endif
#endif //ubershader-runtime
float ambient_scale = (total_ambient > 0.0) ? (max_ambient + ambient_occlusion_affect_light * max_diffuse) / total_ambient : 0.0;

#if defined(ENABLE_AO)
Expand All @@ -2387,9 +2387,9 @@ FRAGMENT_SHADER_CODE
diffuse_buffer = vec4(diffuse_light + ambient_light, ambient_scale);
specular_buffer = vec4(specular_light, metallic);

#ifdef USE_FORWARD_LIGHTING
#ifdef USE_FORWARD_LIGHTING //ubershader-runtime
diffuse_buffer.rgb += emission;
#endif
#endif //ubershader-runtime
#endif //SHADELESS //ubershader-runtime

normal_mr_buffer = vec4(normalize(normal) * 0.5 + 0.5, roughness);
Expand All @@ -2404,9 +2404,9 @@ FRAGMENT_SHADER_CODE
frag_color = vec4(albedo, alpha);
#else //ubershader-runtime
frag_color = vec4(ambient_light + diffuse_light + specular_light, alpha);
#ifdef USE_FORWARD_LIGHTING
#ifdef USE_FORWARD_LIGHTING //ubershader-runtime
frag_color.rgb += emission;
#endif
#endif //ubershader-runtime
#endif //SHADELESS //ubershader-runtime

#endif //USE_MULTIPLE_RENDER_TARGETS //ubershader-runtime
Expand Down
16 changes: 0 additions & 16 deletions gles_builders.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ def __init__(self):
self.enums = {}
self.texunits = []
self.texunit_names = []
self.shadow_texunits = []
self.ubos = []
self.ubo_names = []

Expand Down Expand Up @@ -107,8 +106,6 @@ def include_file_in_legacygl_header(filename, header_data, depth):

if not x in header_data.texunit_names:
header_data.texunits += [(x, texunit)]
if line.find("sampler2DShadow") != -1:
header_data.shadow_texunits += [texunit]
header_data.texunit_names += [x]

elif line.find("uniform") != -1 and line.lower().find("ubo:") != -1:
Expand Down Expand Up @@ -486,15 +483,6 @@ def build_legacygl_header(filename, include, class_suffix, output_attribs, gles2
else:
fd.write("\t\tstatic TexUnitPair *_texunit_pairs=NULL;\n")

if not gles2:
if header_data.shadow_texunits:
fd.write("\t\tstatic int _shadow_texunits[]={")
for x in header_data.shadow_texunits:
fd.write(str(x) + ',')
fd.write("};\n\n")
else:
fd.write("\t\tstatic int *_shadow_texunits=NULL;\n")

if not gles2 and header_data.ubos:
fd.write("\t\tstatic UBOPair _ubo_pairs[]={\n")
for x in header_data.ubos:
Expand Down Expand Up @@ -549,8 +537,6 @@ def build_legacygl_header(filename, include, class_suffix, output_attribs, gles2
+ str(len(header_data.attributes))
+ ", _texunit_pairs,"
+ str(len(header_data.texunits))
+ ", _shadow_texunits,"
+ str(len(header_data.shadow_texunits))
+ ",_ubo_pairs,"
+ str(len(header_data.ubos))
+ ",_feedbacks,"
Expand Down Expand Up @@ -580,8 +566,6 @@ def build_legacygl_header(filename, include, class_suffix, output_attribs, gles2
+ str(len(header_data.uniforms))
+ ",_texunit_pairs,"
+ str(len(header_data.texunits))
+ ",_shadow_texunits,"
+ str(len(header_data.shadow_texunits))
+ ",_enums,"
+ str(len(header_data.enums))
+ ",_enum_values,"
Expand Down