From 6ddf4b241f19ecec89b867fc8d5fa30d1645fd12 Mon Sep 17 00:00:00 2001
From: Steveice10 <1269164+Steveice10@users.noreply.github.com>
Date: Thu, 17 Aug 2023 13:22:25 -0700
Subject: [PATCH] renderer/vulkan: Emulate custom border colors in shaders when
 unavailable. (#6878)

---
 CMakeModules/GenerateSCMRev.cmake             |   8 +
 src/video_core/rasterizer_accelerated.cpp     |  21 ++
 src/video_core/rasterizer_accelerated.h       |   3 +
 .../renderer_vulkan/vk_instance.cpp           |   4 +-
 .../renderer_vulkan/vk_shader_gen.cpp         | 162 ++++++----
 .../renderer_vulkan/vk_shader_gen.h           |   5 +
 .../renderer_vulkan/vk_shader_gen_spv.cpp     | 292 ++++++++++--------
 .../renderer_vulkan/vk_shader_gen_spv.h       |  18 +-
 src/video_core/shader/shader_uniforms.cpp     |   1 +
 src/video_core/shader/shader_uniforms.h       |   3 +-
 10 files changed, 322 insertions(+), 195 deletions(-)

diff --git a/CMakeModules/GenerateSCMRev.cmake b/CMakeModules/GenerateSCMRev.cmake
index f0c8e7da6..ee077cbbd 100644
--- a/CMakeModules/GenerateSCMRev.cmake
+++ b/CMakeModules/GenerateSCMRev.cmake
@@ -12,8 +12,16 @@ set(HASH_FILES
     "${VIDEO_CORE}/renderer_opengl/gl_shader_gen.h"
     "${VIDEO_CORE}/renderer_opengl/gl_shader_util.cpp"
     "${VIDEO_CORE}/renderer_opengl/gl_shader_util.h"
+    "${VIDEO_CORE}/renderer_vulkan/vk_shader_gen.cpp"
+    "${VIDEO_CORE}/renderer_vulkan/vk_shader_gen.h"
+    "${VIDEO_CORE}/renderer_vulkan/vk_shader_gen_spv.cpp"
+    "${VIDEO_CORE}/renderer_vulkan/vk_shader_gen_spv.h"
+    "${VIDEO_CORE}/renderer_vulkan/vk_shader_util.cpp"
+    "${VIDEO_CORE}/renderer_vulkan/vk_shader_util.h"
     "${VIDEO_CORE}/shader/shader.cpp"
     "${VIDEO_CORE}/shader/shader.h"
+    "${VIDEO_CORE}/shader/shader_uniforms.cpp"
+    "${VIDEO_CORE}/shader/shader_uniforms.h"
     "${VIDEO_CORE}/pica.cpp"
     "${VIDEO_CORE}/pica.h"
     "${VIDEO_CORE}/regs_framebuffer.h"
diff --git a/src/video_core/rasterizer_accelerated.cpp b/src/video_core/rasterizer_accelerated.cpp
index 2dcae06f8..bcd40354d 100644
--- a/src/video_core/rasterizer_accelerated.cpp
+++ b/src/video_core/rasterizer_accelerated.cpp
@@ -599,6 +599,17 @@ void RasterizerAccelerated::NotifyPicaRegisterChanged(u32 id) {
         SyncTextureLodBias(2);
         break;
 
+    // Texture borders
+    case PICA_REG_INDEX(texturing.texture0.border_color):
+        SyncTextureBorderColor(0);
+        break;
+    case PICA_REG_INDEX(texturing.texture1.border_color):
+        SyncTextureBorderColor(1);
+        break;
+    case PICA_REG_INDEX(texturing.texture2.border_color):
+        SyncTextureBorderColor(2);
+        break;
+
     // Clipping plane
     case PICA_REG_INDEX(rasterizer.clip_coef[0]):
     case PICA_REG_INDEX(rasterizer.clip_coef[1]):
@@ -821,6 +832,16 @@ void RasterizerAccelerated::SyncTextureLodBias(int tex_index) {
     }
 }
 
+void RasterizerAccelerated::SyncTextureBorderColor(int tex_index) {
+    // Only flag the uniform block as dirty when the border color actually changed.
+    const auto raw_color = regs.texturing.GetTextures()[tex_index].config.border_color.raw;
+    const Common::Vec4f new_color = ColorRGBA8(raw_color);
+    if (new_color != uniform_block_data.data.tex_border_color[tex_index]) {
+        uniform_block_data.data.tex_border_color[tex_index] = new_color;
+        uniform_block_data.dirty = true;
+    }
+}
+
 void RasterizerAccelerated::SyncClipCoef() {
     const auto raw_clip_coef = regs.rasterizer.GetClipCoef();
     const Common::Vec4f new_clip_coef = {raw_clip_coef.x.ToFloat32(), raw_clip_coef.y.ToFloat32(),
diff --git a/src/video_core/rasterizer_accelerated.h b/src/video_core/rasterizer_accelerated.h
index d9191226b..2ad11ee32 100644
--- a/src/video_core/rasterizer_accelerated.h
+++ b/src/video_core/rasterizer_accelerated.h
@@ -97,6 +97,9 @@ protected:
     /// Syncs the texture LOD bias to match the PICA register
     void SyncTextureLodBias(int tex_index);
 
+    /// Syncs the texture border color to match the PICA registers
+    void SyncTextureBorderColor(int tex_index);
+
     /// Syncs the clip coefficients to match the PICA register
     void SyncClipCoef();
 
diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp
index 2388032a8..9d1b1935e 100644
--- a/src/video_core/renderer_vulkan/vk_instance.cpp
+++ b/src/video_core/renderer_vulkan/vk_instance.cpp
@@ -409,7 +409,9 @@ bool Instance::CreateDevice() {
     const bool has_extended_dynamic_state =
         add_extension(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, is_arm || is_qualcomm,
                       "it is broken on Qualcomm and ARM drivers");
-    const bool has_custom_border_color = add_extension(VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME);
+    const bool has_custom_border_color =
+        add_extension(VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, is_qualcomm,
+                      "it is broken on most Qualcomm driver versions");
     const bool has_index_type_uint8 = add_extension(VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME);
     const bool has_pipeline_creation_cache_control =
         add_extension(VK_EXT_PIPELINE_CREATION_CACHE_CONTROL_EXTENSION_NAME);
diff --git a/src/video_core/renderer_vulkan/vk_shader_gen.cpp b/src/video_core/renderer_vulkan/vk_shader_gen.cpp
index 8ad0eb4d8..bfd954b84 100644
--- a/src/video_core/renderer_vulkan/vk_shader_gen.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_gen.cpp
@@ -69,6 +69,17 @@ PicaFSConfig::PicaFSConfig(const Pica::Regs& regs, const Instance& instance) {
 
     state.texture2_use_coord1.Assign(regs.texturing.main_config.texture2_use_coord1 != 0);
 
+    const auto pica_textures = regs.texturing.GetTextures();
+    for (u32 tex_index = 0; tex_index < 3; tex_index++) {
+        const auto config = pica_textures[tex_index].config;
+        state.texture_border_color[tex_index].enable_s.Assign(
+            !instance.IsCustomBorderColorSupported() &&
+            config.wrap_s == TexturingRegs::TextureConfig::WrapMode::ClampToBorder);
+        state.texture_border_color[tex_index].enable_t.Assign(
+            !instance.IsCustomBorderColorSupported() &&
+            config.wrap_t == TexturingRegs::TextureConfig::WrapMode::ClampToBorder);
+    }
+
     // Emulate logic op in the shader if not supported. This is mostly for mobile GPUs
     const bool emulate_logic_op = instance.NeedsLogicOpEmulation() &&
                                   !Pica::g_state.regs.framebuffer.output_merger.alphablend_enable;
@@ -284,54 +295,6 @@ static bool IsPassThroughTevStage(const TevStageConfig& stage) {
             stage.GetColorMultiplier() == 1 && stage.GetAlphaMultiplier() == 1);
 }
 
-static std::string SampleTexture(const PicaFSConfig& config, unsigned texture_unit) {
-    const auto& state = config.state;
-    switch (texture_unit) {
-    case 0:
-        // Only unit 0 respects the texturing type
-        switch (state.texture0_type) {
-        case TexturingRegs::TextureConfig::Texture2D:
-            return "textureLod(tex0, texcoord0, getLod(texcoord0 * "
-                   "vec2(textureSize(tex0, 0))) + tex_lod_bias[0])";
-        case TexturingRegs::TextureConfig::Projection2D:
-            // TODO (wwylele): find the exact LOD formula for projection texture
-            return "textureProj(tex0, vec3(texcoord0, texcoord0_w))";
-        case TexturingRegs::TextureConfig::TextureCube:
-            return "texture(tex_cube, vec3(texcoord0, texcoord0_w))";
-        case TexturingRegs::TextureConfig::Shadow2D:
-            return "shadowTexture(texcoord0, texcoord0_w)";
-        case TexturingRegs::TextureConfig::ShadowCube:
-            return "shadowTextureCube(texcoord0, texcoord0_w)";
-        case TexturingRegs::TextureConfig::Disabled:
-            return "vec4(0.0)";
-        default:
-            LOG_CRITICAL(HW_GPU, "Unhandled texture type {:x}", state.texture0_type);
-            UNIMPLEMENTED();
-            return "texture(tex0, texcoord0)";
-        }
-    case 1:
-        return "textureLod(tex1, texcoord1, getLod(texcoord1 * "
-               "vec2(textureSize(tex1, 0))) + tex_lod_bias[1])";
-    case 2:
-        if (state.texture2_use_coord1)
-            return "textureLod(tex2, texcoord1, getLod(texcoord1 * "
-                   "vec2(textureSize(tex2, 0))) + tex_lod_bias[2])";
-        else
-            return "textureLod(tex2, texcoord2, getLod(texcoord2 * "
-                   "vec2(textureSize(tex2, 0))) + tex_lod_bias[2])";
-    case 3:
-        if (state.proctex.enable) {
-            return "ProcTex()";
-        } else {
-            LOG_DEBUG(Render_OpenGL, "Using Texture3 without enabling it");
-            return "vec4(0.0)";
-        }
-    default:
-        UNREACHABLE();
-        return "";
-    }
-}
-
 /// Writes the specified TEV stage source component(s)
 static void AppendSource(std::string& out, const PicaFSConfig& config,
                          TevStageConfig::Source source, std::string_view index_name) {
@@ -347,16 +310,16 @@ static void AppendSource(std::string& out, const PicaFSConfig& config,
         out += "secondary_fragment_color";
         break;
     case Source::Texture0:
-        out += SampleTexture(config, 0);
+        out += "sampleTexUnit0()";
         break;
     case Source::Texture1:
-        out += SampleTexture(config, 1);
+        out += "sampleTexUnit1()";
         break;
     case Source::Texture2:
-        out += SampleTexture(config, 2);
+        out += "sampleTexUnit2()";
         break;
     case Source::Texture3:
-        out += SampleTexture(config, 3);
+        out += "sampleTexUnit3()";
         break;
     case Source::PreviousBuffer:
         out += "combiner_buffer";
@@ -656,7 +619,7 @@ static void WriteLighting(std::string& out, const PicaFSConfig& config) {
 
     // Compute fragment normals and tangents
     const auto perturbation = [&] {
-        return fmt::format("2.0 * ({}).rgb - 1.0", SampleTexture(config, lighting.bump_selector));
+        return fmt::format("2.0 * (sampleTexUnit{}()).rgb - 1.0", lighting.bump_selector);
     };
 
     switch (lighting.bump_mode) {
@@ -700,7 +663,7 @@ static void WriteLighting(std::string& out, const PicaFSConfig& config) {
            "vec3 tangent = quaternion_rotate(normalized_normquat, surface_tangent);\n";
 
     if (lighting.enable_shadow) {
-        std::string shadow_texture = SampleTexture(config, lighting.shadow_selector);
+        std::string shadow_texture = fmt::format("sampleTexUnit{}()", lighting.shadow_selector);
         if (lighting.shadow_invert) {
             out += fmt::format("vec4 shadow = vec4(1.0) - {};\n", shadow_texture);
         } else {
@@ -1310,6 +1273,7 @@ float mix2(vec4 s, vec2 a) {
 
 vec4 shadowTexture(vec2 uv, float w) {
 )";
+
     if (!config.state.shadow_texture_orthographic) {
         out += "uv /= w;";
     }
@@ -1344,9 +1308,7 @@ vec4 shadowTextureCube(vec2 uv, float w) {
         uv = -c.xy;
         if (c.z > 0.0) uv.x = -uv.x;
     }
-)";
-    out += "uint z = uint(max(0, int(min(w, 1.0) * float(0xFFFFFF)) - shadow_texture_bias));";
-    out += R"(
+    uint z = uint(max(0, int(min(w, 1.0) * float(0xFFFFFF)) - shadow_texture_bias));
     vec2 coord = vec2(size) * (uv / w * vec2(0.5) + vec2(0.5)) - vec2(0.5);
     vec2 coord_floor = floor(coord);
     vec2 f = coord - coord_floor;
@@ -1409,10 +1371,96 @@ vec4 shadowTextureCube(vec2 uv, float w) {
         CompareShadow(pixels.w, z));
     return vec4(mix2(s, f));
 }
-)";
+    )";
 
-    if (config.state.proctex.enable)
+    if (config.state.proctex.enable) {
         AppendProcTexSampler(out, config);
+    }
+
+    // Emit one sampleTexUnitN() GLSL function per texture unit. When border color emulation is
+    // enabled, it returns the border color for out-of-range coordinates instead of sampling.
+    for (u32 texture_unit = 0; texture_unit < 4; texture_unit++) {
+        out += fmt::format("vec4 sampleTexUnit{}() {{", texture_unit);
+        if (texture_unit == 0 && state.texture0_type == TexturingRegs::TextureConfig::Disabled) {
+            out += "return vec4(0.0);}";
+            continue;
+        } else if (texture_unit == 3) {
+            if (state.proctex.enable) {
+                out += "return ProcTex();}";
+            } else {
+                out += "return vec4(0.0);}";
+            }
+            continue;
+        }
+
+        u32 texcoord_num = texture_unit == 2 && state.texture2_use_coord1 ? 1 : texture_unit;
+        if (config.state.texture_border_color[texture_unit].enable_s) {
+            out += fmt::format(R"(
+            if (texcoord{}.x < 0 || texcoord{}.x > 1) {{
+                return tex_border_color[{}];
+            }}
+            )",
+                               texcoord_num, texcoord_num, texture_unit);
+        }
+        if (config.state.texture_border_color[texture_unit].enable_t) {
+            out += fmt::format(R"(
+            if (texcoord{}.y < 0 || texcoord{}.y > 1) {{
+                return tex_border_color[{}];
+            }}
+            )",
+                               texcoord_num, texcoord_num, texture_unit);
+        }
+        // TODO: 3D border?
+
+        switch (texture_unit) {
+        case 0:
+            // Only unit 0 respects the texturing type
+            switch (state.texture0_type) {
+            case TexturingRegs::TextureConfig::Texture2D:
+                out += "return textureLod(tex0, texcoord0, getLod(texcoord0 * "
+                       "vec2(textureSize(tex0, 0))) + tex_lod_bias[0]);";
+                break;
+            case TexturingRegs::TextureConfig::Projection2D:
+                // TODO (wwylele): find the exact LOD formula for projection texture
+                out += "return textureProj(tex0, vec3(texcoord0, texcoord0_w));";
+                break;
+            case TexturingRegs::TextureConfig::TextureCube:
+                out += "return texture(tex_cube, vec3(texcoord0, texcoord0_w));";
+                break;
+            case TexturingRegs::TextureConfig::Shadow2D:
+                out += "return shadowTexture(texcoord0, texcoord0_w);";
+                break;
+            case TexturingRegs::TextureConfig::ShadowCube:
+                out += "return shadowTextureCube(texcoord0, texcoord0_w);";
+                break;
+            default:
+                LOG_CRITICAL(HW_GPU, "Unhandled texture type {:x}", state.texture0_type);
+                UNIMPLEMENTED();
+                out += "return texture(tex0, texcoord0);";
+                break;
+            }
+            break;
+        case 1:
+            out += "return textureLod(tex1, texcoord1, getLod(texcoord1 * vec2(textureSize(tex1, "
+                   "0))) + tex_lod_bias[1]);";
+            break;
+        case 2:
+            // Unit 2 may sample with texcoord1, but it always applies its own LOD bias.
+            if (state.texture2_use_coord1) {
+                out += "return textureLod(tex2, texcoord1, getLod(texcoord1 * "
+                       "vec2(textureSize(tex2, 0))) + tex_lod_bias[2]);";
+            } else {
+                out += "return textureLod(tex2, texcoord2, getLod(texcoord2 * "
+                       "vec2(textureSize(tex2, 0))) + tex_lod_bias[2]);";
+            }
+            break;
+        default:
+            UNREACHABLE();
+            break;
+        }
+
+        out += "}";
+    }
 
     // We round the interpolated primary color to the nearest 1/255th
     // This maintains the PICA's 8 bits of precision
diff --git a/src/video_core/renderer_vulkan/vk_shader_gen.h b/src/video_core/renderer_vulkan/vk_shader_gen.h
index bd901b006..52fa832ff 100644
--- a/src/video_core/renderer_vulkan/vk_shader_gen.h
+++ b/src/video_core/renderer_vulkan/vk_shader_gen.h
@@ -57,6 +57,11 @@ struct PicaFSConfigState {
         BitField<28, 1, u32> shadow_texture_orthographic;
     };
 
+    union {
+        BitField<0, 1, u32> enable_s;
+        BitField<1, 1, u32> enable_t;
+    } texture_border_color[3];
+
     std::array<TevStageConfigRaw, 6> tev_stages;
 
     struct {
diff --git a/src/video_core/renderer_vulkan/vk_shader_gen_spv.cpp b/src/video_core/renderer_vulkan/vk_shader_gen_spv.cpp
index cf26a927f..332021944 100644
--- a/src/video_core/renderer_vulkan/vk_shader_gen_spv.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_gen_spv.cpp
@@ -21,8 +21,8 @@ FragmentModule::FragmentModule(Core::TelemetrySession& telemetry_, const PicaFSC
     DefineArithmeticTypes();
     DefineUniformStructs();
     DefineInterface();
-    if (config.state.proctex.enable) {
-        DefineProcTexSampler();
+    for (u32 i = 0; i < NUM_TEX_UNITS; i++) {
+        DefineTexSampler(i);
     }
     DefineEntryPoint();
 }
@@ -225,7 +225,8 @@ void FragmentModule::WriteLighting() {
 
     // Compute fragment normals and tangents
     const auto perturbation = [&]() -> Id {
-        const Id texel{SampleTexture(lighting.bump_selector)};
+        const Id texel{
+            OpFunctionCall(vec_ids.Get(4), sample_tex_unit_func[lighting.bump_selector])};
         const Id texel_rgb{OpVectorShuffle(vec_ids.Get(3), texel, texel, 0, 1, 2)};
         const Id rgb_mul_two{OpVectorTimesScalar(vec_ids.Get(3), texel_rgb, ConstF32(2.f))};
         return OpFSub(vec_ids.Get(3), rgb_mul_two, ConstF32(1.f, 1.f, 1.f));
@@ -284,7 +285,7 @@ void FragmentModule::WriteLighting() {
 
     Id shadow{ConstF32(1.f, 1.f, 1.f, 1.f)};
     if (lighting.enable_shadow) {
-        shadow = SampleTexture(lighting.shadow_selector);
+        shadow = OpFunctionCall(vec_ids.Get(4), sample_tex_unit_func[lighting.shadow_selector]);
         if (lighting.shadow_invert) {
             shadow = OpFSub(vec_ids.Get(4), ConstF32(1.f, 1.f, 1.f, 1.f), shadow);
         }
@@ -710,89 +711,6 @@ void FragmentModule::WriteAlphaTestCondition(FramebufferRegs::CompareFunc func)
     }
 }
 
-Id FragmentModule::SampleTexture(u32 texture_unit) {
-    const PicaFSConfigState& state = config.state;
-    const Id zero_vec{ConstF32(0.f, 0.f, 0.f, 0.f)};
-
-    // PICA's LOD formula for 2D textures.
-    // This LOD formula is the same as the LOD lower limit defined in OpenGL.
-    // f(x, y) >= max{m_u, m_v, m_w}
-    // (See OpenGL 4.6 spec, 8.14.1 - Scale Factor and Level-of-Detail)
-    const auto sample_lod = [this, texture_unit](Id tex_id, Id texcoord_id) {
-        const Id sampled_image{OpLoad(TypeSampledImage(image2d_id), tex_id)};
-        const Id tex_image{OpImage(image2d_id, sampled_image)};
-        const Id tex_size{OpImageQuerySizeLod(ivec_ids.Get(2), tex_image, ConstS32(0))};
-        const Id texcoord{OpLoad(vec_ids.Get(2), texcoord_id)};
-        const Id coord{OpFMul(vec_ids.Get(2), texcoord, OpConvertSToF(vec_ids.Get(2), tex_size))};
-        const Id abs_dfdx_coord{OpFAbs(vec_ids.Get(2), OpDPdx(vec_ids.Get(2), coord))};
-        const Id abs_dfdy_coord{OpFAbs(vec_ids.Get(2), OpDPdy(vec_ids.Get(2), coord))};
-        const Id d{OpFMax(vec_ids.Get(2), abs_dfdx_coord, abs_dfdy_coord)};
-        const Id dx_dy_max{
-            OpFMax(f32_id, OpCompositeExtract(f32_id, d, 0), OpCompositeExtract(f32_id, d, 1))};
-        const Id lod{OpLog2(f32_id, dx_dy_max)};
-        const Id lod_bias{GetShaderDataMember(f32_id, ConstS32(28), ConstU32(texture_unit))};
-        const Id biased_lod{OpFAdd(f32_id, lod, lod_bias)};
-        return OpImageSampleExplicitLod(vec_ids.Get(4), sampled_image, texcoord,
-                                        spv::ImageOperandsMask::Lod, biased_lod);
-    };
-
-    const auto sample = [this](Id tex_id, bool projection) {
-        const Id image_type = tex_id.value == tex_cube_id.value ? image_cube_id : image2d_id;
-        const Id sampled_image{OpLoad(TypeSampledImage(image_type), tex_id)};
-        const Id texcoord0{OpLoad(vec_ids.Get(2), texcoord0_id)};
-        const Id texcoord0_w{OpLoad(f32_id, texcoord0_w_id)};
-        const Id coord{OpCompositeConstruct(vec_ids.Get(3),
-                                            OpCompositeExtract(f32_id, texcoord0, 0),
-                                            OpCompositeExtract(f32_id, texcoord0, 1), texcoord0_w)};
-        if (projection) {
-            return OpImageSampleProjImplicitLod(vec_ids.Get(4), sampled_image, coord);
-        } else {
-            return OpImageSampleImplicitLod(vec_ids.Get(4), sampled_image, coord);
-        }
-    };
-
-    switch (texture_unit) {
-    case 0:
-        // Only unit 0 respects the texturing type
-        switch (state.texture0_type) {
-        case Pica::TexturingRegs::TextureConfig::Texture2D:
-            return sample_lod(tex0_id, texcoord0_id);
-        case Pica::TexturingRegs::TextureConfig::Projection2D:
-            return sample(tex0_id, true);
-        case Pica::TexturingRegs::TextureConfig::TextureCube:
-            return sample(tex_cube_id, false);
-        case Pica::TexturingRegs::TextureConfig::Shadow2D:
-            return SampleShadow();
-        // case Pica::TexturingRegs::TextureConfig::ShadowCube:
-        // return "shadowTextureCube(texcoord0, texcoord0_w)";
-        case Pica::TexturingRegs::TextureConfig::Disabled:
-            return zero_vec;
-        default:
-            LOG_CRITICAL(Render_Vulkan, "Unhandled texture type {:x}", state.texture0_type);
-            UNIMPLEMENTED();
-            return zero_vec;
-        }
-    case 1:
-        return sample_lod(tex1_id, texcoord1_id);
-    case 2:
-        if (state.texture2_use_coord1) {
-            return sample_lod(tex2_id, texcoord1_id);
-        } else {
-            return sample_lod(tex2_id, texcoord2_id);
-        }
-    case 3:
-        if (state.proctex.enable) {
-            return OpFunctionCall(vec_ids.Get(4), proctex_func);
-        } else {
-            LOG_DEBUG(Render_Vulkan, "Using Texture3 without enabling it");
-            return zero_vec;
-        }
-    default:
-        UNREACHABLE();
-        return void_id;
-    }
-}
-
 Id FragmentModule::CompareShadow(Id pixel, Id z) {
     const Id pixel_d24{OpShiftRightLogical(u32_id, pixel, ConstS32(8))};
     const Id pixel_s8{OpConvertUToF(f32_id, OpBitwiseAnd(u32_id, pixel, ConstU32(255u)))};
@@ -802,7 +720,7 @@ Id FragmentModule::CompareShadow(Id pixel, Id z) {
 }
 
 Id FragmentModule::SampleShadow() {
-    const Id texcoord0{OpLoad(vec_ids.Get(2), texcoord0_id)};
+    const Id texcoord0{OpLoad(vec_ids.Get(2), texcoord_id[0])};
     const Id texcoord0_w{OpLoad(f32_id, texcoord0_w_id)};
     const Id abs_min_w{OpFMul(f32_id, OpFMin(f32_id, OpFAbs(f32_id, texcoord0_w), ConstF32(1.f)),
                               ConstF32(16777215.f))};
@@ -941,11 +859,145 @@ Id FragmentModule::AppendProcTexCombineAndMap(ProcTexCombiner combiner, Id u, Id
     return ProcTexLookupLUT(offset, combined);
 }
 
-void FragmentModule::DefineProcTexSampler() {
+void FragmentModule::DefineTexSampler(u32 texture_unit) {
+    const PicaFSConfigState& state = config.state;
+
     const Id func_type{TypeFunction(vec_ids.Get(4))};
-    proctex_func = OpFunction(vec_ids.Get(4), spv::FunctionControlMask::MaskNone, func_type);
+    sample_tex_unit_func[texture_unit] =
+        OpFunction(vec_ids.Get(4), spv::FunctionControlMask::MaskNone, func_type);
     AddLabel(OpLabel());
 
+    const Id zero_vec{ConstF32(0.f, 0.f, 0.f, 0.f)};
+
+    if (texture_unit == 0 && state.texture0_type == TexturingRegs::TextureConfig::Disabled) {
+        OpReturnValue(zero_vec);
+        OpFunctionEnd();
+        return;
+    }
+
+    if (texture_unit == 3) {
+        if (state.proctex.enable) {
+            OpReturnValue(ProcTexSampler());
+        } else {
+            OpReturnValue(zero_vec);
+        }
+        OpFunctionEnd();
+        return;
+    }
+
+    const Id border_label{OpLabel()};
+    const Id not_border_label{OpLabel()};
+
+    u32 texcoord_num = texture_unit == 2 && state.texture2_use_coord1 ? 1 : texture_unit;
+    const Id texcoord{OpLoad(vec_ids.Get(2), texcoord_id[texcoord_num])};
+
+    auto& texture_border_color = state.texture_border_color[texture_unit];
+    if (texture_border_color.enable_s || texture_border_color.enable_t) {
+        const Id texcoord_s{OpCompositeExtract(f32_id, texcoord, 0)};
+        const Id texcoord_t{OpCompositeExtract(f32_id, texcoord, 1)};
+
+        const Id s_lt_zero{OpFOrdLessThan(bool_id, texcoord_s, ConstF32(0.0f))};
+        const Id s_gt_one{OpFOrdGreaterThan(bool_id, texcoord_s, ConstF32(1.0f))};
+        const Id t_lt_zero{OpFOrdLessThan(bool_id, texcoord_t, ConstF32(0.0f))};
+        const Id t_gt_one{OpFOrdGreaterThan(bool_id, texcoord_t, ConstF32(1.0f))};
+
+        Id cond{};
+        if (texture_border_color.enable_s && texture_border_color.enable_t) {
+            cond = OpAny(bool_id, OpCompositeConstruct(bvec_ids.Get(4), s_lt_zero, s_gt_one,
+                                                       t_lt_zero, t_gt_one));
+        } else if (texture_border_color.enable_s) {
+            cond = OpAny(bool_id, OpCompositeConstruct(bvec_ids.Get(2), s_lt_zero, s_gt_one));
+        } else if (texture_border_color.enable_t) {
+            cond = OpAny(bool_id, OpCompositeConstruct(bvec_ids.Get(2), t_lt_zero, t_gt_one));
+        }
+
+        OpSelectionMerge(not_border_label, spv::SelectionControlMask::MaskNone);
+        OpBranchConditional(cond, border_label, not_border_label);
+
+        AddLabel(border_label);
+        const Id border_color{
+            GetShaderDataMember(vec_ids.Get(4), ConstS32(29), ConstU32(texture_unit))};
+        OpReturnValue(border_color);
+
+        AddLabel(not_border_label);
+    }
+
+    // PICA's LOD formula for 2D textures.
+    // This LOD formula is the same as the LOD lower limit defined in OpenGL.
+    // f(x, y) >= max{m_u, m_v, m_w}
+    // (See OpenGL 4.6 spec, 8.14.1 - Scale Factor and Level-of-Detail)
+    const auto sample_lod = [&](Id tex_id) {
+        const Id sampled_image{OpLoad(TypeSampledImage(image2d_id), tex_id)};
+        const Id tex_image{OpImage(image2d_id, sampled_image)};
+        const Id tex_size{OpImageQuerySizeLod(ivec_ids.Get(2), tex_image, ConstS32(0))};
+        const Id coord{OpFMul(vec_ids.Get(2), texcoord, OpConvertSToF(vec_ids.Get(2), tex_size))};
+        const Id abs_dfdx_coord{OpFAbs(vec_ids.Get(2), OpDPdx(vec_ids.Get(2), coord))};
+        const Id abs_dfdy_coord{OpFAbs(vec_ids.Get(2), OpDPdy(vec_ids.Get(2), coord))};
+        const Id d{OpFMax(vec_ids.Get(2), abs_dfdx_coord, abs_dfdy_coord)};
+        const Id dx_dy_max{
+            OpFMax(f32_id, OpCompositeExtract(f32_id, d, 0), OpCompositeExtract(f32_id, d, 1))};
+        const Id lod{OpLog2(f32_id, dx_dy_max)};
+        const Id lod_bias{GetShaderDataMember(f32_id, ConstS32(28), ConstU32(texture_unit))};
+        const Id biased_lod{OpFAdd(f32_id, lod, lod_bias)};
+        return OpImageSampleExplicitLod(vec_ids.Get(4), sampled_image, texcoord,
+                                        spv::ImageOperandsMask::Lod, biased_lod);
+    };
+
+    const auto sample_3d = [&](Id tex_id, bool projection) {
+        const Id image_type = tex_id.value == tex_cube_id.value ? image_cube_id : image2d_id;
+        const Id sampled_image{OpLoad(TypeSampledImage(image_type), tex_id)};
+        const Id texcoord0_w{OpLoad(f32_id, texcoord0_w_id)};
+        const Id coord{OpCompositeConstruct(vec_ids.Get(3), OpCompositeExtract(f32_id, texcoord, 0),
+                                            OpCompositeExtract(f32_id, texcoord, 1), texcoord0_w)};
+        if (projection) {
+            return OpImageSampleProjImplicitLod(vec_ids.Get(4), sampled_image, coord);
+        } else {
+            return OpImageSampleImplicitLod(vec_ids.Get(4), sampled_image, coord);
+        }
+    };
+
+    Id ret_val{void_id};
+    switch (texture_unit) {
+    case 0:
+        // Only unit 0 respects the texturing type
+        switch (state.texture0_type) {
+        case Pica::TexturingRegs::TextureConfig::Texture2D:
+            ret_val = sample_lod(tex0_id);
+            break;
+        case Pica::TexturingRegs::TextureConfig::Projection2D:
+            ret_val = sample_3d(tex0_id, true);
+            break;
+        case Pica::TexturingRegs::TextureConfig::TextureCube:
+            ret_val = sample_3d(tex_cube_id, false);
+            break;
+        case Pica::TexturingRegs::TextureConfig::Shadow2D:
+            ret_val = SampleShadow();
+            // case Pica::TexturingRegs::TextureConfig::ShadowCube:
+            // return "shadowTextureCube(texcoord0, texcoord0_w)";
+            break;
+        default:
+            LOG_CRITICAL(Render_Vulkan, "Unhandled texture type {:x}", state.texture0_type);
+            UNIMPLEMENTED();
+            ret_val = zero_vec;
+            break;
+        }
+        break;
+    case 1:
+        ret_val = sample_lod(tex1_id);
+        break;
+    case 2:
+        ret_val = sample_lod(tex2_id);
+        break;
+    default:
+        UNREACHABLE();
+        break;
+    }
+
+    OpReturnValue(ret_val);
+    OpFunctionEnd();
+}
+
+Id FragmentModule::ProcTexSampler() {
     // Define noise tables at the beginning of the function
     if (config.state.proctex.noise_enable) {
         noise1d_table =
@@ -957,24 +1009,11 @@ void FragmentModule::DefineProcTexSampler() {
 
     Id uv{};
     if (config.state.proctex.coord < 3) {
-        Id texcoord_id{};
-        switch (config.state.proctex.coord.Value()) {
-        case 0:
-            texcoord_id = texcoord0_id;
-            break;
-        case 1:
-            texcoord_id = texcoord1_id;
-            break;
-        case 2:
-            texcoord_id = texcoord2_id;
-            break;
-        }
-
-        const Id texcoord{OpLoad(vec_ids.Get(2), texcoord_id)};
+        const Id texcoord{OpLoad(vec_ids.Get(2), texcoord_id[config.state.proctex.coord.Value()])};
         uv = OpFAbs(vec_ids.Get(2), texcoord);
     } else {
         LOG_CRITICAL(Render_Vulkan, "Unexpected proctex.coord >= 3");
-        uv = OpFAbs(vec_ids.Get(2), OpLoad(vec_ids.Get(2), texcoord0_id));
+        uv = OpFAbs(vec_ids.Get(2), OpLoad(vec_ids.Get(2), texcoord_id[0]));
     }
 
     // This LOD formula is the same as the LOD upper limit defined in OpenGL.
@@ -1058,8 +1097,7 @@ void FragmentModule::DefineProcTexSampler() {
         final_color = OpCompositeInsert(vec_ids.Get(4), final_alpha, final_color, 3);
     }
 
-    OpReturnValue(final_color);
-    OpFunctionEnd();
+    return final_color;
 }
 
 Id FragmentModule::Byteround(Id variable_id, u32 size) {
@@ -1226,13 +1264,13 @@ Id FragmentModule::AppendSource(TevStageConfig::Source source, s32 index) {
     case Source::SecondaryFragmentColor:
         return secondary_fragment_color;
     case Source::Texture0:
-        return SampleTexture(0);
+        return OpFunctionCall(vec_ids.Get(4), sample_tex_unit_func[0]);
     case Source::Texture1:
-        return SampleTexture(1);
+        return OpFunctionCall(vec_ids.Get(4), sample_tex_unit_func[1]);
     case Source::Texture2:
-        return SampleTexture(2);
+        return OpFunctionCall(vec_ids.Get(4), sample_tex_unit_func[2]);
     case Source::Texture3:
-        return SampleTexture(3);
+        return OpFunctionCall(vec_ids.Get(4), sample_tex_unit_func[3]);
     case Source::PreviousBuffer:
         return combiner_buffer;
     case Source::Constant:
@@ -1428,9 +1466,9 @@ void FragmentModule::DefineEntryPoint() {
 
     const Id main_type{TypeFunction(TypeVoid())};
     const Id main_func{OpFunction(TypeVoid(), spv::FunctionControlMask::MaskNone, main_type)};
-    AddEntryPoint(spv::ExecutionModel::Fragment, main_func, "main", primary_color_id, texcoord0_id,
-                  texcoord1_id, texcoord2_id, texcoord0_w_id, normquat_id, view_id, color_id,
-                  gl_frag_coord_id, gl_frag_depth_id);
+    AddEntryPoint(spv::ExecutionModel::Fragment, main_func, "main", primary_color_id,
+                  texcoord_id[0], texcoord_id[1], texcoord_id[2], texcoord0_w_id, normquat_id,
+                  view_id, color_id, gl_frag_coord_id, gl_frag_depth_id);
     AddExecutionMode(main_func, spv::ExecutionMode::OriginUpperLeft);
     AddExecutionMode(main_func, spv::ExecutionMode::DepthReplacing);
 }
@@ -1443,21 +1481,25 @@ void FragmentModule::DefineUniformStructs() {
     const Id light_src_array_id{TypeArray(light_src_struct_id, ConstU32(NUM_LIGHTS))};
     const Id lighting_lut_array_id{TypeArray(ivec_ids.Get(4), ConstU32(NUM_LIGHTING_SAMPLERS / 4))};
     const Id const_color_array_id{TypeArray(vec_ids.Get(4), ConstU32(NUM_TEV_STAGES))};
+    const Id border_color_array_id{TypeArray(vec_ids.Get(4), ConstU32(NUM_NON_PROC_TEX_UNITS))};
 
-    const Id shader_data_struct_id{TypeStruct(
-        i32_id, i32_id, f32_id, f32_id, f32_id, f32_id, i32_id, i32_id, i32_id, i32_id, i32_id,
-        i32_id, i32_id, i32_id, i32_id, i32_id, f32_id, i32_id, u32_id, lighting_lut_array_id,
-        vec_ids.Get(3), vec_ids.Get(2), vec_ids.Get(2), vec_ids.Get(2), vec_ids.Get(3),
-        light_src_array_id, const_color_array_id, vec_ids.Get(4), vec_ids.Get(3), vec_ids.Get(4))};
+    const Id shader_data_struct_id{
+        TypeStruct(i32_id, i32_id, f32_id, f32_id, f32_id, f32_id, i32_id, i32_id, i32_id, i32_id,
+                   i32_id, i32_id, i32_id, i32_id, i32_id, i32_id, f32_id, i32_id, u32_id,
+                   lighting_lut_array_id, vec_ids.Get(3), vec_ids.Get(2), vec_ids.Get(2),
+                   vec_ids.Get(2), vec_ids.Get(3), light_src_array_id, const_color_array_id,
+                   vec_ids.Get(4), vec_ids.Get(3), border_color_array_id, vec_ids.Get(4))};
 
     constexpr std::array light_src_offsets{0u, 16u, 32u, 48u, 64u, 80u, 92u, 96u};
-    constexpr std::array shader_data_offsets{
-        0u,  4u,  8u,  12u, 16u, 20u,  24u,  28u,  32u,  36u,  40u,  44u,   48u,   52u,   56u,
-        60u, 64u, 68u, 72u, 80u, 176u, 192u, 200u, 208u, 224u, 240u, 1136u, 1232u, 1248u, 1264u};
+    constexpr std::array shader_data_offsets{0u,   4u,   8u,    12u,   16u,   20u,   24u,  28u,
+                                             32u,  36u,  40u,   44u,   48u,   52u,   56u,  60u,
+                                             64u,  68u,  72u,   80u,   176u,  192u,  200u, 208u,
+                                             224u, 240u, 1136u, 1232u, 1248u, 1264u, 1312u};
 
     Decorate(lighting_lut_array_id, spv::Decoration::ArrayStride, 16u);
     Decorate(light_src_array_id, spv::Decoration::ArrayStride, 112u);
     Decorate(const_color_array_id, spv::Decoration::ArrayStride, 16u);
+    Decorate(border_color_array_id, spv::Decoration::ArrayStride, 16u);
     for (u32 i = 0; i < static_cast<u32>(light_src_offsets.size()); i++) {
         MemberDecorate(light_src_struct_id, i, spv::Decoration::Offset, light_src_offsets[i]);
     }
@@ -1475,9 +1517,9 @@ void FragmentModule::DefineUniformStructs() {
 void FragmentModule::DefineInterface() {
     // Define interface block
     primary_color_id = DefineInput(vec_ids.Get(4), 1);
-    texcoord0_id = DefineInput(vec_ids.Get(2), 2);
-    texcoord1_id = DefineInput(vec_ids.Get(2), 3);
-    texcoord2_id = DefineInput(vec_ids.Get(2), 4);
+    texcoord_id[0] = DefineInput(vec_ids.Get(2), 2);
+    texcoord_id[1] = DefineInput(vec_ids.Get(2), 3);
+    texcoord_id[2] = DefineInput(vec_ids.Get(2), 4);
     texcoord0_w_id = DefineInput(f32_id, 5);
     normquat_id = DefineInput(vec_ids.Get(4), 6);
     view_id = DefineInput(vec_ids.Get(3), 7);
diff --git a/src/video_core/renderer_vulkan/vk_shader_gen_spv.h b/src/video_core/renderer_vulkan/vk_shader_gen_spv.h
index 98631423f..32bac11c2 100644
--- a/src/video_core/renderer_vulkan/vk_shader_gen_spv.h
+++ b/src/video_core/renderer_vulkan/vk_shader_gen_spv.h
@@ -30,6 +30,8 @@ class FragmentModule : public Sirit::Module {
     static constexpr u32 NUM_TEV_STAGES = 6;
     static constexpr u32 NUM_LIGHTS = 8;
     static constexpr u32 NUM_LIGHTING_SAMPLERS = 24;
+    static constexpr u32 NUM_TEX_UNITS = 4;
+    static constexpr u32 NUM_NON_PROC_TEX_UNITS = 3;
 
 public:
     explicit FragmentModule(Core::TelemetrySession& telemetry, const PicaFSConfig& config);
@@ -57,15 +59,15 @@ private:
     /// Writes the code to emulate the specified TEV stage
     void WriteTevStage(s32 index);
 
-    /// Defines the tex3 proctex sampling function
-    void DefineProcTexSampler();
+    /// Defines the basic texture sampling function for the given texture unit
+    void DefineTexSampler(u32 texture_unit);
+
+    /// Samples the procedurally generated texture unit and returns the id of the final color.
+    Id ProcTexSampler();
 
     /// Writes the if-statement condition used to evaluate alpha testing.
     void WriteAlphaTestCondition(Pica::FramebufferRegs::CompareFunc func);
 
-    /// Samples the current fragment texel from the provided texture unit
-    [[nodiscard]] Id SampleTexture(u32 texture_unit);
-
     /// Samples the current fragment texel from shadow plane
     [[nodiscard]] Id SampleShadow();
 
@@ -237,9 +239,7 @@ private:
     Id shader_data_id{};
 
     Id primary_color_id{};
-    Id texcoord0_id{};
-    Id texcoord1_id{};
-    Id texcoord2_id{};
+    Id texcoord_id[NUM_NON_PROC_TEX_UNITS]{};
     Id texcoord0_w_id{};
     Id normquat_id{};
     Id view_id{};
@@ -276,7 +276,7 @@ private:
     Id alpha_results_2{};
     Id alpha_results_3{};
 
-    Id proctex_func{};
+    Id sample_tex_unit_func[NUM_TEX_UNITS]{};
     Id noise1d_table{};
     Id noise2d_table{};
     Id lut_offsets{};
diff --git a/src/video_core/shader/shader_uniforms.cpp b/src/video_core/shader/shader_uniforms.cpp
index 8247d697b..baa496474 100644
--- a/src/video_core/shader/shader_uniforms.cpp
+++ b/src/video_core/shader/shader_uniforms.cpp
@@ -67,6 +67,7 @@ layout ({}std140) uniform shader_data {{
     vec4 const_color[NUM_TEV_STAGES];
     vec4 tev_combiner_buffer_color;
     vec3 tex_lod_bias;
+    vec4 tex_border_color[3];
     vec4 clip_coef;
 }};
 )";
diff --git a/src/video_core/shader/shader_uniforms.h b/src/video_core/shader/shader_uniforms.h
index 1cc8e574f..168f3f14c 100644
--- a/src/video_core/shader/shader_uniforms.h
+++ b/src/video_core/shader/shader_uniforms.h
@@ -64,10 +64,11 @@ struct UniformData {
     alignas(16) Common::Vec4f const_color[6]; // A vec4 color for each of the six tev stages
     alignas(16) Common::Vec4f tev_combiner_buffer_color;
     alignas(16) Common::Vec3f tex_lod_bias;
+    alignas(16) Common::Vec4f tex_border_color[3];
     alignas(16) Common::Vec4f clip_coef;
 };
 
-static_assert(sizeof(UniformData) == 0x500,
+static_assert(sizeof(UniformData) == 0x530,
               "The size of the UniformData does not match the structure in the shader");
 static_assert(sizeof(UniformData) < 16384,
               "UniformData structure must be less than 16kb as per the OpenGL spec");