From fd40abda7fd5d4f5d9149c4e827d627fba55dc6b Mon Sep 17 00:00:00 2001 From: "Herman S." <429230+has207@users.noreply.github.com> Date: Wed, 27 Aug 2025 20:44:15 +0900 Subject: [PATCH 1/2] Add resolution scaling support to Vulkan backend. Largely similar to the D3D12 implementation, but with simpler buffer management and no mip scaling. Tested on Linux with 2x2 and 3x3 running smoothly. --- src/xenia/gpu/texture_cache.cc | 13 +- .../gpu/vulkan/vulkan_command_processor.cc | 33 ++- .../gpu/vulkan/vulkan_command_processor.h | 4 +- .../gpu/vulkan/vulkan_render_target_cache.cc | 161 +++++++++++- src/xenia/gpu/vulkan/vulkan_texture_cache.cc | 246 +++++++++++++++++- src/xenia/gpu/vulkan/vulkan_texture_cache.h | 46 ++++ 6 files changed, 468 insertions(+), 35 deletions(-) diff --git a/src/xenia/gpu/texture_cache.cc b/src/xenia/gpu/texture_cache.cc index 7ba729b8923..7120abe324d 100644 --- a/src/xenia/gpu/texture_cache.cc +++ b/src/xenia/gpu/texture_cache.cc @@ -183,10 +183,15 @@ TextureCache::~TextureCache() { bool TextureCache::GetConfigDrawResolutionScale(uint32_t& x_out, uint32_t& y_out) { - uint32_t config_x = - uint32_t(std::max(INT32_C(1), cvars::draw_resolution_scale_x)); - uint32_t config_y = - uint32_t(std::max(INT32_C(1), cvars::draw_resolution_scale_y)); + // Clamp to valid range [1, max] to ensure safe conversion to uint32_t + int32_t config_x_signed = + std::clamp(cvars::draw_resolution_scale_x, INT32_C(1), + static_cast(kMaxDrawResolutionScaleAlongAxis)); + int32_t config_y_signed = + std::clamp(cvars::draw_resolution_scale_y, INT32_C(1), + static_cast(kMaxDrawResolutionScaleAlongAxis)); + uint32_t config_x = static_cast(config_x_signed); + uint32_t config_y = static_cast(config_y_signed); uint32_t clamped_x = std::min(kMaxDrawResolutionScaleAlongAxis, config_x); uint32_t clamped_y = std::min(kMaxDrawResolutionScaleAlongAxis, config_y); x_out = clamped_x; diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc 
b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index ab102949c92..e5d72067e44 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -273,10 +273,13 @@ bool VulkanCommandProcessor::SetupContext() { << shared_memory_binding_count_log2; // Requires the transient descriptor set layouts. - // TODO(Triang3l): Get the actual draw resolution scale when the texture cache - // supports resolution scaling. + // Get draw resolution scale using the same method as D3D12 + uint32_t draw_resolution_scale_x, draw_resolution_scale_y; + TextureCache::GetConfigDrawResolutionScale(draw_resolution_scale_x, + draw_resolution_scale_y); render_target_cache_ = std::make_unique( - *register_file_, *memory_, trace_writer_, 1, 1, *this); + *register_file_, *memory_, trace_writer_, draw_resolution_scale_x, + draw_resolution_scale_y, *this); if (!render_target_cache_->Initialize(shared_memory_binding_count)) { XELOGE("Failed to initialize the render target cache"); return false; @@ -339,10 +342,10 @@ bool VulkanCommandProcessor::SetupContext() { } // Requires the transient descriptor set layouts. - // TODO(Triang3l): Actual draw resolution scale. - texture_cache_ = - VulkanTextureCache::Create(*register_file_, *shared_memory_, 1, 1, *this, - guest_shader_pipeline_stages_); + // Use the same draw resolution scale as render target cache + texture_cache_ = VulkanTextureCache::Create( + *register_file_, *shared_memory_, draw_resolution_scale_x, + draw_resolution_scale_y, *this, guest_shader_pipeline_stages_); if (!texture_cache_) { XELOGE("Failed to initialize the texture cache"); return false; @@ -2442,15 +2445,19 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, // life. Or even disregard the viewport bounds range in the fragment shader // interlocks case completely - apply the viewport and the scissor offset // directly to pixel address and to things like ps_param_gen. 
+ uint32_t draw_resolution_scale_x = texture_cache_->draw_resolution_scale_x(); + uint32_t draw_resolution_scale_y = texture_cache_->draw_resolution_scale_y(); draw_util::GetHostViewportInfo( - regs, 1, 1, false, device_properties.maxViewportDimensions[0], + regs, draw_resolution_scale_x, draw_resolution_scale_y, false, + device_properties.maxViewportDimensions[0], device_properties.maxViewportDimensions[1], true, normalized_depth_control, false, host_render_targets_used, pixel_shader && pixel_shader->writes_depth(), viewport_info); // Update dynamic graphics pipeline state. UpdateDynamicState(viewport_info, primitive_polygonal, - normalized_depth_control); + normalized_depth_control, draw_resolution_scale_x, + draw_resolution_scale_y); auto vgt_draw_initiator = regs.Get(); @@ -3243,7 +3250,8 @@ void VulkanCommandProcessor::DestroyScratchBuffer() { void VulkanCommandProcessor::UpdateDynamicState( const draw_util::ViewportInfo& viewport_info, bool primitive_polygonal, - reg::RB_DEPTHCONTROL normalized_depth_control) { + reg::RB_DEPTHCONTROL normalized_depth_control, + uint32_t draw_resolution_scale_x, uint32_t draw_resolution_scale_y) { #if XE_GPU_FINE_GRAINED_DRAW_SCOPES SCOPE_profile_cpu_f("gpu"); #endif // XE_GPU_FINE_GRAINED_DRAW_SCOPES @@ -3279,6 +3287,11 @@ void VulkanCommandProcessor::UpdateDynamicState( // Scissor. 
draw_util::Scissor scissor; draw_util::GetScissor(regs, scissor); + // Scale the scissor to match the render target resolution scale + scissor.offset[0] *= draw_resolution_scale_x; + scissor.offset[1] *= draw_resolution_scale_y; + scissor.extent[0] *= draw_resolution_scale_x; + scissor.extent[1] *= draw_resolution_scale_y; VkRect2D scissor_rect; scissor_rect.offset.x = int32_t(scissor.offset[0]); scissor_rect.offset.y = int32_t(scissor.offset[1]); diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.h b/src/xenia/gpu/vulkan/vulkan_command_processor.h index cd872dc84d2..6279a217038 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.h +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.h @@ -426,7 +426,9 @@ class VulkanCommandProcessor : public CommandProcessor { void UpdateDynamicState(const draw_util::ViewportInfo& viewport_info, bool primitive_polygonal, - reg::RB_DEPTHCONTROL normalized_depth_control); + reg::RB_DEPTHCONTROL normalized_depth_control, + uint32_t draw_resolution_scale_x, + uint32_t draw_resolution_scale_y); void UpdateSystemConstantValues( bool primitive_polygonal, const PrimitiveProcessor::ProcessingResult& primitive_processing_result, diff --git a/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc b/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc index 47422876ae0..abf520dd7bc 100644 --- a/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc +++ b/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc @@ -1067,6 +1067,13 @@ bool VulkanRenderTargetCache::Resolve(const Memory& memory, uint32_t dump_pitch; resolve_info.GetCopyEdramTileSpan(dump_base, dump_row_length_used, dump_rows, dump_pitch); + // Scale tile parameters for resolution scaling to match resolve shader + // expectations + if (IsDrawResolutionScaled()) { + dump_row_length_used *= draw_resolution_scale_x(); + dump_rows *= draw_resolution_scale_y(); + dump_pitch *= draw_resolution_scale_x(); + } DumpRenderTargets(dump_base, dump_row_length_used, dump_rows, dump_pitch); } @@ 
-1101,15 +1108,95 @@ bool VulkanRenderTargetCache::Resolve(const Memory& memory, kStorageBufferCompute); if (descriptor_set_dest != VK_NULL_HANDLE) { // Write the destination descriptor. - // TODO(Triang3l): Scaled resolve buffer binding. VkDescriptorBufferInfo write_descriptor_set_dest_buffer_info; - write_descriptor_set_dest_buffer_info.buffer = shared_memory.buffer(); - write_descriptor_set_dest_buffer_info.offset = - resolve_info.copy_dest_base; - write_descriptor_set_dest_buffer_info.range = - resolve_info.copy_dest_extent_start - - resolve_info.copy_dest_base + - resolve_info.copy_dest_extent_length; + + bool scaled_buffer_ready = false; + if (draw_resolution_scaled) { + // For scaled resolve, ensure the scaled buffer exists and bind to + // it + uint32_t dest_address = resolve_info.copy_dest_base; + uint32_t dest_length = resolve_info.copy_dest_extent_start - + resolve_info.copy_dest_base + + resolve_info.copy_dest_extent_length; + + // Ensure scaled resolve memory is committed + scaled_buffer_ready = true; + if (!texture_cache.EnsureScaledResolveMemoryCommittedPublic( + dest_address, dest_length)) { + XELOGE( + "Failed to commit scaled resolve memory for resolve dest at " + "0x{:08X}", + dest_address); + scaled_buffer_ready = false; + } + + // Make the range current to get the buffer + if (scaled_buffer_ready && + !texture_cache.MakeScaledResolveRangeCurrent(dest_address, + dest_length)) { + XELOGE( + "Failed to make scaled resolve range current for resolve " + "dest at 0x{:08X}", + dest_address); + scaled_buffer_ready = false; + } + + // Get the current scaled buffer + VkBuffer scaled_buffer = VK_NULL_HANDLE; + if (scaled_buffer_ready) { + scaled_buffer = texture_cache.GetCurrentScaledResolveBuffer(); + if (scaled_buffer == VK_NULL_HANDLE) { + XELOGE( + "No current scaled resolve buffer for resolve dest at " + "0x{:08X}", + dest_address); + scaled_buffer_ready = false; + } + } + + if (scaled_buffer_ready) { + // Calculate offset within the scaled buffer + 
uint32_t draw_resolution_scale_area = + draw_resolution_scale_x() * draw_resolution_scale_y(); + uint64_t scaled_offset = + uint64_t(dest_address) * draw_resolution_scale_area; + + // Get the buffer's base offset to calculate relative offset + uint64_t buffer_relative_offset = 0; + size_t buffer_index = + texture_cache.GetScaledResolveCurrentBufferIndex(); + auto* buffer_info = + texture_cache.GetScaledResolveBufferInfo(buffer_index); + if (buffer_info) { + buffer_relative_offset = + scaled_offset - buffer_info->range_start_scaled; + } + + write_descriptor_set_dest_buffer_info.buffer = scaled_buffer; + write_descriptor_set_dest_buffer_info.offset = + buffer_relative_offset; + write_descriptor_set_dest_buffer_info.range = + dest_length * draw_resolution_scale_area; + } + } + + if (!scaled_buffer_ready) { + // Regular unscaled resolve - write to shared memory + if (draw_resolution_scaled) { + XELOGW( + "Falling back to unscaled resolve at 0x{:08X} - scaled " + "buffer not available", + resolve_info.copy_dest_base); + } + write_descriptor_set_dest_buffer_info.buffer = + shared_memory.buffer(); + write_descriptor_set_dest_buffer_info.offset = + resolve_info.copy_dest_base; + write_descriptor_set_dest_buffer_info.range = + resolve_info.copy_dest_extent_start - + resolve_info.copy_dest_base + + resolve_info.copy_dest_extent_length; + } VkWriteDescriptorSet write_descriptor_set_dest; write_descriptor_set_dest.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; @@ -1128,11 +1215,37 @@ bool VulkanRenderTargetCache::Resolve(const Memory& memory, nullptr); // Submit the resolve. - // TODO(Triang3l): Transition the scaled resolve buffer. 
- shared_memory.Use(VulkanSharedMemory::Usage::kComputeWrite, - std::pair( - resolve_info.copy_dest_extent_start, - resolve_info.copy_dest_extent_length)); + if (!scaled_buffer_ready) { + // Regular unscaled - transition shared memory for write + shared_memory.Use(VulkanSharedMemory::Usage::kComputeWrite, + std::pair( + resolve_info.copy_dest_extent_start, + resolve_info.copy_dest_extent_length)); + } else { + // Scaled - add barrier for the scaled resolve buffer + // The buffer transitions from compute shader read (texture loading) + // to compute shader write + VkBuffer scaled_buffer = + texture_cache.GetCurrentScaledResolveBuffer(); + if (scaled_buffer != VK_NULL_HANDLE) { + VkBufferMemoryBarrier buffer_barrier = {}; + buffer_barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; + // More specific: previous compute shader reads to compute shader + // write + buffer_barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT; + buffer_barrier.dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT; + buffer_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + buffer_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + buffer_barrier.buffer = scaled_buffer; + buffer_barrier.offset = 0; + buffer_barrier.size = VK_WHOLE_SIZE; + + command_buffer.CmdVkPipelineBarrier( + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, // From compute shader + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, // To compute shader + 0, 0, nullptr, 1, &buffer_barrier, 0, nullptr); + } + } UseEdramBuffer(EdramBufferUsage::kComputeRead); command_processor_.BindExternalComputePipeline( resolve_copy_pipelines_[size_t(copy_shader)]); @@ -1163,6 +1276,28 @@ bool VulkanRenderTargetCache::Resolve(const Memory& memory, command_buffer.CmdVkDispatch(copy_group_count_x, copy_group_count_y, 1); + // Add barrier after writing to scaled resolve buffer + if (scaled_buffer_ready) { + VkBuffer scaled_buffer = + texture_cache.GetCurrentScaledResolveBuffer(); + if (scaled_buffer != VK_NULL_HANDLE) { + VkBufferMemoryBarrier buffer_barrier = 
{}; + buffer_barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; + buffer_barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; + buffer_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + buffer_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + buffer_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + buffer_barrier.buffer = scaled_buffer; + buffer_barrier.offset = 0; + buffer_barrier.size = VK_WHOLE_SIZE; + + command_buffer.CmdVkPipelineBarrier( + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 1, + &buffer_barrier, 0, nullptr); + } + } + // Invalidate textures and mark the range as scaled if needed. texture_cache.MarkRangeAsResolved( resolve_info.copy_dest_extent_start, diff --git a/src/xenia/gpu/vulkan/vulkan_texture_cache.cc b/src/xenia/gpu/vulkan/vulkan_texture_cache.cc index cd6b62c265a..0bcdb464e2c 100644 --- a/src/xenia/gpu/vulkan/vulkan_texture_cache.cc +++ b/src/xenia/gpu/vulkan/vulkan_texture_cache.cc @@ -477,6 +477,16 @@ VulkanTextureCache::~VulkanTextureCache() { // textures before destroying VMA. 
DestroyAllTextures(true); + // Clean up scaled resolve buffers before destroying VMA + // The command processor should ensure all GPU operations are complete + // before the texture cache is destroyed + for (ScaledResolveBuffer& buffer : scaled_resolve_buffers_) { + if (buffer.buffer != VK_NULL_HANDLE) { + vmaDestroyBuffer(vma_allocator_, buffer.buffer, buffer.allocation); + } + } + scaled_resolve_buffers_.clear(); + if (vma_allocator_ != VK_NULL_HANDLE) { vmaDestroyAllocator(vma_allocator_); } @@ -896,6 +906,7 @@ VkImageView VulkanTextureCache::RequestSwapTexture( return VK_NULL_HANDLE; } if (!LoadTextureData(*texture)) { + XELOGE("Failed to load texture data for swap texture"); return VK_NULL_HANDLE; } texture->MarkAsUsed(); @@ -925,6 +936,13 @@ VkImageView VulkanTextureCache::RequestSwapTexture( return texture_view; } +bool VulkanTextureCache::IsScaledResolveSupportedForFormat( + TextureKey key) const { + // Check if the format has a valid host format pair, meaning we can handle it + const HostFormatPair& host_format_pair = GetHostFormatPair(key); + return host_format_pair.format_unsigned.format != VK_FORMAT_UNDEFINED; +} + bool VulkanTextureCache::IsSignedVersionSeparateForFormat( TextureKey key) const { const HostFormatPair& host_format_pair = GetHostFormatPair(key); @@ -1280,12 +1298,69 @@ bool VulkanTextureCache::LoadTextureDataFromResidentMemoryImpl(Texture& texture, if (!descriptor_set_source_base) { return false; } - write_descriptor_set_source_base_buffer_info.buffer = - vulkan_shared_memory.buffer(); - write_descriptor_set_source_base_buffer_info.offset = texture_key.base_page - << 12; - write_descriptor_set_source_base_buffer_info.range = - xe::align(vulkan_texture.GetGuestBaseSize(), source_length_alignment); + if (texture_key.scaled_resolve) { + // For scaled textures, read from scaled resolve buffers + uint32_t guest_address = texture_key.base_page << 12; + uint32_t guest_size = vulkan_texture.GetGuestBaseSize(); + + // Ensure the scaled buffer 
exists + if (EnsureScaledResolveMemoryCommitted(guest_address, guest_size)) { + // Make the range current + if (MakeScaledResolveRangeCurrent(guest_address, guest_size)) { + VkBuffer scaled_buffer = GetCurrentScaledResolveBuffer(); + if (scaled_buffer != VK_NULL_HANDLE) { + // Calculate offset within the scaled buffer + uint32_t draw_resolution_scale_area = + draw_resolution_scale_x() * draw_resolution_scale_y(); + uint64_t scaled_offset = + uint64_t(guest_address) * draw_resolution_scale_area; + + uint64_t buffer_relative_offset = 0; + if (scaled_resolve_current_buffer_index_ < + scaled_resolve_buffers_.size()) { + const ScaledResolveBuffer& current_buffer = + scaled_resolve_buffers_[scaled_resolve_current_buffer_index_]; + buffer_relative_offset = + scaled_offset - current_buffer.range_start_scaled; + } + + write_descriptor_set_source_base_buffer_info.buffer = scaled_buffer; + write_descriptor_set_source_base_buffer_info.offset = + buffer_relative_offset; + write_descriptor_set_source_base_buffer_info.range = + xe::align(guest_size * draw_resolution_scale_area, + source_length_alignment); + + } else { + XELOGE( + "Scaled resolve texture load: Failed to get current scaled " + "buffer for texture at 0x{:08X}", + guest_address); + return false; + } + } else { + XELOGE( + "Scaled resolve texture load: Failed to make range current for " + "texture at 0x{:08X}", + guest_address); + return false; + } + } else { + XELOGE( + "Scaled resolve texture load: Failed to ensure scaled memory for " + "texture at 0x{:08X}", + guest_address); + return false; + } + } else { + // Regular unscaled texture - use shared memory + write_descriptor_set_source_base_buffer_info.buffer = + vulkan_shared_memory.buffer(); + write_descriptor_set_source_base_buffer_info.offset = + texture_key.base_page << 12; + write_descriptor_set_source_base_buffer_info.range = + xe::align(vulkan_texture.GetGuestBaseSize(), source_length_alignment); + } VkWriteDescriptorSet& write_descriptor_set_source_base = 
write_descriptor_sets[write_descriptor_set_count++]; write_descriptor_set_source_base.sType = @@ -1310,6 +1385,10 @@ bool VulkanTextureCache::LoadTextureDataFromResidentMemoryImpl(Texture& texture, if (!descriptor_set_source_mips) { return false; } + // TODO: Implement scaled mips support similar to D3D12. + // Currently mips are always loaded from unscaled shared memory even when + // the base texture is scaled. D3D12 properly handles scaled mips in + // D3D12TextureCache::LoadTextureDataFromResidentMemoryImpl. write_descriptor_set_source_mips_buffer_info.buffer = vulkan_shared_memory.buffer(); write_descriptor_set_source_mips_buffer_info.offset = texture_key.mip_page @@ -1702,7 +1781,6 @@ VulkanTextureCache::VulkanTextureCache( command_processor_(command_processor), guest_shader_pipeline_stages_(guest_shader_pipeline_stages) { // TODO(Triang3l): Support draw resolution scaling. - assert_true(draw_resolution_scale_x == 1 && draw_resolution_scale_y == 1); } bool VulkanTextureCache::Initialize() { @@ -2656,6 +2734,160 @@ xenos::ClampMode VulkanTextureCache::NormalizeClampMode( return clamp_mode; } +bool VulkanTextureCache::EnsureScaledResolveMemoryCommitted( + uint32_t start_unscaled, uint32_t length_unscaled, + uint32_t length_scaled_alignment_log2) { + if (!IsDrawResolutionScaled()) { + return true; + } + + if (length_unscaled == 0) { + return true; + } + + if (start_unscaled > SharedMemory::kBufferSize || + (SharedMemory::kBufferSize - start_unscaled) < length_unscaled) { + return false; + } + + uint32_t draw_resolution_scale_area = + draw_resolution_scale_x() * draw_resolution_scale_y(); + uint64_t start_scaled = uint64_t(start_unscaled) * draw_resolution_scale_area; + uint64_t length_scaled_alignment_bits = + (UINT64_C(1) << length_scaled_alignment_log2) - 1; + uint64_t length_scaled = + (uint64_t(length_unscaled) * draw_resolution_scale_area + + length_scaled_alignment_bits) & + ~length_scaled_alignment_bits; + + // Check if any existing buffer covers this 
range + + bool range_covered = false; + for (const ScaledResolveBuffer& buffer : scaled_resolve_buffers_) { + if (buffer.range_start_scaled <= start_scaled && + (buffer.range_start_scaled + buffer.range_length_scaled) >= + (start_scaled + length_scaled)) { + // This buffer covers the requested range + scaled_resolve_current_range_start_scaled_ = buffer.range_start_scaled; + scaled_resolve_current_range_length_scaled_ = buffer.range_length_scaled; + range_covered = true; + break; + } + } + + if (!range_covered) { + // Need to create a new buffer or extend an existing one + // For simplicity and to avoid fragmentation, we'll use a fixed-size buffer + // approach similar to D3D12 (but smaller - 256MB chunks instead of 2GB) + constexpr uint64_t kBufferSize = 256 * 1024 * 1024; // 256MB per buffer + + // Round up the range to cover complete buffer chunks + uint64_t buffer_start = (start_scaled / kBufferSize) * kBufferSize; + uint64_t buffer_end = + ((start_scaled + length_scaled + kBufferSize - 1) / kBufferSize) * + kBufferSize; + uint64_t buffer_size = buffer_end - buffer_start; + + // Check again if this expanded range is covered + bool expanded_range_covered = false; + for (const ScaledResolveBuffer& buffer : scaled_resolve_buffers_) { + if (buffer.range_start_scaled <= buffer_start && + (buffer.range_start_scaled + buffer.range_length_scaled) >= + buffer_end) { + scaled_resolve_current_range_start_scaled_ = buffer.range_start_scaled; + scaled_resolve_current_range_length_scaled_ = + buffer.range_length_scaled; + expanded_range_covered = true; + break; + } + } + + if (!expanded_range_covered) { + // Limit the number of buffers to prevent unbounded growth + constexpr size_t kMaxBuffers = 32; // Maximum 8GB total (32 * 256MB) + if (scaled_resolve_buffers_.size() >= kMaxBuffers) { + // Reuse the least recently used buffer + // For now, just reuse the first buffer (simple LRU would be better) + ScaledResolveBuffer& reused_buffer = scaled_resolve_buffers_[0]; + 
reused_buffer.range_start_scaled = buffer_start; + reused_buffer.range_length_scaled = buffer_size; + scaled_resolve_current_range_start_scaled_ = buffer_start; + scaled_resolve_current_range_length_scaled_ = buffer_size; + } else { + ScaledResolveBuffer new_buffer; + new_buffer.size = buffer_size; + + VkBufferCreateInfo buffer_create_info = {}; + buffer_create_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; + buffer_create_info.size = new_buffer.size; + buffer_create_info.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; + buffer_create_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + + VmaAllocationCreateInfo allocation_create_info = {}; + allocation_create_info.usage = VMA_MEMORY_USAGE_GPU_ONLY; + + VkResult result = vmaCreateBuffer( + vma_allocator_, &buffer_create_info, &allocation_create_info, + &new_buffer.buffer, &new_buffer.allocation, nullptr); + + if (result != VK_SUCCESS) { + XELOGE( + "VulkanTextureCache: Failed to create scaled resolve buffer: {}", + static_cast(result)); + return false; + } + + new_buffer.range_start_scaled = buffer_start; + new_buffer.range_length_scaled = buffer_size; + + scaled_resolve_buffers_.push_back(new_buffer); + scaled_resolve_current_range_start_scaled_ = buffer_start; + scaled_resolve_current_range_length_scaled_ = buffer_size; + } + } + } + + return true; +} + +bool VulkanTextureCache::MakeScaledResolveRangeCurrent( + uint32_t start_unscaled, uint32_t length_unscaled, + uint32_t length_scaled_alignment_log2) { + if (!IsDrawResolutionScaled()) { + return false; + } + + // First ensure the memory is committed (creates buffers if needed) + if (!EnsureScaledResolveMemoryCommitted(start_unscaled, length_unscaled, + length_scaled_alignment_log2)) { + return false; + } + + uint32_t draw_resolution_scale_area = + draw_resolution_scale_x() * draw_resolution_scale_y(); + uint64_t start_scaled = uint64_t(start_unscaled) * draw_resolution_scale_area; + + // Find which buffer contains this range + for (size_t i = 0; i < 
scaled_resolve_buffers_.size(); ++i) { + const ScaledResolveBuffer& buffer = scaled_resolve_buffers_[i]; + if (start_scaled >= buffer.range_start_scaled && + start_scaled < + (buffer.range_start_scaled + buffer.range_length_scaled)) { + scaled_resolve_current_buffer_index_ = i; + return true; + } + } + + return false; +} + +VkBuffer VulkanTextureCache::GetCurrentScaledResolveBuffer() const { + if (scaled_resolve_current_buffer_index_ >= scaled_resolve_buffers_.size()) { + return VK_NULL_HANDLE; + } + return scaled_resolve_buffers_[scaled_resolve_current_buffer_index_].buffer; +} + } // namespace vulkan } // namespace gpu } // namespace xe diff --git a/src/xenia/gpu/vulkan/vulkan_texture_cache.h b/src/xenia/gpu/vulkan/vulkan_texture_cache.h index e092c233b94..472dc35edf2 100644 --- a/src/xenia/gpu/vulkan/vulkan_texture_cache.h +++ b/src/xenia/gpu/vulkan/vulkan_texture_cache.h @@ -121,7 +121,42 @@ class VulkanTextureCache final : public TextureCache { uint32_t& height_scaled_out, xenos::TextureFormat& format_out); + // Scaled resolve buffer management (for use by VulkanRenderTargetCache) + struct ScaledResolveBuffer { + VkBuffer buffer = VK_NULL_HANDLE; + VmaAllocation allocation = VK_NULL_HANDLE; + uint64_t size = 0; + uint64_t range_start_scaled = 0; + uint64_t range_length_scaled = 0; + }; + + // Public scaled resolve buffer methods for use by VulkanRenderTargetCache + bool EnsureScaledResolveMemoryCommittedPublic( + uint32_t start_unscaled, uint32_t length_unscaled, + uint32_t length_scaled_alignment_log2 = 0) { + return EnsureScaledResolveMemoryCommitted(start_unscaled, length_unscaled, + length_scaled_alignment_log2); + } + + bool MakeScaledResolveRangeCurrent(uint32_t start_unscaled, + uint32_t length_unscaled, + uint32_t length_scaled_alignment_log2 = 0); + + VkBuffer GetCurrentScaledResolveBuffer() const; + + size_t GetScaledResolveCurrentBufferIndex() const { + return scaled_resolve_current_buffer_index_; + } + + const ScaledResolveBuffer* 
GetScaledResolveBufferInfo(size_t index) const { + if (index < scaled_resolve_buffers_.size()) { + return &scaled_resolve_buffers_[index]; + } + return nullptr; + } + protected: + bool IsScaledResolveSupportedForFormat(TextureKey key) const override; bool IsSignedVersionSeparateForFormat(TextureKey key) const override; uint32_t GetHostFormatSwizzle(TextureKey key) const override; @@ -135,6 +170,10 @@ class VulkanTextureCache final : public TextureCache { bool LoadTextureDataFromResidentMemoryImpl(Texture& texture, bool load_base, bool load_mips) override; + bool EnsureScaledResolveMemoryCommitted( + uint32_t start_unscaled, uint32_t length_unscaled, + uint32_t length_scaled_alignment_log2 = 0) override; + void UpdateTextureBindingsImpl(uint32_t fetch_constant_mask) override; private: @@ -352,6 +391,13 @@ class VulkanTextureCache final : public TextureCache { samplers_; std::pair* sampler_used_first_ = nullptr; std::pair* sampler_used_last_ = nullptr; + + // Scaled resolve buffer storage + std::vector scaled_resolve_buffers_; + // Current scaled resolve range tracking + uint64_t scaled_resolve_current_range_start_scaled_ = 0; + uint64_t scaled_resolve_current_range_length_scaled_ = 0; + size_t scaled_resolve_current_buffer_index_ = SIZE_MAX; }; } // namespace vulkan From 7942d680df415ee9ee1f6bdcbff24894b884e190 Mon Sep 17 00:00:00 2001 From: "Herman S." 
<429230+has207@users.noreply.github.com> Date: Thu, 28 Aug 2025 20:01:58 +0900 Subject: [PATCH 2/2] Fix up FSI rendering path - EDRAM scaling and tile dimension scaling - Resolution scaling inversion and coordinate division by scale factor - Added center pixel check to prevent duplicate exports - Reverted texture_cache.cc change (don't need to touch that file) --- src/xenia/gpu/spirv_shader_translator.cc | 16 ++++- src/xenia/gpu/spirv_shader_translator.h | 10 ++- .../gpu/spirv_shader_translator_fetch.cc | 4 +- .../gpu/spirv_shader_translator_memexport.cc | 68 +++++++++++++++++-- src/xenia/gpu/spirv_shader_translator_rb.cc | 18 +++-- src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc | 4 +- src/xenia/gpu/vulkan/vulkan_texture_cache.cc | 5 +- 7 files changed, 103 insertions(+), 22 deletions(-) diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index a6c1a7f07e9..68db2b4d450 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -1931,8 +1931,7 @@ void SpirvShaderTranslator::StartFragmentShaderBeforeMain() { } void SpirvShaderTranslator::StartFragmentShaderInMain() { - // TODO(Triang3l): Allow memory export with resolution scaling only for the - // center host pixel, with sample shading (for depth format conversion) only + // TODO(Triang3l): With sample shading (for depth format conversion) only // for the bottom-right sample (unlike in Direct3D, the sample mask input // doesn't include covered samples of the primitive that correspond to other // invocations, so use the sample that's the most friendly to the half-pixel @@ -2088,7 +2087,6 @@ void SpirvShaderTranslator::StartFragmentShaderInMain() { // see the actual hardware instructions in both OpBitwiseXor and OpFNegate // cases. spv::Id const_sign_bit = builder_->makeUintConstant(UINT32_C(1) << 31); - // TODO(Triang3l): Resolution scale inversion. // X - pixel X .0 in the magnitude, is back-facing in the sign bit. 
assert_true(input_fragment_coordinates_ != spv::NoResult); id_vector_temp_.clear(); @@ -2102,6 +2100,12 @@ void SpirvShaderTranslator::StartFragmentShaderInMain() { input_fragment_coordinates_, id_vector_temp_), spv::NoPrecision))); + // Apply resolution scale inversion after truncating. + if (draw_resolution_scale_x_ > 1) { + param_gen_x = builder_->createBinOp( + spv::OpFMul, type_float_, param_gen_x, + builder_->makeFloatConstant(1.0f / float(draw_resolution_scale_x_))); + } if (!modification.pixel.param_gen_point) { assert_true(input_front_facing_ != spv::NoResult); param_gen_x = builder_->createTriOp( @@ -2137,6 +2141,12 @@ void SpirvShaderTranslator::StartFragmentShaderInMain() { input_fragment_coordinates_, id_vector_temp_), spv::NoPrecision))); + // Apply resolution scale inversion after truncating. + if (draw_resolution_scale_y_ > 1) { + param_gen_y = builder_->createBinOp( + spv::OpFMul, type_float_, param_gen_y, + builder_->makeFloatConstant(1.0f / float(draw_resolution_scale_y_))); + } if (modification.pixel.param_gen_point) { param_gen_y = builder_->createUnaryOp( spv::OpBitcast, type_float_, diff --git a/src/xenia/gpu/spirv_shader_translator.h b/src/xenia/gpu/spirv_shader_translator.h index a201686e0f6..d4890d7de2d 100644 --- a/src/xenia/gpu/spirv_shader_translator.h +++ b/src/xenia/gpu/spirv_shader_translator.h @@ -349,11 +349,15 @@ class SpirvShaderTranslator : public ShaderTranslator { SpirvShaderTranslator(const Features& features, bool native_2x_msaa_with_attachments, bool native_2x_msaa_no_attachments, - bool edram_fragment_shader_interlock) + bool edram_fragment_shader_interlock, + uint32_t draw_resolution_scale_x = 1, + uint32_t draw_resolution_scale_y = 1) : features_(features), native_2x_msaa_with_attachments_(native_2x_msaa_with_attachments), native_2x_msaa_no_attachments_(native_2x_msaa_no_attachments), - edram_fragment_shader_interlock_(edram_fragment_shader_interlock) {} + 
edram_fragment_shader_interlock_(edram_fragment_shader_interlock), + draw_resolution_scale_x_(draw_resolution_scale_x), + draw_resolution_scale_y_(draw_resolution_scale_y) {} uint64_t GetDefaultVertexShaderModification( uint32_t dynamic_addressable_register_count, @@ -711,6 +715,8 @@ class SpirvShaderTranslator : public ShaderTranslator { Features features_; bool native_2x_msaa_with_attachments_; bool native_2x_msaa_no_attachments_; + uint32_t draw_resolution_scale_x_; + uint32_t draw_resolution_scale_y_; // For safety with different drivers (even though fragment shader interlock in // SPIR-V only has one control flow requirement - that both begin and end must diff --git a/src/xenia/gpu/spirv_shader_translator_fetch.cc b/src/xenia/gpu/spirv_shader_translator_fetch.cc index 8f5a74690b9..00974895402 100644 --- a/src/xenia/gpu/spirv_shader_translator_fetch.cc +++ b/src/xenia/gpu/spirv_shader_translator_fetch.cc @@ -1069,7 +1069,9 @@ void SpirvShaderTranslator::ProcessTextureFetchInstruction( coordinates[coordinate_component_index] = coordinates_operand; } - // TODO(Triang3l): Reverting the resolution scale. + // Resolution scale doesn't need reverting for texture weights - weights are + // calculated from fractional parts of coordinates which are + // scale-independent. if (instr.opcode == ucode::FetchOpcode::kGetTextureWeights) { // FIXME(Triang3l): Filtering modes should possibly be taken into account, diff --git a/src/xenia/gpu/spirv_shader_translator_memexport.cc b/src/xenia/gpu/spirv_shader_translator_memexport.cc index 94c0adf543f..7c4ba746c2e 100644 --- a/src/xenia/gpu/spirv_shader_translator_memexport.cc +++ b/src/xenia/gpu/spirv_shader_translator_memexport.cc @@ -37,10 +37,70 @@ void SpirvShaderTranslator::ExportToMemory(uint8_t export_eM) { // Check if memory export is allowed in this guest shader invocation. 
std::optional<SpirvBuilder::IfBuilder> if_memexport_allowed; - if (main_memexport_allowed_ != spv::NoResult) { - if_memexport_allowed.emplace(main_memexport_allowed_, - spv::SelectionControlDontFlattenMask, - *builder_); + spv::Id memexport_allowed = main_memexport_allowed_; + + // For pixel shaders with resolution scaling, only allow memory export from + // the center host pixel to avoid duplicate exports. + if (is_pixel_shader() && + (draw_resolution_scale_x_ > 1 || draw_resolution_scale_y_ > 1)) { + assert_true(input_fragment_coordinates_ != spv::NoResult); + + // Check if we're at the center pixel (scale/2 for both X and Y). + spv::Id is_center_pixel = builder_->makeBoolConstant(true); + + // Check X coordinate. + if (draw_resolution_scale_x_ > 1) { + id_vector_temp_.clear(); + id_vector_temp_.push_back(const_int_0_); + spv::Id pixel_x = builder_->createUnaryOp( + spv::OpConvertFToU, type_uint_, + builder_->createLoad( + builder_->createAccessChain(spv::StorageClassInput, + input_fragment_coordinates_, + id_vector_temp_), + spv::NoPrecision)); + spv::Id pixel_x_remainder = builder_->createBinOp( + spv::OpUMod, type_uint_, pixel_x, + builder_->makeUintConstant(draw_resolution_scale_x_)); + is_center_pixel = builder_->createBinOp( + spv::OpLogicalAnd, type_bool_, is_center_pixel, + builder_->createBinOp( + spv::OpIEqual, type_bool_, pixel_x_remainder, + builder_->makeUintConstant(draw_resolution_scale_x_ >> 1))); + } + + // Check Y coordinate. 
+ if (draw_resolution_scale_y_ > 1) { + id_vector_temp_.clear(); + id_vector_temp_.push_back(builder_->makeIntConstant(1)); + spv::Id pixel_y = builder_->createUnaryOp( + spv::OpConvertFToU, type_uint_, + builder_->createLoad( + builder_->createAccessChain(spv::StorageClassInput, + input_fragment_coordinates_, + id_vector_temp_), + spv::NoPrecision)); + spv::Id pixel_y_remainder = builder_->createBinOp( + spv::OpUMod, type_uint_, pixel_y, + builder_->makeUintConstant(draw_resolution_scale_y_)); + is_center_pixel = builder_->createBinOp( + spv::OpLogicalAnd, type_bool_, is_center_pixel, + builder_->createBinOp( + spv::OpIEqual, type_bool_, pixel_y_remainder, + builder_->makeUintConstant(draw_resolution_scale_y_ >> 1))); + } + + // Combine with existing memexport_allowed condition. + memexport_allowed = + memexport_allowed != spv::NoResult + ? builder_->createBinOp(spv::OpLogicalAnd, type_bool_, + memexport_allowed, is_center_pixel) + : is_center_pixel; + } + + if (memexport_allowed != spv::NoResult) { + if_memexport_allowed.emplace( + memexport_allowed, spv::SelectionControlDontFlattenMask, *builder_); } // If the pixel was killed (but the actual killing on the SPIR-V side has not diff --git a/src/xenia/gpu/spirv_shader_translator_rb.cc b/src/xenia/gpu/spirv_shader_translator_rb.cc index e19fdd540f0..f1e7d5291d1 100644 --- a/src/xenia/gpu/spirv_shader_translator_rb.cc +++ b/src/xenia/gpu/spirv_shader_translator_rb.cc @@ -765,9 +765,10 @@ void SpirvShaderTranslator::CompleteFragmentShaderInMain() { fsi_color_targets_written = builder_->createLoad(var_main_fsi_color_written_, spv::NoPrecision); fsi_const_int_1 = builder_->makeIntConstant(1); - // TODO(Triang3l): Resolution scaling. + // Apply resolution scaling to EDRAM size. 
fsi_const_edram_size_dwords = builder_->makeUintConstant( - xenos::kEdramTileWidthSamples * xenos::kEdramTileHeightSamples * + xenos::kEdramTileWidthSamples * draw_resolution_scale_x_ * + xenos::kEdramTileHeightSamples * draw_resolution_scale_y_ * xenos::kEdramTileCount); for (uint32_t i = 0; i < 4; ++i) { fsi_samples_covered[i] = builder_->createBinOp( @@ -1449,10 +1450,12 @@ void SpirvShaderTranslator::FSI_LoadEdramOffsets(spv::Id msaa_samples) { // Get 40 x 16 x resolution scale 32bpp half-tile or 40x16 64bpp tile index. // Working with 40x16-sample portions for 64bpp and for swapping for depth - // dividing by 40, not by 80. - // TODO(Triang3l): Resolution scaling. - uint32_t tile_width = xenos::kEdramTileWidthSamples; + // Apply resolution scaling to tile dimensions. + uint32_t tile_width = + xenos::kEdramTileWidthSamples * draw_resolution_scale_x_; spv::Id const_tile_half_width = builder_->makeUintConstant(tile_width >> 1); - uint32_t tile_height = xenos::kEdramTileHeightSamples; + uint32_t tile_height = + xenos::kEdramTileHeightSamples * draw_resolution_scale_y_; spv::Id const_tile_height = builder_->makeUintConstant(tile_height); spv::Id tile_half_index[2], tile_half_sample_coordinates[2]; for (uint32_t i = 0; i < 2; ++i) { @@ -1565,8 +1568,9 @@ spv::Id SpirvShaderTranslator::FSI_AddSampleOffset(spv::Id sample_0_address, return sample_0_address; } spv::Id sample_offset; - // TODO(Triang3l): Resolution scaling. - uint32_t tile_width = xenos::kEdramTileWidthSamples; + // Apply resolution scaling to tile width. 
+ uint32_t tile_width = + xenos::kEdramTileWidthSamples * draw_resolution_scale_x_; if (sample_index == 1) { sample_offset = builder_->makeIntConstant(tile_width); } else { diff --git a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc index 99a8932c8ba..35e35e03061 100644 --- a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc +++ b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc @@ -62,7 +62,9 @@ bool VulkanPipelineCache::Initialize() { SpirvShaderTranslator::Features(vulkan_device), render_target_cache_.msaa_2x_attachments_supported(), render_target_cache_.msaa_2x_no_attachments_supported(), - edram_fragment_shader_interlock); + edram_fragment_shader_interlock, + render_target_cache_.draw_resolution_scale_x(), + render_target_cache_.draw_resolution_scale_y()); if (edram_fragment_shader_interlock) { std::vector<uint8_t> depth_only_fragment_shader_code = diff --git a/src/xenia/gpu/vulkan/vulkan_texture_cache.cc b/src/xenia/gpu/vulkan/vulkan_texture_cache.cc index 0bcdb464e2c..08aef796236 100644 --- a/src/xenia/gpu/vulkan/vulkan_texture_cache.cc +++ b/src/xenia/gpu/vulkan/vulkan_texture_cache.cc @@ -1280,7 +1280,6 @@ bool VulkanTextureCache::LoadTextureDataFromResidentMemoryImpl(Texture& texture, write_descriptor_set_dest.pTexelBufferView = nullptr; } // TODO(Triang3l): Use a single 512 MB shared memory binding if possible. - // TODO(Triang3l): Scaled resolve buffer bindings. // Aligning because if the data for a vector in a storage buffer is provided // partially, the value read may still be (0, 0, 0, 0), and small (especially // linear) textures won't be loaded correctly. @@ -1779,9 +1778,7 @@ VulkanTextureCache::VulkanTextureCache( : TextureCache(register_file, shared_memory, draw_resolution_scale_x, draw_resolution_scale_y), command_processor_(command_processor), - guest_shader_pipeline_stages_(guest_shader_pipeline_stages) { - // TODO(Triang3l): Support draw resolution scaling. 
-} + guest_shader_pipeline_stages_(guest_shader_pipeline_stages) {} bool VulkanTextureCache::Initialize() { const ui::vulkan::VulkanDevice* const vulkan_device =