mirror of https://github.com/stenzek/duckstation
				
				
				
			
			You cannot select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
	
	
		
			3797 lines
		
	
	
		
			135 KiB
		
	
	
	
		
			C++
		
	
			
		
		
	
	
			3797 lines
		
	
	
		
			135 KiB
		
	
	
	
		
			C++
		
	
| // SPDX-FileCopyrightText: 2019-2025 Connor McLaughlin <stenzek@gmail.com>
 | |
| // SPDX-License-Identifier: CC-BY-NC-ND-4.0
 | |
| 
 | |
| #include "gpu_hw_texture_cache.h"
 | |
| #include "game_database.h"
 | |
| #include "gpu_hw.h"
 | |
| #include "gpu_hw_shadergen.h"
 | |
| #include "gpu_sw_rasterizer.h"
 | |
| #include "gpu_thread.h"
 | |
| #include "host.h"
 | |
| #include "imgui_overlays.h"
 | |
| #include "settings.h"
 | |
| #include "system.h"
 | |
| 
 | |
| #include "util/gpu_device.h"
 | |
| #include "util/imgui_fullscreen.h"
 | |
| #include "util/imgui_manager.h"
 | |
| #include "util/state_wrapper.h"
 | |
| 
 | |
| #include "common/error.h"
 | |
| #include "common/file_system.h"
 | |
| #include "common/gsvector_formatter.h"
 | |
| #include "common/heterogeneous_containers.h"
 | |
| #include "common/log.h"
 | |
| #include "common/path.h"
 | |
| #include "common/string_util.h"
 | |
| #include "common/timer.h"
 | |
| 
 | |
| #include "IconsEmoji.h"
 | |
| 
 | |
| #ifndef XXH_STATIC_LINKING_ONLY
 | |
| #define XXH_STATIC_LINKING_ONLY
 | |
| #endif
 | |
| #include "xxhash.h"
 | |
| #ifdef CPU_ARCH_SSE
 | |
| #include "xxh_x86dispatch.h"
 | |
| #endif
 | |
| 
 | |
| #include <algorithm>
 | |
| #include <cmath>
 | |
| #include <numeric>
 | |
| #include <unordered_set>
 | |
| 
 | |
| LOG_CHANNEL(GPU_HW);
 | |
| 
 | |
| #include "common/ryml_helpers.h"
 | |
| 
 | |
| // #define ALWAYS_TRACK_VRAM_WRITES 1
 | |
| 
 | |
| namespace GPUTextureCache {
 | |
| static constexpr u32 MAX_CLUT_SIZE = 256;
 | |
| static constexpr u32 NUM_PAGE_DRAW_RECTS = 4;
 | |
| static constexpr const GSVector4i& INVALID_RECT = GPU_HW::INVALID_RECT;
 | |
| static constexpr const GPUTexture::Format REPLACEMENT_TEXTURE_FORMAT = GPUTexture::Format::RGBA8;
 | |
| static constexpr const char LOCAL_CONFIG_FILENAME[] = "config.yaml";
 | |
| 
 | |
| static constexpr u32 STATE_PALETTE_RECORD_SIZE =
 | |
|   sizeof(GSVector4i) + sizeof(SourceKey) + sizeof(PaletteRecordFlags) + sizeof(HashType) + sizeof(u16) * MAX_CLUT_SIZE;
 | |
| 
 | |
| // Has to be public because it's referenced in Source.
 | |
| struct HashCacheEntry
 | |
| {
 | |
|   std::unique_ptr<GPUTexture> texture;
 | |
|   u32 ref_count;
 | |
|   u32 last_used_frame;
 | |
|   TList<Source> sources;
 | |
| };
 | |
| 
 | |
| namespace {
 | |
| struct VRAMWrite
 | |
| {
 | |
|   GSVector4i active_rect;
 | |
|   GSVector4i write_rect;
 | |
|   HashType hash;
 | |
| 
 | |
|   struct PaletteRecord
 | |
|   {
 | |
|     // TODO: Texture window, for sub texture dumping.
 | |
|     GSVector4i rect;
 | |
|     SourceKey key;
 | |
|     PaletteRecordFlags flags;
 | |
| 
 | |
|     // Awkward to store, but we need to keep a backup copy of each CLUT, because if the CLUT gets overwritten
 | |
|     // before the VRAM write, when we go to dump the texture, it'll be incorrect.
 | |
|     HashType palette_hash;
 | |
|     u16 palette[MAX_CLUT_SIZE];
 | |
|   };
 | |
| 
 | |
|   // List of palettes and rectangles drawn for dumping.
 | |
|   // TODO: Keep these in texel-local space, not global space, that way texture sizes aren't aligned to 4 pixels.
 | |
|   // But realistically, that probably isn't super common, and also requires modifying the renderer side of things.
 | |
|   std::vector<PaletteRecord> palette_records;
 | |
| 
 | |
|   u32 num_splits;
 | |
|   u32 num_page_refs;
 | |
|   std::array<TListNode<VRAMWrite>, MAX_PAGE_REFS_PER_WRITE> page_refs;
 | |
| };
 | |
| 
 | |
| struct PageEntry
 | |
| {
 | |
|   TList<Source> sources;
 | |
|   TList<VRAMWrite> writes; // TODO: Split to own list
 | |
|   u32 num_draw_rects;
 | |
|   GSVector4i total_draw_rect; // NOTE: In global VRAM space.
 | |
|   std::array<GSVector4i, NUM_PAGE_DRAW_RECTS> draw_rects;
 | |
| };
 | |
| 
 | |
| struct HashCacheKey
 | |
| {
 | |
|   HashType texture_hash;
 | |
|   HashType palette_hash;
 | |
|   HashType mode;
 | |
| 
 | |
|   ALWAYS_INLINE bool operator==(const HashCacheKey& k) const
 | |
|   {
 | |
|     return (std::memcmp(&k, this, sizeof(HashCacheKey)) == 0);
 | |
|   }
 | |
|   ALWAYS_INLINE bool operator!=(const HashCacheKey& k) const
 | |
|   {
 | |
|     return (std::memcmp(&k, this, sizeof(HashCacheKey)) != 0);
 | |
|   }
 | |
| };
 | |
| struct HashCacheKeyHash
 | |
| {
 | |
|   size_t operator()(const HashCacheKey& k) const;
 | |
| };
 | |
| 
 | |
| enum class TextureReplacementType : u8
 | |
| {
 | |
|   VRAMReplacement,
 | |
|   TextureFromVRAMWrite,
 | |
|   TextureFromPage,
 | |
| };
 | |
| 
 | |
| struct TextureReplacementSubImage
 | |
| {
 | |
|   GSVector4i dst_rect;
 | |
|   GSVector4i src_rect;
 | |
|   GPUTexture* texture;
 | |
|   float scale_x;
 | |
|   float scale_y;
 | |
|   bool invert_alpha;
 | |
| };
 | |
| 
 | |
| struct VRAMReplacementName
 | |
| {
 | |
|   u64 low;
 | |
|   u64 high;
 | |
| 
 | |
|   TinyString ToString() const;
 | |
|   bool Parse(const std::string_view file_title);
 | |
| 
 | |
|   bool operator<(const VRAMReplacementName& rhs) const { return std::tie(low, high) < std::tie(rhs.low, rhs.high); }
 | |
|   bool operator==(const VRAMReplacementName& rhs) const { return low == rhs.low && high == rhs.high; }
 | |
|   bool operator!=(const VRAMReplacementName& rhs) const { return low != rhs.low || high != rhs.high; }
 | |
| };
 | |
| 
 | |
| struct VRAMReplacementNameHash
 | |
| {
 | |
|   size_t operator()(const VRAMReplacementName& hash) const;
 | |
| };
 | |
| 
 | |
| struct TextureReplacementIndex
 | |
| {
 | |
|   u64 src_hash;
 | |
|   GPUTextureMode mode;
 | |
| 
 | |
|   bool operator<(const TextureReplacementIndex& rhs) const
 | |
|   {
 | |
|     return std::tie(src_hash, mode) < std::tie(src_hash, mode);
 | |
|   }
 | |
|   bool operator==(const TextureReplacementIndex& rhs) const { return src_hash == rhs.src_hash && mode == rhs.mode; }
 | |
|   bool operator!=(const TextureReplacementIndex& rhs) const { return src_hash != rhs.src_hash || mode != rhs.mode; }
 | |
| };
 | |
| 
 | |
| struct TextureReplacementIndexHash
 | |
| {
 | |
|   size_t operator()(const TextureReplacementIndex& hash) const;
 | |
| };
 | |
| 
 | |
| struct TextureReplacementName
 | |
| {
 | |
|   u64 src_hash;
 | |
|   u64 pal_hash;
 | |
|   u16 src_width;
 | |
|   u16 src_height;
 | |
|   TextureReplacementType type;
 | |
|   u8 texture_mode;
 | |
|   u16 offset_x;
 | |
|   u16 offset_y;
 | |
|   u16 width;
 | |
|   u16 height;
 | |
|   u8 pal_min;
 | |
|   u8 pal_max;
 | |
| 
 | |
|   TinyString ToString() const;
 | |
|   bool Parse(const std::string_view file_title);
 | |
|   TextureReplacementIndex GetIndex() const;
 | |
|   GPUTextureMode GetTextureMode() const;
 | |
|   bool IsSemitransparent() const;
 | |
| 
 | |
|   bool operator<(const TextureReplacementName& rhs) const { return (std::memcmp(this, &rhs, sizeof(*this)) < 0); }
 | |
|   bool operator==(const TextureReplacementName& rhs) const { return (std::memcmp(this, &rhs, sizeof(*this)) == 0); }
 | |
|   bool operator!=(const TextureReplacementName& rhs) const { return (std::memcmp(this, &rhs, sizeof(*this)) != 0); }
 | |
| 
 | |
|   ALWAYS_INLINE GSVector2i GetSizeVec() const { return GSVector2i(width, height); }
 | |
|   ALWAYS_INLINE GSVector2i GetOffsetVec() const { return GSVector2i(offset_x, offset_y); }
 | |
|   ALWAYS_INLINE GSVector4i GetDestRect() const
 | |
|   {
 | |
|     return GSVector4i(GSVector4i(GetOffsetVec()).xyxy().add32(GSVector4i(GetSizeVec()).zwxy()));
 | |
|   }
 | |
| };
 | |
| 
 | |
| struct DumpedTextureKey
 | |
| {
 | |
|   HashType tex_hash;
 | |
|   HashType pal_hash;
 | |
|   u16 offset_x, offset_y;
 | |
|   u16 width, height;
 | |
|   TextureReplacementType type;
 | |
|   u8 texture_mode;
 | |
|   u8 pad[6];
 | |
| 
 | |
|   ALWAYS_INLINE bool operator==(const DumpedTextureKey& k) const
 | |
|   {
 | |
|     return (std::memcmp(&k, this, sizeof(DumpedTextureKey)) == 0);
 | |
|   }
 | |
|   ALWAYS_INLINE bool operator!=(const DumpedTextureKey& k) const
 | |
|   {
 | |
|     return (std::memcmp(&k, this, sizeof(DumpedTextureKey)) != 0);
 | |
|   }
 | |
| 
 | |
|   static DumpedTextureKey FromName(const TextureReplacementName& name)
 | |
|   {
 | |
|     return DumpedTextureKey{name.src_hash, name.pal_hash,     name.offset_x,
 | |
|                             name.offset_y, name.width,        name.height,
 | |
|                             name.type,     name.texture_mode, {}};
 | |
|   }
 | |
| };
 | |
| struct DumpedTextureKeyHash
 | |
| {
 | |
|   size_t operator()(const DumpedTextureKey& k) const;
 | |
| };
 | |
| } // namespace
 | |
| 
 | |
| using HashCache = std::unordered_map<HashCacheKey, HashCacheEntry, HashCacheKeyHash>;
 | |
| using ReplacementImageCache = PreferUnorderedStringMap<TextureReplacementImage>;
 | |
| using GPUReplacementImageCache = PreferUnorderedStringMap<std::pair<std::unique_ptr<GPUTexture>, u32>>;
 | |
| 
 | |
| using VRAMReplacementMap = std::unordered_map<VRAMReplacementName, std::string, VRAMReplacementNameHash>;
 | |
| using TextureReplacementMap =
 | |
|   std::unordered_multimap<TextureReplacementIndex, std::pair<TextureReplacementName, std::string>,
 | |
|                           TextureReplacementIndexHash>;
 | |
| 
 | |
| static bool ShouldTrackVRAMWrites();
 | |
| static bool IsDumpingVRAMWriteTextures();
 | |
| static void UpdateVRAMTrackingState();
 | |
| 
 | |
| static void SetHashCacheTextureFormat();
 | |
| static bool CompilePipelines(Error* error);
 | |
| static void DestroyPipelines();
 | |
| 
 | |
| static const Source* ReturnSource(Source* source, const GSVector4i uv_rect, PaletteRecordFlags flags);
 | |
| static Source* CreateSource(SourceKey key);
 | |
| 
 | |
| static HashCacheKey GetHashCacheKey(SourceKey key, HashType tex_hash, HashType pal_hash);
 | |
| static HashCacheEntry* LookupHashCache(SourceKey key, HashType tex_hash, HashType pal_hash);
 | |
| static void ApplyTextureReplacements(SourceKey key, HashType tex_hash, HashType pal_hash, HashCacheEntry* entry);
 | |
| static void RemoveFromHashCache(HashCacheEntry* entry, SourceKey key, HashType tex_hash, HashType pal_hash);
 | |
| static void RemoveFromHashCache(HashCache::iterator it);
 | |
| static void ClearHashCache();
 | |
| 
 | |
| static bool IsPageDrawn(u32 page_index, const GSVector4i rect);
 | |
| static void InvalidatePageSources(u32 pn);
 | |
| static void InvalidatePageSources(u32 pn, const GSVector4i rc, bool remove_from_hash_cache = false);
 | |
| static void InvalidateSources();
 | |
| static void DestroySource(Source* src, bool remove_from_hash_cache = false);
 | |
| 
 | |
| static HashType HashPage(u8 page, GPUTextureMode mode);
 | |
| static HashType HashPalette(GPUTexturePaletteReg palette, GPUTextureMode mode);
 | |
| static HashType HashPartialPalette(const u16* palette, u32 min, u32 max);
 | |
| static HashType HashPartialPalette(GPUTexturePaletteReg palette, GPUTextureMode mode, u32 min, u32 max);
 | |
| static HashType HashRect(const GSVector4i rc);
 | |
| 
 | |
| static std::pair<u32, u32> ReducePaletteBounds(const GSVector4i rect, GPUTextureMode mode,
 | |
|                                                GPUTexturePaletteReg palette);
 | |
| static void SyncVRAMWritePaletteRecords(VRAMWrite* entry);
 | |
| static void InitializeVRAMWritePaletteRecord(VRAMWrite::PaletteRecord* record, SourceKey source_key,
 | |
|                                              const GSVector4i rect, PaletteRecordFlags flags);
 | |
| static void UpdateVRAMWriteSources(VRAMWrite* entry, SourceKey source_key, const GSVector4i global_uv_rect,
 | |
|                                    PaletteRecordFlags flags);
 | |
| static void SplitVRAMWrite(VRAMWrite* entry, const GSVector4i written_rect);
 | |
| static bool TryMergeVRAMWrite(VRAMWrite* entry, const GSVector4i written_rect);
 | |
| static void RemoveVRAMWrite(VRAMWrite* entry);
 | |
| static void DumpTexturesFromVRAMWrite(VRAMWrite* entry);
 | |
| static void DumpTextureFromPage(const Source* src);
 | |
| 
 | |
| static void DecodeTexture(GPUTextureMode mode, const u16* page_ptr, const u16* palette, u8* dest, u32 dest_stride,
 | |
|                           u32 width, u32 height, GPUTexture::Format dest_format);
 | |
| template<GPUTexture::Format dest_format>
 | |
| static void DecodeTexture4(const u16* page, const u16* palette, u32 width, u32 height, u8* dest, u32 dest_stride);
 | |
| template<GPUTexture::Format dest_format>
 | |
| static void DecodeTexture8(const u16* page, const u16* palette, u32 width, u32 height, u8* dest, u32 dest_stride);
 | |
| template<GPUTexture::Format dest_format>
 | |
| static void DecodeTexture16(const u16* page, u32 width, u32 height, u8* dest, u32 dest_stride);
 | |
| static void DecodeTexture(u8 page, GPUTexturePaletteReg palette, GPUTextureMode mode, GPUTexture* texture);
 | |
| 
 | |
| static std::optional<TextureReplacementType> GetTextureReplacementTypeFromFileTitle(const std::string_view file_title);
 | |
| static bool HasValidReplacementExtension(const std::string_view path);
 | |
| 
 | |
| static bool EnsureGameDirectoryExists();
 | |
| static std::string GetTextureReplacementDirectory();
 | |
| static std::string GetTextureDumpDirectory();
 | |
| 
 | |
| static VRAMReplacementName GetVRAMWriteHash(u32 width, u32 height, const void* pixels);
 | |
| static std::string GetVRAMWriteDumpPath(const VRAMReplacementName& name);
 | |
| 
 | |
| static bool IsMatchingReplacementPalette(HashType full_palette_hash, GPUTextureMode mode, GPUTexturePaletteReg palette,
 | |
|                                          const TextureReplacementName& name);
 | |
| static bool LoadLocalConfiguration(bool load_vram_write_replacement_aliases, bool load_texture_replacement_aliases);
 | |
| 
 | |
| static void FindTextureReplacements(bool load_vram_write_replacements, bool load_texture_replacements,
 | |
|                                     bool prefill_dumped_texture_list, bool prefill_dumped_vram_list);
 | |
| static void LoadTextureReplacementAliases(const ryml::ConstNodeRef& root, bool load_vram_write_replacement_aliases,
 | |
|                                           bool load_texture_replacement_aliases);
 | |
| 
 | |
| static const TextureReplacementImage* GetTextureReplacementImage(const std::string& path);
 | |
| static GPUTexture* GetTextureReplacementGPUImage(const std::string& path);
 | |
| static void CompactTextureReplacementGPUImages();
 | |
| static void PreloadReplacementTextures();
 | |
| static void PurgeUnreferencedTexturesFromCache();
 | |
| 
 | |
| static void DumpTexture(TextureReplacementType type, u32 offset_x, u32 offset_y, u32 src_width, u32 src_height,
 | |
|                         GPUTextureMode mode, HashType src_hash, HashType pal_hash, u32 pal_min, u32 pal_max,
 | |
|                         const u16* palette, const GSVector4i rect, PaletteRecordFlags flags);
 | |
| 
 | |
| static bool HasVRAMWriteTextureReplacements();
 | |
| static void GetVRAMWriteTextureReplacements(std::vector<TextureReplacementSubImage>& replacements,
 | |
|                                             HashType vram_write_hash, HashType palette_hash, GPUTextureMode mode,
 | |
|                                             GPUTexturePaletteReg palette, const GSVector2i& offset_to_page);
 | |
| 
 | |
| static bool HasTexturePageTextureReplacements();
 | |
| static void GetTexturePageTextureReplacements(std::vector<TextureReplacementSubImage>& replacements,
 | |
|                                               u32 start_page_number, HashType page_hash, HashType palette_hash,
 | |
|                                               GPUTextureMode mode, GPUTexturePaletteReg palette);
 | |
| 
 | |
| template<typename T>
 | |
| ALWAYS_INLINE_RELEASE static void ListPrepend(TList<T>* list, T* item, TListNode<T>* item_node)
 | |
| {
 | |
|   item_node->ref = item;
 | |
|   item_node->list = list;
 | |
|   item_node->prev = nullptr;
 | |
|   if (list->tail)
 | |
|   {
 | |
|     item_node->next = list->head;
 | |
|     list->head->prev = item_node;
 | |
|     list->head = item_node;
 | |
|   }
 | |
|   else
 | |
|   {
 | |
|     item_node->next = nullptr;
 | |
|     list->head = item_node;
 | |
|     list->tail = item_node;
 | |
|   }
 | |
| }
 | |
| 
 | |
| template<typename T>
 | |
| ALWAYS_INLINE_RELEASE static void ListAppend(TList<T>* list, T* item, TListNode<T>* item_node)
 | |
| {
 | |
|   item_node->ref = item;
 | |
|   item_node->list = list;
 | |
|   item_node->next = nullptr;
 | |
|   if (list->tail)
 | |
|   {
 | |
|     item_node->prev = list->tail;
 | |
|     list->tail->next = item_node;
 | |
|     list->tail = item_node;
 | |
|   }
 | |
|   else
 | |
|   {
 | |
|     item_node->prev = nullptr;
 | |
|     list->head = item_node;
 | |
|     list->tail = item_node;
 | |
|   }
 | |
| }
 | |
| 
 | |
| template<typename T>
 | |
| ALWAYS_INLINE_RELEASE static void ListMoveToFront(TList<T>* list, TListNode<T>* item_node)
 | |
| {
 | |
|   DebugAssert(list->head);
 | |
|   if (!item_node->prev)
 | |
|     return;
 | |
| 
 | |
|   item_node->prev->next = item_node->next;
 | |
|   if (item_node->next)
 | |
|     item_node->next->prev = item_node->prev;
 | |
|   else
 | |
|     list->tail = item_node->prev;
 | |
| 
 | |
|   item_node->prev = nullptr;
 | |
|   list->head->prev = item_node;
 | |
|   item_node->next = list->head;
 | |
|   list->head = item_node;
 | |
| }
 | |
| 
 | |
| template<typename T>
 | |
| ALWAYS_INLINE_RELEASE static void ListUnlink(const TListNode<T>& node)
 | |
| {
 | |
|   if (node.prev)
 | |
|     node.prev->next = node.next;
 | |
|   else
 | |
|     node.list->head = node.next;
 | |
|   if (node.next)
 | |
|     node.next->prev = node.prev;
 | |
|   else
 | |
|     node.list->tail = node.prev;
 | |
| }
 | |
| 
 | |
| template<typename T, typename F>
 | |
| ALWAYS_INLINE_RELEASE static void ListIterate(const TList<T>& list, const F& f)
 | |
| {
 | |
|   for (const GPUTextureCache::TListNode<T>* n = list.head; n;)
 | |
|   {
 | |
|     const GPUTextureCache::TListNode<T>* tn = n;
 | |
|     n = n->next;
 | |
|     f(tn->ref);
 | |
|   }
 | |
| }
 | |
| 
 | |
| template<typename T, typename F>
 | |
| ALWAYS_INLINE_RELEASE static bool ListIterateWithEarlyExit(const TList<T>& list, const F& f)
 | |
| {
 | |
|   for (const GPUTextureCache::TListNode<T>* n = list.head; n; n = n->next)
 | |
|   {
 | |
|     if (!f(n->ref))
 | |
|       return false;
 | |
|   }
 | |
| 
 | |
|   return true;
 | |
| }
 | |
| 
 | |
| template<typename F>
 | |
| ALWAYS_INLINE_RELEASE static void LoopRectPages(u32 left, u32 top, u32 right, u32 bottom, const F& f)
 | |
| {
 | |
|   DebugAssert(right <= VRAM_WIDTH && bottom <= VRAM_HEIGHT);
 | |
|   DebugAssert((right - left) > 0 && (bottom - top) > 0);
 | |
| 
 | |
|   const u32 start_x = left / VRAM_PAGE_WIDTH;
 | |
|   const u32 end_x = (right - 1) / VRAM_PAGE_WIDTH;
 | |
|   const u32 start_y = top / VRAM_PAGE_HEIGHT;
 | |
|   const u32 end_y = (bottom - 1) / VRAM_PAGE_HEIGHT;
 | |
| 
 | |
|   u32 page_number = VRAMPageIndex(start_x, start_y);
 | |
|   for (u32 page_y = start_y; page_y <= end_y; page_y++)
 | |
|   {
 | |
|     u32 y_page_number = page_number;
 | |
| 
 | |
|     for (u32 page_x = start_x; page_x <= end_x; page_x++)
 | |
|       f(y_page_number++);
 | |
| 
 | |
|     page_number += VRAM_PAGES_WIDE;
 | |
|   }
 | |
| }
 | |
| 
 | |
| template<typename F>
 | |
| ALWAYS_INLINE_RELEASE static bool LoopRectPagesWithEarlyExit(u32 left, u32 top, u32 right, u32 bottom, const F& f)
 | |
| {
 | |
|   DebugAssert(right <= VRAM_WIDTH && bottom <= VRAM_HEIGHT);
 | |
|   DebugAssert((right - left) > 0 && (bottom - top) > 0);
 | |
| 
 | |
|   const u32 start_x = left / VRAM_PAGE_WIDTH;
 | |
|   const u32 end_x = (right - 1) / VRAM_PAGE_WIDTH;
 | |
|   const u32 start_y = top / VRAM_PAGE_HEIGHT;
 | |
|   const u32 end_y = (bottom - 1) / VRAM_PAGE_HEIGHT;
 | |
| 
 | |
|   u32 page_number = VRAMPageIndex(start_x, start_y);
 | |
|   for (u32 page_y = start_y; page_y <= end_y; page_y++)
 | |
|   {
 | |
|     u32 y_page_number = page_number;
 | |
| 
 | |
|     for (u32 page_x = start_x; page_x <= end_x; page_x++)
 | |
|     {
 | |
|       if (!f(y_page_number++))
 | |
|         return false;
 | |
|     }
 | |
| 
 | |
|     page_number += VRAM_PAGES_WIDE;
 | |
|   }
 | |
| 
 | |
|   return true;
 | |
| }
 | |
| 
 | |
| template<typename F>
 | |
| ALWAYS_INLINE_RELEASE static void LoopRectPages(const GSVector4i& rc, const F& f)
 | |
| {
 | |
|   LoopRectPages(rc.left, rc.top, rc.right, rc.bottom, f);
 | |
| }
 | |
| 
 | |
| template<typename F>
 | |
| ALWAYS_INLINE_RELEASE static bool LoopRectPagesWithEarlyExit(const GSVector4i& rc, const F& f)
 | |
| {
 | |
|   return LoopRectPagesWithEarlyExit(rc.left, rc.top, rc.right, rc.bottom, f);
 | |
| }
 | |
| 
 | |
| template<typename F>
 | |
| ALWAYS_INLINE_RELEASE static void LoopXWrappedPages(u32 page, u32 num_pages, const F& f)
 | |
| {
 | |
|   for (u32 i = 0; i < num_pages; i++)
 | |
|     f((page & VRAM_PAGE_Y_MASK) | ((page + i) & VRAM_PAGE_X_MASK));
 | |
| }
 | |
| 
 | |
| ALWAYS_INLINE static void DoStateVector(StateWrapper& sw, GSVector4i* vec)
 | |
| {
 | |
|   sw.DoBytes(vec->S32, sizeof(vec->S32));
 | |
| }
 | |
| 
 | |
| ALWAYS_INLINE static float RectDistance(const GSVector4i& lhs, const GSVector4i& rhs)
 | |
| {
 | |
|   const GSVector4 flhs(lhs);
 | |
|   const GSVector4 frhs(rhs);
 | |
|   const GSVector2 clhs = flhs.xy() + ((flhs.zw() - flhs.xy()) * 0.5f);
 | |
|   const GSVector2 crhs = frhs.xy() + ((frhs.zw() - flhs.xy()) * 0.5f);
 | |
|   return clhs.dot(crhs);
 | |
| }
 | |
| 
 | |
| namespace {
 | |
| struct GPUTextureCacheState
 | |
| {
 | |
|   Settings::TextureReplacementSettings::Configuration config;
 | |
|   size_t hash_cache_memory_usage = 0;
 | |
|   VRAMWrite* last_vram_write = nullptr;
 | |
|   bool track_vram_writes = false;
 | |
| 
 | |
|   GPUTexture::Format hash_cache_texture_format = GPUTexture::Format::Unknown;
 | |
|   HashCache hash_cache;
 | |
|   GPU_HW* hw_backend = nullptr; // TODO:FIXME: remove me
 | |
| 
 | |
|   /// List of candidates for purging when the hash cache gets too large.
 | |
|   std::vector<std::pair<HashCache::iterator, s32>> hash_cache_purge_list;
 | |
| 
 | |
|   /// List of VRAM writes collected when saving state.
 | |
|   std::vector<VRAMWrite*> temp_vram_write_list;
 | |
| 
 | |
|   std::unique_ptr<GPUTexture> replacement_texture_render_target;
 | |
|   std::unique_ptr<GPUPipeline> replacement_upscale_pipeline;              // copies alpha as-is with optional bilinear
 | |
|   std::unique_ptr<GPUPipeline> replacement_draw_pipeline;                 // copies alpha as-is
 | |
|   std::unique_ptr<GPUPipeline> replacement_semitransparent_draw_pipeline; // inverts alpha (i.e. semitransparent)
 | |
| 
 | |
|   VRAMReplacementMap vram_replacements;
 | |
| 
 | |
|   // TODO: Combine these into one map?
 | |
|   TextureReplacementMap vram_write_texture_replacements;
 | |
|   TextureReplacementMap texture_page_texture_replacements;
 | |
| 
 | |
|   // TODO: Check the size, purge some when it gets too large.
 | |
|   ReplacementImageCache replacement_image_cache;
 | |
|   GPUReplacementImageCache gpu_replacement_image_cache;
 | |
|   size_t gpu_replacement_image_cache_vram_usage = 0;
 | |
|   std::vector<std::pair<GPUReplacementImageCache::iterator, s32>> gpu_replacement_image_cache_purge_list;
 | |
| 
 | |
|   std::unordered_set<VRAMReplacementName, VRAMReplacementNameHash> dumped_vram_writes;
 | |
|   std::unordered_set<DumpedTextureKey, DumpedTextureKeyHash> dumped_textures;
 | |
| 
 | |
|   ALIGN_TO_CACHE_LINE std::array<PageEntry, NUM_VRAM_PAGES> pages = {};
 | |
| };
 | |
| } // namespace
 | |
| 
 | |
| ALIGN_TO_CACHE_LINE GPUTextureCacheState s_state;
 | |
| 
 | |
| } // namespace GPUTextureCache
 | |
| 
 | |
| bool GPUTextureCache::ShouldTrackVRAMWrites()
 | |
| {
 | |
|   if (!g_gpu_settings.gpu_texture_cache)
 | |
|     return false;
 | |
| 
 | |
|   if (g_gpu_settings.texture_replacements.always_track_uploads)
 | |
|     return true;
 | |
| 
 | |
| #ifdef ALWAYS_TRACK_VRAM_WRITES
 | |
|   return true;
 | |
| #else
 | |
|   return (IsDumpingVRAMWriteTextures() ||
 | |
|           (g_gpu_settings.texture_replacements.enable_texture_replacements && HasVRAMWriteTextureReplacements()));
 | |
| #endif
 | |
| }
 | |
| 
 | |
| bool GPUTextureCache::IsDumpingVRAMWriteTextures()
 | |
| {
 | |
|   return (g_gpu_settings.texture_replacements.dump_textures && !s_state.config.dump_texture_pages);
 | |
| }
 | |
| 
 | |
| bool GPUTextureCache::Initialize(GPU_HW* backend, Error* error)
 | |
| {
 | |
|   s_state.hw_backend = backend;
 | |
| 
 | |
|   SetHashCacheTextureFormat();
 | |
|   ReloadTextureReplacements(false);
 | |
|   UpdateVRAMTrackingState();
 | |
|   if (!CompilePipelines(error))
 | |
|     return false;
 | |
| 
 | |
|   return true;
 | |
| }
 | |
| 
 | |
| bool GPUTextureCache::UpdateSettings(bool use_texture_cache, const GPUSettings& old_settings, Error* error)
 | |
| {
 | |
|   const bool prev_tracking_state = s_state.track_vram_writes;
 | |
| 
 | |
|   // Reload textures if configuration changes.
 | |
|   const bool old_replacement_scale_linear_filter = s_state.config.replacement_scale_linear_filter;
 | |
|   if (LoadLocalConfiguration(false, false) ||
 | |
|       g_gpu_settings.texture_replacements.enable_texture_replacements !=
 | |
|         old_settings.texture_replacements.enable_texture_replacements ||
 | |
|       g_gpu_settings.texture_replacements.enable_vram_write_replacements !=
 | |
|         old_settings.texture_replacements.enable_vram_write_replacements ||
 | |
|       g_gpu_settings.texture_replacements.dump_textures != old_settings.texture_replacements.dump_textures ||
 | |
|       g_gpu_settings.texture_replacements.dump_vram_writes != old_settings.texture_replacements.dump_vram_writes ||
 | |
|       (g_gpu_settings.texture_replacements.dump_replaced_textures !=
 | |
|          old_settings.texture_replacements.dump_replaced_textures &&
 | |
|        (g_gpu_settings.texture_replacements.dump_textures || g_gpu_settings.texture_replacements.dump_vram_writes)))
 | |
|   {
 | |
|     if (use_texture_cache)
 | |
|     {
 | |
|       if (g_gpu_settings.texture_replacements.enable_texture_replacements !=
 | |
|             old_settings.texture_replacements.enable_texture_replacements ||
 | |
|           s_state.config.replacement_scale_linear_filter != old_replacement_scale_linear_filter)
 | |
|       {
 | |
|         DestroyPipelines();
 | |
|         if (!CompilePipelines(error)) [[unlikely]]
 | |
|         {
 | |
|           Error::AddPrefix(error, "Failed to compile pipelines on TC replacement settings change: ");
 | |
|           return false;
 | |
|         }
 | |
|       }
 | |
|     }
 | |
| 
 | |
|     ReloadTextureReplacements(false);
 | |
|   }
 | |
| 
 | |
|   UpdateVRAMTrackingState();
 | |
| 
 | |
|   if (s_state.track_vram_writes != prev_tracking_state)
 | |
|     Invalidate();
 | |
| 
 | |
|   return true;
 | |
| }
 | |
| 
 | |
| bool GPUTextureCache::GetStateSize(StateWrapper& sw, u32* size)
 | |
| {
 | |
|   if (sw.GetVersion() < 73)
 | |
|   {
 | |
|     *size = 0;
 | |
|     return true;
 | |
|   }
 | |
| 
 | |
|   const size_t start = sw.GetPosition();
 | |
|   if (!sw.DoMarker("GPUTextureCache")) [[unlikely]]
 | |
|     return false;
 | |
| 
 | |
|   u32 num_vram_writes = 0;
 | |
|   sw.Do(&num_vram_writes);
 | |
| 
 | |
|   for (u32 i = 0; i < num_vram_writes; i++)
 | |
|   {
 | |
|     sw.SkipBytes(sizeof(GSVector4i) * 2 + sizeof(HashType));
 | |
| 
 | |
|     u32 num_palette_records = 0;
 | |
|     sw.Do(&num_palette_records);
 | |
|     sw.SkipBytes(num_palette_records * STATE_PALETTE_RECORD_SIZE);
 | |
|   }
 | |
| 
 | |
|   if (sw.HasError()) [[unlikely]]
 | |
|     return false;
 | |
| 
 | |
|   *size = static_cast<u32>(sw.GetPosition() - start);
 | |
|   return true;
 | |
| }
 | |
| 
 | |
| bool GPUTextureCache::DoState(StateWrapper& sw, bool skip)
 | |
| {
 | |
|   if (sw.GetVersion() < 73)
 | |
|   {
 | |
|     if (!skip)
 | |
|       WARNING_LOG("Texture cache not in save state due to old version.");
 | |
| 
 | |
|     Invalidate();
 | |
|     return true;
 | |
|   }
 | |
| 
 | |
|   if (!sw.DoMarker("GPUTextureCache"))
 | |
|     return false;
 | |
| 
 | |
|   if (sw.IsReading())
 | |
|   {
 | |
|     if (!skip)
 | |
|       Invalidate();
 | |
| 
 | |
|     u32 num_vram_writes = 0;
 | |
|     sw.Do(&num_vram_writes);
 | |
| 
 | |
|     const bool skip_writes = (skip || !s_state.track_vram_writes);
 | |
| 
 | |
|     for (u32 i = 0; i < num_vram_writes; i++)
 | |
|     {
 | |
|       static constexpr u32 PALETTE_RECORD_SIZE = sizeof(GSVector4i) + sizeof(SourceKey) + sizeof(PaletteRecordFlags) +
 | |
|                                                  sizeof(HashType) + sizeof(u16) * MAX_CLUT_SIZE;
 | |
| 
 | |
|       if (skip_writes)
 | |
|       {
 | |
|         sw.SkipBytes(sizeof(GSVector4i) * 2 + sizeof(HashType));
 | |
| 
 | |
|         u32 num_palette_records = 0;
 | |
|         sw.Do(&num_palette_records);
 | |
|         sw.SkipBytes(num_palette_records * PALETTE_RECORD_SIZE);
 | |
|       }
 | |
|       else
 | |
|       {
 | |
|         VRAMWrite* vrw = new VRAMWrite();
 | |
|         DoStateVector(sw, &vrw->active_rect);
 | |
|         DoStateVector(sw, &vrw->write_rect);
 | |
|         sw.Do(&vrw->hash);
 | |
| 
 | |
|         u32 num_palette_records = 0;
 | |
|         sw.Do(&num_palette_records);
 | |
| 
 | |
|         // Skip palette records if we're not dumping now.
 | |
|         if (g_gpu_settings.texture_replacements.dump_textures)
 | |
|         {
 | |
|           vrw->palette_records.reserve(num_palette_records);
 | |
|           for (u32 j = 0; j < num_palette_records; j++)
 | |
|           {
 | |
|             VRAMWrite::PaletteRecord& rec = vrw->palette_records.emplace_back();
 | |
|             DoStateVector(sw, &rec.rect);
 | |
|             sw.DoBytes(&rec.key, sizeof(rec.key));
 | |
|             sw.Do(&rec.flags);
 | |
|             sw.Do(&rec.palette_hash);
 | |
|             sw.DoBytes(rec.palette, sizeof(rec.palette));
 | |
|           }
 | |
|         }
 | |
|         else
 | |
|         {
 | |
|           sw.SkipBytes(num_palette_records * PALETTE_RECORD_SIZE);
 | |
|         }
 | |
| 
 | |
|         if (sw.HasError())
 | |
|         {
 | |
|           delete vrw;
 | |
|           Invalidate();
 | |
|           return false;
 | |
|         }
 | |
| 
 | |
|         vrw->num_page_refs = 0;
 | |
|         LoopRectPages(vrw->active_rect, [vrw](u32 pn) {
 | |
|           DebugAssert(vrw->num_page_refs < MAX_PAGE_REFS_PER_WRITE);
 | |
|           ListAppend(&s_state.pages[pn].writes, vrw, &vrw->page_refs[vrw->num_page_refs++]);
 | |
|           return true;
 | |
|         });
 | |
|       }
 | |
|     }
 | |
|   }
 | |
|   else
 | |
|   {
 | |
|     s_state.temp_vram_write_list.clear();
 | |
| 
 | |
|     if (!skip && s_state.track_vram_writes)
 | |
|     {
 | |
|       for (PageEntry& page : s_state.pages)
 | |
|       {
 | |
|         ListIterate(page.writes, [](VRAMWrite* vrw) {
 | |
|           if (std::find(s_state.temp_vram_write_list.begin(), s_state.temp_vram_write_list.end(), vrw) !=
 | |
|               s_state.temp_vram_write_list.end())
 | |
|           {
 | |
|             return;
 | |
|           }
 | |
| 
 | |
|           // try not to lose data... pull it from the sources
 | |
|           if (g_settings.texture_replacements.dump_textures)
 | |
|             SyncVRAMWritePaletteRecords(vrw);
 | |
| 
 | |
|           s_state.temp_vram_write_list.push_back(vrw);
 | |
|         });
 | |
|       }
 | |
|     }
 | |
| 
 | |
|     u32 num_vram_writes = static_cast<u32>(s_state.temp_vram_write_list.size());
 | |
|     sw.Do(&num_vram_writes);
 | |
|     for (VRAMWrite* vrw : s_state.temp_vram_write_list)
 | |
|     {
 | |
|       DoStateVector(sw, &vrw->active_rect);
 | |
|       DoStateVector(sw, &vrw->write_rect);
 | |
|       sw.Do(&vrw->hash);
 | |
| 
 | |
|       u32 num_palette_records = static_cast<u32>(vrw->palette_records.size());
 | |
|       sw.Do(&num_palette_records);
 | |
|       for (VRAMWrite::PaletteRecord& rec : vrw->palette_records)
 | |
|       {
 | |
|         DoStateVector(sw, &rec.rect);
 | |
|         sw.DoBytes(&rec.key, sizeof(rec.key));
 | |
|         sw.Do(&rec.flags);
 | |
|         sw.Do(&rec.palette_hash);
 | |
|         sw.DoBytes(rec.palette, sizeof(rec.palette));
 | |
|       }
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   return !sw.HasError();
 | |
| }
 | |
| 
 | |
| void GPUTextureCache::Shutdown()
 | |
| {
 | |
|   Invalidate();
 | |
|   ClearHashCache();
 | |
|   DestroyPipelines();
 | |
|   s_state.replacement_texture_render_target.reset();
 | |
|   s_state.gpu_replacement_image_cache_purge_list = {};
 | |
|   s_state.hash_cache_purge_list = {};
 | |
|   s_state.temp_vram_write_list = {};
 | |
|   s_state.track_vram_writes = false;
 | |
|   s_state.hw_backend = nullptr;
 | |
| 
 | |
|   for (auto it = s_state.gpu_replacement_image_cache.begin(); it != s_state.gpu_replacement_image_cache.end();)
 | |
|   {
 | |
|     g_gpu_device->RecycleTexture(std::move(it->second.first));
 | |
|     it = s_state.gpu_replacement_image_cache.erase(it);
 | |
|   }
 | |
|   s_state.gpu_replacement_image_cache_vram_usage = 0;
 | |
| 
 | |
|   s_state.replacement_image_cache.clear();
 | |
|   s_state.vram_replacements.clear();
 | |
|   s_state.vram_write_texture_replacements.clear();
 | |
|   s_state.texture_page_texture_replacements.clear();
 | |
|   s_state.dumped_textures.clear();
 | |
|   s_state.dumped_vram_writes.clear();
 | |
| }
 | |
| 
 | |
| void GPUTextureCache::SetHashCacheTextureFormat()
 | |
| {
 | |
|   // Prefer 16-bit texture formats where possible.
 | |
|   if (g_gpu_device->SupportsTextureFormat(GPUTexture::Format::RGB5A1))
 | |
|     s_state.hash_cache_texture_format = GPUTexture::Format::RGB5A1;
 | |
|   else if (g_gpu_device->SupportsTextureFormat(GPUTexture::Format::A1BGR5))
 | |
|     s_state.hash_cache_texture_format = GPUTexture::Format::A1BGR5;
 | |
|   else
 | |
|     s_state.hash_cache_texture_format = GPUTexture::Format::RGBA8;
 | |
| 
 | |
|   INFO_LOG("Using {} format for hash cache entries.", GPUTexture::GetFormatName(s_state.hash_cache_texture_format));
 | |
| }
 | |
| 
 | |
| bool GPUTextureCache::CompilePipelines(Error* error)
 | |
| {
 | |
|   if (!g_gpu_settings.texture_replacements.enable_texture_replacements)
 | |
|     return true;
 | |
| 
 | |
|   GPUPipeline::GraphicsConfig plconfig = {};
 | |
|   plconfig.layout = GPUPipeline::Layout::SingleTextureAndPushConstants;
 | |
|   plconfig.input_layout.vertex_attributes = {};
 | |
|   plconfig.input_layout.vertex_stride = 0;
 | |
|   plconfig.rasterization = GPUPipeline::RasterizationState::GetNoCullState();
 | |
|   plconfig.depth = GPUPipeline::DepthState::GetNoTestsState();
 | |
|   plconfig.blend = GPUPipeline::BlendState::GetNoBlendingState();
 | |
|   plconfig.primitive = GPUPipeline::Primitive::Triangles;
 | |
|   plconfig.geometry_shader = nullptr;
 | |
|   plconfig.SetTargetFormats(REPLACEMENT_TEXTURE_FORMAT);
 | |
| 
 | |
|   // Most flags don't matter here.
 | |
|   const GPUDevice::Features features = g_gpu_device->GetFeatures();
 | |
|   const GPU_HW_ShaderGen shadergen(g_gpu_device->GetRenderAPI(), features.dual_source_blend,
 | |
|                                    features.framebuffer_fetch);
 | |
|   std::unique_ptr<GPUShader> fullscreen_quad_vertex_shader = g_gpu_device->CreateShader(
 | |
|     GPUShaderStage::Vertex, shadergen.GetLanguage(), shadergen.GenerateScreenQuadVertexShader());
 | |
|   if (!fullscreen_quad_vertex_shader)
 | |
|     return false;
 | |
| 
 | |
|   plconfig.vertex_shader = fullscreen_quad_vertex_shader.get();
 | |
| 
 | |
|   std::unique_ptr<GPUShader> fs = g_gpu_device->CreateShader(
 | |
|     GPUShaderStage::Fragment, shadergen.GetLanguage(),
 | |
|     shadergen.GenerateReplacementMergeFragmentShader(false, false, s_state.config.replacement_scale_linear_filter));
 | |
|   if (!fs)
 | |
|     return false;
 | |
|   GL_OBJECT_NAME(fs, "Replacement upscale shader");
 | |
|   plconfig.fragment_shader = fs.get();
 | |
|   if (!(s_state.replacement_upscale_pipeline = g_gpu_device->CreatePipeline(plconfig)))
 | |
|     return false;
 | |
|   GL_OBJECT_NAME(s_state.replacement_upscale_pipeline, "Replacement upscale pipeline");
 | |
| 
 | |
|   fs = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(),
 | |
|                                   shadergen.GenerateReplacementMergeFragmentShader(true, false, false));
 | |
|   if (!fs)
 | |
|     return false;
 | |
|   GL_OBJECT_NAME(fs, "Replacement draw shader");
 | |
|   plconfig.fragment_shader = fs.get();
 | |
|   if (!(s_state.replacement_draw_pipeline = g_gpu_device->CreatePipeline(plconfig)))
 | |
|     return false;
 | |
|   GL_OBJECT_NAME(s_state.replacement_draw_pipeline, "Replacement draw pipeline");
 | |
| 
 | |
|   fs = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(),
 | |
|                                   shadergen.GenerateReplacementMergeFragmentShader(true, true, false));
 | |
|   if (!fs)
 | |
|     return false;
 | |
|   GL_OBJECT_NAME(fs, "Replacement semitransparent draw shader");
 | |
|   plconfig.blend = GPUPipeline::BlendState::GetNoBlendingState();
 | |
|   plconfig.fragment_shader = fs.get();
 | |
|   if (!(s_state.replacement_semitransparent_draw_pipeline = g_gpu_device->CreatePipeline(plconfig)))
 | |
|     return false;
 | |
|   GL_OBJECT_NAME(s_state.replacement_semitransparent_draw_pipeline, "Replacement semitransparent draw pipeline");
 | |
| 
 | |
|   return true;
 | |
| }
 | |
| 
 | |
| void GPUTextureCache::DestroyPipelines()
 | |
| {
 | |
|   s_state.replacement_upscale_pipeline.reset();
 | |
|   s_state.replacement_draw_pipeline.reset();
 | |
|   s_state.replacement_semitransparent_draw_pipeline.reset();
 | |
| }
 | |
| 
 | |
| void GPUTextureCache::AddDrawnRectangle(const GSVector4i rect, const GSVector4i clip_rect)
 | |
| {
 | |
|   // TODO: This might be a bit slow...
 | |
|   LoopRectPages(rect, [&rect, &clip_rect](u32 pn) {
 | |
|     PageEntry& page = s_state.pages[pn];
 | |
| 
 | |
|     for (TListNode<VRAMWrite>* n = page.writes.head; n;)
 | |
|     {
 | |
|       VRAMWrite* it = n->ref;
 | |
|       n = n->next;
 | |
|       if (it->active_rect.rintersects(rect))
 | |
|         RemoveVRAMWrite(it);
 | |
|     }
 | |
| 
 | |
|     const GSVector4i rc = rect.rintersect(VRAMPageRect(pn));
 | |
|     if (page.num_draw_rects > 0)
 | |
|     {
 | |
|       u32 candidate = page.num_draw_rects;
 | |
|       for (u32 i = 0; i < page.num_draw_rects; i++)
 | |
|       {
 | |
|         const GSVector4i page_draw_rect = page.draw_rects[i];
 | |
|         if (page_draw_rect.rcontains(rc))
 | |
|         {
 | |
|           // already contained
 | |
|           return;
 | |
|         }
 | |
|         else if (clip_rect.rintersects(page_draw_rect))
 | |
|         {
 | |
|           // this one's probably for the draw rect, so use it
 | |
|           candidate = i;
 | |
|         }
 | |
|       }
 | |
|       if (candidate == NUM_PAGE_DRAW_RECTS)
 | |
|       {
 | |
|         // we're out of draw rects.. pick the one that's the closest, and hope for the best
 | |
|         GL_INS_FMT("Out of draw rects for page {}", pn);
 | |
|         candidate = 0;
 | |
|         float closest_dist = RectDistance(rc, page.draw_rects[0]);
 | |
|         for (u32 i = 1; i < NUM_PAGE_DRAW_RECTS; i++)
 | |
|         {
 | |
|           const float dist = RectDistance(rc, page.draw_rects[i]);
 | |
|           candidate = (dist < closest_dist) ? i : candidate;
 | |
|           closest_dist = (dist < closest_dist) ? dist : closest_dist;
 | |
|         }
 | |
|       }
 | |
| 
 | |
|       if (candidate != page.num_draw_rects)
 | |
|       {
 | |
|         const GSVector4i new_draw_rect = page.draw_rects[candidate].runion(rc);
 | |
|         page.draw_rects[candidate] = new_draw_rect;
 | |
|         InvalidatePageSources(pn, new_draw_rect);
 | |
|       }
 | |
|       else
 | |
|       {
 | |
|         DebugAssert(page.num_draw_rects < NUM_PAGE_DRAW_RECTS);
 | |
|         page.draw_rects[candidate] = rc;
 | |
|         page.num_draw_rects++;
 | |
|         InvalidatePageSources(pn, rc);
 | |
|       }
 | |
| 
 | |
|       page.total_draw_rect = page.total_draw_rect.runion(rc);
 | |
|       GL_INS_FMT("Page {} drawn rect is now {}", pn, page.total_draw_rect);
 | |
|     }
 | |
|     else
 | |
|     {
 | |
|       GL_INS_FMT("Page {} drawn rect is now {}", pn, rc);
 | |
|       page.total_draw_rect = rc;
 | |
|       page.draw_rects[0] = rc;
 | |
|       page.num_draw_rects = 1;
 | |
| 
 | |
|       // remove all sources, let them re-lookup if needed
 | |
|       InvalidatePageSources(pn, rc);
 | |
|     }
 | |
|   });
 | |
| }
 | |
| 
 | |
| void GPUTextureCache::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height, bool check_mask,
 | |
|                                bool set_mask, const GSVector4i src_bounds, const GSVector4i dst_bounds)
 | |
| {
 | |
|   const bool convert_copies_to_writes = s_state.config.convert_copies_to_writes;
 | |
| 
 | |
|   // first dump out any overlapping writes with the old data
 | |
|   if (convert_copies_to_writes)
 | |
|   {
 | |
|     LoopRectPages(dst_bounds, [&dst_bounds](u32 pn) {
 | |
|       PageEntry& page = s_state.pages[pn];
 | |
|       for (TListNode<VRAMWrite>* n = page.writes.head; n; n = n->next)
 | |
|       {
 | |
|         VRAMWrite* it = n->ref;
 | |
|         if (it->active_rect.rintersects(dst_bounds))
 | |
|         {
 | |
|           SyncVRAMWritePaletteRecords(it);
 | |
|           DumpTexturesFromVRAMWrite(it);
 | |
|         }
 | |
|       }
 | |
|     });
 | |
|   }
 | |
| 
 | |
|   // copy and invalidate
 | |
|   GPU_SW_Rasterizer::CopyVRAM(src_x, src_y, dst_x, dst_y, width, height, check_mask, set_mask);
 | |
|   AddWrittenRectangle(dst_bounds, convert_copies_to_writes, true);
 | |
| }
 | |
| 
 | |
| void GPUTextureCache::WriteVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask,
 | |
|                                 const GSVector4i bounds)
 | |
| {
 | |
|   GPU_SW_Rasterizer::WriteVRAM(x, y, width, height, data, set_mask, check_mask);
 | |
| 
 | |
|   if (!s_state.track_vram_writes)
 | |
|     return;
 | |
| 
 | |
|   if (s_state.last_vram_write && TryMergeVRAMWrite(s_state.last_vram_write, bounds))
 | |
|     return;
 | |
| 
 | |
|   VRAMWrite* it = new VRAMWrite();
 | |
|   it->active_rect = bounds;
 | |
|   it->write_rect = bounds;
 | |
|   it->hash = HashRect(bounds);
 | |
|   it->num_page_refs = 0;
 | |
|   LoopRectPages(bounds, [it](u32 pn) {
 | |
|     DebugAssert(it->num_page_refs < MAX_PAGE_REFS_PER_WRITE);
 | |
|     ListAppend(&s_state.pages[pn].writes, it, &it->page_refs[it->num_page_refs++]);
 | |
|     return true;
 | |
|   });
 | |
| 
 | |
|   DEV_LOG("New VRAM write {:016X} at {} touching {} pages", it->hash, bounds, it->num_page_refs);
 | |
|   s_state.last_vram_write = it;
 | |
| }
 | |
| 
 | |
| void GPUTextureCache::AddWrittenRectangle(const GSVector4i rect, bool update_vram_writes, bool remove_from_hash_cache)
 | |
| {
 | |
|   LoopRectPages(rect, [&rect, &update_vram_writes, &remove_from_hash_cache](u32 pn) {
 | |
|     PageEntry& page = s_state.pages[pn];
 | |
|     InvalidatePageSources(pn, rect, remove_from_hash_cache);
 | |
| 
 | |
|     if (page.num_draw_rects > 0)
 | |
|     {
 | |
|       const u32 prev_draw_rects = page.num_draw_rects;
 | |
|       for (u32 i = 0; i < page.num_draw_rects;)
 | |
|       {
 | |
|         const GSVector4i page_draw_rect = page.draw_rects[i];
 | |
|         if (!page_draw_rect.rintersects(rect))
 | |
|         {
 | |
|           i++;
 | |
|           continue;
 | |
|         }
 | |
| 
 | |
|         GL_INS_FMT("Clearing page {} draw rect {} due to write", pn, page_draw_rect);
 | |
|         page.num_draw_rects--;
 | |
|         if (page.num_draw_rects > 0)
 | |
|         {
 | |
|           // reorder it
 | |
|           const u32 remaining_rects = page.num_draw_rects - i;
 | |
|           if (remaining_rects > 0)
 | |
|             std::memmove(&page.draw_rects[i], &page.draw_rects[i + 1], sizeof(GSVector4i) * remaining_rects);
 | |
|         }
 | |
|       }
 | |
| 
 | |
|       if (page.num_draw_rects != prev_draw_rects)
 | |
|       {
 | |
|         if (page.num_draw_rects == 0)
 | |
|         {
 | |
|           page.total_draw_rect = INVALID_RECT;
 | |
|           GL_INS_FMT("Page {} no longer has any draw rects", pn);
 | |
|         }
 | |
|         else
 | |
|         {
 | |
|           GSVector4i new_total_draw_rect = page.draw_rects[0];
 | |
|           for (u32 i = 1; i < page.num_draw_rects; i++)
 | |
|             new_total_draw_rect = new_total_draw_rect.runion(page.draw_rects[i]);
 | |
|           page.total_draw_rect = new_total_draw_rect;
 | |
|           GL_INS_FMT("Page {} total draw rect is now {}", pn, new_total_draw_rect);
 | |
|         }
 | |
|       }
 | |
|     }
 | |
| 
 | |
|     for (TListNode<VRAMWrite>* n = page.writes.head; n;)
 | |
|     {
 | |
|       VRAMWrite* it = n->ref;
 | |
|       n = n->next;
 | |
| 
 | |
|       const GSVector4i intersection = it->active_rect.rintersect(rect);
 | |
|       if (!intersection.rempty())
 | |
|       {
 | |
|         if (update_vram_writes && it->active_rect.rcontains(rect))
 | |
|         {
 | |
|           const HashType new_hash = HashRect(it->write_rect);
 | |
|           DEV_LOG("New VRAM write hash {:016X} => {:016X}", it->hash, new_hash);
 | |
|           it->hash = new_hash;
 | |
|         }
 | |
|         else if (it->num_splits < s_state.config.max_vram_write_splits && !it->active_rect.eq(intersection))
 | |
|         {
 | |
|           SplitVRAMWrite(it, intersection);
 | |
|         }
 | |
|         else
 | |
|         {
 | |
|           RemoveVRAMWrite(it);
 | |
|         }
 | |
|       }
 | |
|     }
 | |
|   });
 | |
| }
 | |
| 
 | |
| [[maybe_unused]] ALWAYS_INLINE static TinyString SourceKeyToString(const GPUTextureCache::SourceKey& key)
 | |
| {
 | |
|   static constexpr const std::array<const char*, 4> texture_modes = {
 | |
|     {"Palette4Bit", "Palette8Bit", "Direct16Bit", "Reserved_Direct16Bit"}};
 | |
| 
 | |
|   TinyString ret;
 | |
|   if (key.mode < GPUTextureMode::Direct16Bit)
 | |
|   {
 | |
|     ret.format("{} Page[{}] CLUT@[{},{}]", texture_modes[static_cast<u8>(key.mode)], key.page, key.palette.GetXBase(),
 | |
|                key.palette.GetYBase());
 | |
|   }
 | |
|   else
 | |
|   {
 | |
|     ret.format("{} Page[{}]", texture_modes[static_cast<u8>(key.mode)], key.page);
 | |
|   }
 | |
|   return ret;
 | |
| }
 | |
| 
 | |
| [[maybe_unused]] ALWAYS_INLINE static TinyString SourceToString(const GPUTextureCache::Source* src)
 | |
| {
 | |
|   return SourceKeyToString(src->key);
 | |
| }
 | |
| 
 | |
| ALWAYS_INLINE_RELEASE static const u16* VRAMPagePointer(u32 pn)
 | |
| {
 | |
|   const u32 start_y = VRAMPageStartY(pn);
 | |
|   const u32 start_x = VRAMPageStartX(pn);
 | |
|   return &g_vram[start_y * VRAM_WIDTH + start_x];
 | |
| }
 | |
| 
 | |
| ALWAYS_INLINE_RELEASE static const u16* VRAMPalettePointer(GPUTexturePaletteReg palette)
 | |
| {
 | |
|   return &g_vram[VRAM_WIDTH * palette.GetYBase() + palette.GetXBase()];
 | |
| }
 | |
| 
 | |
| template<GPUTexture::Format format>
 | |
| void GPUTextureCache::DecodeTexture4(const u16* page, const u16* palette, u32 width, u32 height, u8* dest,
 | |
|                                      u32 dest_stride)
 | |
| {
 | |
|   if ((width % 4u) == 0)
 | |
|   {
 | |
|     const u32 vram_width = width / 4;
 | |
|     [[maybe_unused]] constexpr u32 vram_pixels_per_vec = 2;
 | |
|     [[maybe_unused]] const u32 aligned_vram_width = Common::AlignDownPow2(vram_width, vram_pixels_per_vec);
 | |
| 
 | |
|     for (u32 y = 0; y < height; y++)
 | |
|     {
 | |
|       const u16* page_ptr = page;
 | |
|       u8* dest_ptr = dest;
 | |
|       u32 x = 0;
 | |
| 
 | |
| #ifdef CPU_ARCH_SIMD
 | |
|       for (; x < aligned_vram_width; x += vram_pixels_per_vec)
 | |
|       {
 | |
|         // No variable shift without AVX, kinda pointless to vectorize the extract...
 | |
|         alignas(VECTOR_ALIGNMENT) u16 c16[vram_pixels_per_vec * 4];
 | |
|         u32 pp = *(page_ptr++);
 | |
|         c16[0] = palette[pp & 0x0F];
 | |
|         c16[1] = palette[(pp >> 4) & 0x0F];
 | |
|         c16[2] = palette[(pp >> 8) & 0x0F];
 | |
|         c16[3] = palette[pp >> 12];
 | |
|         pp = *(page_ptr++);
 | |
|         c16[4] = palette[pp & 0x0F];
 | |
|         c16[5] = palette[(pp >> 4) & 0x0F];
 | |
|         c16[6] = palette[(pp >> 8) & 0x0F];
 | |
|         c16[7] = palette[pp >> 12];
 | |
|         ConvertVRAMPixels<format>(dest_ptr, GSVector4i::load<true>(c16));
 | |
|       }
 | |
| #endif
 | |
| 
 | |
|       for (; x < vram_width; x++)
 | |
|       {
 | |
|         const u32 pp = *(page_ptr++);
 | |
|         ConvertVRAMPixel<format>(dest_ptr, palette[pp & 0x0F]);
 | |
|         ConvertVRAMPixel<format>(dest_ptr, palette[(pp >> 4) & 0x0F]);
 | |
|         ConvertVRAMPixel<format>(dest_ptr, palette[(pp >> 8) & 0x0F]);
 | |
|         ConvertVRAMPixel<format>(dest_ptr, palette[pp >> 12]);
 | |
|       }
 | |
| 
 | |
|       page += VRAM_WIDTH;
 | |
|       dest += dest_stride;
 | |
|     }
 | |
|   }
 | |
|   else
 | |
|   {
 | |
|     for (u32 y = 0; y < height; y++)
 | |
|     {
 | |
|       const u16* page_ptr = page;
 | |
|       u8* dest_ptr = dest;
 | |
| 
 | |
|       u32 offs = 0;
 | |
|       u16 texel = 0;
 | |
|       for (u32 x = 0; x < width; x++)
 | |
|       {
 | |
|         if (offs == 0)
 | |
|           texel = *(page_ptr++);
 | |
| 
 | |
|         ConvertVRAMPixel<format>(dest_ptr, palette[texel & 0x0F]);
 | |
|         texel >>= 4;
 | |
| 
 | |
|         offs = (offs + 1) % 4;
 | |
|       }
 | |
| 
 | |
|       page += VRAM_WIDTH;
 | |
|       dest += dest_stride;
 | |
|     }
 | |
|   }
 | |
| }
 | |
| 
 | |
| template<GPUTexture::Format format>
 | |
| void GPUTextureCache::DecodeTexture8(const u16* page, const u16* palette, u32 width, u32 height, u8* dest,
 | |
|                                      u32 dest_stride)
 | |
| {
 | |
|   if ((width % 2u) == 0)
 | |
|   {
 | |
|     const u32 vram_width = width / 2;
 | |
|     [[maybe_unused]] constexpr u32 vram_pixels_per_vec = 4;
 | |
|     [[maybe_unused]] const u32 aligned_vram_width = Common::AlignDownPow2(vram_width, vram_pixels_per_vec);
 | |
| 
 | |
|     for (u32 y = 0; y < height; y++)
 | |
|     {
 | |
|       const u16* page_ptr = page;
 | |
|       u8* dest_ptr = dest;
 | |
|       u32 x = 0;
 | |
| 
 | |
| #ifdef CPU_ARCH_SIMD
 | |
|       for (; x < aligned_vram_width; x += vram_pixels_per_vec)
 | |
|       {
 | |
|         // No variable shift without AVX, kinda pointless to vectorize the extract...
 | |
|         alignas(VECTOR_ALIGNMENT) u16 c16[vram_pixels_per_vec * 2];
 | |
|         u32 pp = *(page_ptr++);
 | |
|         c16[0] = palette[pp & 0xFF];
 | |
|         c16[1] = palette[(pp >> 8) & 0xFF];
 | |
|         pp = *(page_ptr++);
 | |
|         c16[2] = palette[pp & 0xFF];
 | |
|         c16[3] = palette[(pp >> 8) & 0xFF];
 | |
|         pp = *(page_ptr++);
 | |
|         c16[4] = palette[pp & 0xFF];
 | |
|         c16[5] = palette[(pp >> 8) & 0xFF];
 | |
|         pp = *(page_ptr++);
 | |
|         c16[6] = palette[pp & 0xFF];
 | |
|         c16[7] = palette[(pp >> 8) & 0xFF];
 | |
|         ConvertVRAMPixels<format>(dest_ptr, GSVector4i::load<true>(c16));
 | |
|       }
 | |
| #endif
 | |
| 
 | |
|       for (; x < vram_width; x++)
 | |
|       {
 | |
|         const u32 pp = *(page_ptr++);
 | |
|         ConvertVRAMPixel<format>(dest_ptr, palette[pp & 0xFF]);
 | |
|         ConvertVRAMPixel<format>(dest_ptr, palette[pp >> 8]);
 | |
|       }
 | |
| 
 | |
|       page += VRAM_WIDTH;
 | |
|       dest += dest_stride;
 | |
|     }
 | |
|   }
 | |
|   else
 | |
|   {
 | |
|     for (u32 y = 0; y < height; y++)
 | |
|     {
 | |
|       const u16* page_ptr = page;
 | |
|       u8* dest_ptr = dest;
 | |
| 
 | |
|       u32 offs = 0;
 | |
|       u16 texel = 0;
 | |
|       for (u32 x = 0; x < width; x++)
 | |
|       {
 | |
|         if (offs == 0)
 | |
|           texel = *(page_ptr++);
 | |
| 
 | |
|         ConvertVRAMPixel<format>(dest_ptr, palette[texel & 0xFF]);
 | |
|         texel >>= 8;
 | |
| 
 | |
|         offs ^= 1;
 | |
|       }
 | |
| 
 | |
|       page += VRAM_WIDTH;
 | |
|       dest += dest_stride;
 | |
|     }
 | |
|   }
 | |
| }
 | |
| 
 | |
| template<GPUTexture::Format format>
 | |
| void GPUTextureCache::DecodeTexture16(const u16* page, u32 width, u32 height, u8* dest, u32 dest_stride)
 | |
| {
 | |
|   [[maybe_unused]] constexpr u32 pixels_per_vec = 8;
 | |
|   [[maybe_unused]] const u32 aligned_width = Common::AlignDownPow2(width, pixels_per_vec);
 | |
| 
 | |
|   for (u32 y = 0; y < height; y++)
 | |
|   {
 | |
|     const u16* page_ptr = page;
 | |
|     u8* dest_ptr = dest;
 | |
|     u32 x = 0;
 | |
| 
 | |
| #ifdef CPU_ARCH_SIMD
 | |
|     for (; x < aligned_width; x += pixels_per_vec)
 | |
|     {
 | |
|       ConvertVRAMPixels<format>(dest_ptr, GSVector4i::load<false>(page_ptr));
 | |
|       page_ptr += pixels_per_vec;
 | |
|     }
 | |
| #endif
 | |
| 
 | |
|     for (; x < width; x++)
 | |
|       ConvertVRAMPixel<format>(dest_ptr, *(page_ptr++));
 | |
| 
 | |
|     page += VRAM_WIDTH;
 | |
|     dest += dest_stride;
 | |
|   }
 | |
| }
 | |
| 
 | |
| void GPUTextureCache::DecodeTexture(GPUTextureMode mode, const u16* page_ptr, const u16* palette, u8* dest,
 | |
|                                     u32 dest_stride, u32 width, u32 height, GPUTexture::Format dest_format)
 | |
| {
 | |
|   if (dest_format == GPUTexture::Format::RGBA8)
 | |
|   {
 | |
|     switch (mode)
 | |
|     {
 | |
|       case GPUTextureMode::Palette4Bit:
 | |
|         DecodeTexture4<GPUTexture::Format::RGBA8>(page_ptr, palette, width, height, dest, dest_stride);
 | |
|         break;
 | |
|       case GPUTextureMode::Palette8Bit:
 | |
|         DecodeTexture8<GPUTexture::Format::RGBA8>(page_ptr, palette, width, height, dest, dest_stride);
 | |
|         break;
 | |
|       case GPUTextureMode::Direct16Bit:
 | |
|       case GPUTextureMode::Reserved_Direct16Bit:
 | |
|         DecodeTexture16<GPUTexture::Format::RGBA8>(page_ptr, width, height, dest, dest_stride);
 | |
|         break;
 | |
| 
 | |
|         DefaultCaseIsUnreachable()
 | |
|     }
 | |
|   }
 | |
|   else if (dest_format == GPUTexture::Format::RGB5A1)
 | |
|   {
 | |
|     switch (mode)
 | |
|     {
 | |
|       case GPUTextureMode::Palette4Bit:
 | |
|         DecodeTexture4<GPUTexture::Format::RGB5A1>(page_ptr, palette, width, height, dest, dest_stride);
 | |
|         break;
 | |
|       case GPUTextureMode::Palette8Bit:
 | |
|         DecodeTexture8<GPUTexture::Format::RGB5A1>(page_ptr, palette, width, height, dest, dest_stride);
 | |
|         break;
 | |
|       case GPUTextureMode::Direct16Bit:
 | |
|       case GPUTextureMode::Reserved_Direct16Bit:
 | |
|         DecodeTexture16<GPUTexture::Format::RGB5A1>(page_ptr, width, height, dest, dest_stride);
 | |
|         break;
 | |
| 
 | |
|         DefaultCaseIsUnreachable()
 | |
|     }
 | |
|   }
 | |
|   else if (dest_format == GPUTexture::Format::A1BGR5)
 | |
|   {
 | |
|     switch (mode)
 | |
|     {
 | |
|       case GPUTextureMode::Palette4Bit:
 | |
|         DecodeTexture4<GPUTexture::Format::A1BGR5>(page_ptr, palette, width, height, dest, dest_stride);
 | |
|         break;
 | |
|       case GPUTextureMode::Palette8Bit:
 | |
|         DecodeTexture8<GPUTexture::Format::A1BGR5>(page_ptr, palette, width, height, dest, dest_stride);
 | |
|         break;
 | |
|       case GPUTextureMode::Direct16Bit:
 | |
|       case GPUTextureMode::Reserved_Direct16Bit:
 | |
|         DecodeTexture16<GPUTexture::Format::A1BGR5>(page_ptr, width, height, dest, dest_stride);
 | |
|         break;
 | |
| 
 | |
|         DefaultCaseIsUnreachable()
 | |
|     }
 | |
|   }
 | |
|   else
 | |
|   {
 | |
|     Panic("Unsupported texture format.");
 | |
|   }
 | |
| }
 | |
| 
 | |
| void GPUTextureCache::DecodeTexture(u8 page, GPUTexturePaletteReg palette, GPUTextureMode mode, GPUTexture* texture)
 | |
| {
 | |
|   alignas(16) static u8 s_temp_buffer[TEXTURE_PAGE_WIDTH * TEXTURE_PAGE_HEIGHT * sizeof(u32)];
 | |
| 
 | |
|   const u32 ps = texture->GetPixelSize();
 | |
|   u8* tex_map;
 | |
|   u32 tex_stride;
 | |
|   const bool mapped =
 | |
|     texture->Map(reinterpret_cast<void**>(&tex_map), &tex_stride, 0, 0, TEXTURE_PAGE_WIDTH, TEXTURE_PAGE_HEIGHT);
 | |
|   if (!mapped)
 | |
|   {
 | |
|     tex_map = s_temp_buffer;
 | |
|     tex_stride = Common::AlignUpPow2(ps * TEXTURE_PAGE_WIDTH, 4);
 | |
|   }
 | |
| 
 | |
|   const u16* page_ptr = VRAMPagePointer(page);
 | |
|   const u16* palette_ptr = TextureModeHasPalette(mode) ? VRAMPalettePointer(palette) : nullptr;
 | |
|   DecodeTexture(mode, page_ptr, palette_ptr, tex_map, tex_stride, TEXTURE_PAGE_WIDTH, TEXTURE_PAGE_HEIGHT,
 | |
|                 texture->GetFormat());
 | |
| 
 | |
|   if (mapped)
 | |
|     texture->Unmap();
 | |
|   else
 | |
|     texture->Update(0, 0, TEXTURE_PAGE_WIDTH, TEXTURE_PAGE_HEIGHT, tex_map, tex_stride);
 | |
| }
 | |
| 
 | |
| const GPUTextureCache::Source* GPUTextureCache::LookupSource(SourceKey key, const GSVector4i rect,
 | |
|                                                              PaletteRecordFlags flags)
 | |
| {
 | |
|   GL_SCOPE_FMT("TC: Lookup source {}", SourceKeyToString(key));
 | |
| 
 | |
|   TList<Source>& list = s_state.pages[key.page].sources;
 | |
|   for (TListNode<Source>* n = list.head; n; n = n->next)
 | |
|   {
 | |
|     if (n->ref->key == key)
 | |
|     {
 | |
|       GL_INS("TC: Source hit");
 | |
|       ListMoveToFront(&list, n);
 | |
|       return ReturnSource(n->ref, rect, flags);
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   return ReturnSource(CreateSource(key), rect, flags);
 | |
| }
 | |
| 
 | |
| const GPUTextureCache::Source* GPUTextureCache::ReturnSource(Source* source, const GSVector4i uv_rect,
 | |
|                                                              PaletteRecordFlags flags)
 | |
| {
 | |
| #if defined(_DEBUG) || defined(_DEVEL)
 | |
|   // GL_INS_FMT("Tex hash: {:016X}", source->texture_hash);
 | |
|   // GL_INS_FMT("Palette hash: {:016X}", source->palette_hash);
 | |
|   if (!uv_rect.eq(INVALID_RECT))
 | |
|   {
 | |
|     LoopXWrappedPages(source->key.page, TexturePageCountForMode(source->key.mode), [&uv_rect](u32 pn) {
 | |
|       const PageEntry& pe = s_state.pages[pn];
 | |
|       ListIterate(pe.writes, [&uv_rect](const VRAMWrite* vrw) {
 | |
|         if (const GSVector4i intersection = uv_rect.rintersect(vrw->write_rect); !intersection.rempty())
 | |
|           GL_INS_FMT("TC: VRAM write was {:016X} ({})", vrw->hash, intersection);
 | |
|       });
 | |
|     });
 | |
|     if (TextureModeHasPalette(source->key.mode))
 | |
|       GL_INS_FMT("TC: Palette was {:016X}", source->palette_hash);
 | |
|   }
 | |
| #endif
 | |
| 
 | |
|   DebugAssert(source->from_hash_cache);
 | |
|   source->from_hash_cache->last_used_frame = System::GetFrameNumber();
 | |
| 
 | |
|   // TODO: Cache var.
 | |
|   if (g_gpu_settings.texture_replacements.dump_textures)
 | |
|   {
 | |
|     source->active_uv_rect = source->active_uv_rect.runion(uv_rect);
 | |
|     source->palette_record_flags |= flags;
 | |
|   }
 | |
| 
 | |
|   return source;
 | |
| }
 | |
| 
 | |
| bool GPUTextureCache::IsPageDrawn(u32 page_index, const GSVector4i rect)
 | |
| {
 | |
|   const PageEntry& page = s_state.pages[page_index];
 | |
|   if (page.num_draw_rects == 0 || !page.total_draw_rect.rintersects(rect))
 | |
|     return false;
 | |
| 
 | |
|   // if there's only a single draw rect, it'll match the total
 | |
|   if (page.num_draw_rects == 1)
 | |
|     return true;
 | |
| 
 | |
|   for (u32 i = 0; i < page.num_draw_rects; i++)
 | |
|   {
 | |
|     if (page.draw_rects[i].rintersects(rect))
 | |
|       return true;
 | |
|   }
 | |
| 
 | |
|   return false;
 | |
| }
 | |
| 
 | |
| bool GPUTextureCache::IsRectDrawn(const GSVector4i rect)
 | |
| {
 | |
|   // TODO: This is potentially hot, so replace it with an explicit loop over the pages instead.
 | |
|   return !LoopRectPagesWithEarlyExit(rect, [&rect](u32 pn) { return !IsPageDrawn(pn, rect); });
 | |
| }
 | |
| 
 | |
| bool GPUTextureCache::AreSourcePagesDrawn(SourceKey key, const GSVector4i rect)
 | |
| {
 | |
|   // NOTE: This doesn't handle VRAM wrapping. But neither does the caller. YOLO?
 | |
| #if defined(_DEBUG) || defined(_DEVEL)
 | |
|   {
 | |
|     for (u32 offset = 0; offset < TexturePageCountForMode(key.mode); offset++)
 | |
|     {
 | |
|       const u32 wrapped_page = ((key.page + offset) & VRAM_PAGE_X_MASK) + (key.page & VRAM_PAGE_Y_MASK);
 | |
|       if (IsPageDrawn(wrapped_page, rect))
 | |
|       {
 | |
|         GL_INS_FMT("UV rect {} intersects page [{}] dirty rect {}, disabling TC", rect, wrapped_page,
 | |
|                    s_state.pages[wrapped_page].total_draw_rect);
 | |
|       }
 | |
|     }
 | |
|   }
 | |
| #endif
 | |
| 
 | |
|   switch (key.mode)
 | |
|   {
 | |
|     case GPUTextureMode::Palette4Bit:
 | |
|     {
 | |
|       return IsPageDrawn(key.page, rect);
 | |
|     }
 | |
| 
 | |
|     case GPUTextureMode::Palette8Bit:
 | |
|     {
 | |
|       // 2 P4 pages per P8 page.
 | |
|       const u32 yoffs = (key.page & VRAM_PAGE_Y_MASK);
 | |
|       return (IsPageDrawn(key.page, rect) || IsPageDrawn(((key.page + 1) & VRAM_PAGE_X_MASK) + yoffs, rect));
 | |
|     }
 | |
| 
 | |
|     case GPUTextureMode::Direct16Bit:
 | |
|     case GPUTextureMode::Reserved_Direct16Bit:
 | |
|     {
 | |
|       // 4 P4 pages per C16 page.
 | |
|       const u32 yoffs = (key.page & VRAM_PAGE_Y_MASK);
 | |
|       return (IsPageDrawn(key.page, rect) || IsPageDrawn(((key.page + 1) & VRAM_PAGE_X_MASK) + yoffs, rect) ||
 | |
|               IsPageDrawn(((key.page + 2) & VRAM_PAGE_X_MASK) + yoffs, rect) ||
 | |
|               IsPageDrawn(((key.page + 3) & VRAM_PAGE_X_MASK) + yoffs, rect));
 | |
|     }
 | |
| 
 | |
|       DefaultCaseIsUnreachable()
 | |
|   }
 | |
| }
 | |
| 
 | |
| void GPUTextureCache::Invalidate()
 | |
| {
 | |
|   for (u32 i = 0; i < NUM_VRAM_PAGES; i++)
 | |
|   {
 | |
|     InvalidatePageSources(i);
 | |
| 
 | |
|     PageEntry& page = s_state.pages[i];
 | |
|     page.num_draw_rects = 0;
 | |
|     page.total_draw_rect = GSVector4i::zero();
 | |
|     std::memset(page.draw_rects.data(), 0, sizeof(page.draw_rects));
 | |
| 
 | |
|     while (page.writes.tail)
 | |
|       RemoveVRAMWrite(page.writes.tail->ref);
 | |
|   }
 | |
| 
 | |
|   // should all be null
 | |
| #if defined(_DEBUG) || defined(_DEVEL)
 | |
|   for (u32 i = 0; i < NUM_VRAM_PAGES; i++)
 | |
|     DebugAssert(!s_state.pages[i].sources.head && !s_state.pages[i].sources.tail);
 | |
|   DebugAssert(!s_state.last_vram_write);
 | |
| #endif
 | |
| 
 | |
|   ClearHashCache();
 | |
| }
 | |
| 
 | |
| void GPUTextureCache::InvalidateSources()
 | |
| {
 | |
|   // keep draw rects and vram writes
 | |
|   for (u32 i = 0; i < NUM_VRAM_PAGES; i++)
 | |
|     InvalidatePageSources(i);
 | |
| 
 | |
|   ClearHashCache();
 | |
| }
 | |
| 
 | |
| void GPUTextureCache::InvalidatePageSources(u32 pn)
 | |
| {
 | |
|   DebugAssert(pn < NUM_VRAM_PAGES);
 | |
| 
 | |
|   TList<Source>& ps = s_state.pages[pn].sources;
 | |
|   if (ps.head)
 | |
|     GL_INS_FMT("Invalidate page {} sources", pn);
 | |
| 
 | |
|   for (TListNode<Source>* n = ps.head; n;)
 | |
|   {
 | |
|     Source* src = n->ref;
 | |
|     n = n->next;
 | |
| 
 | |
|     DestroySource(src);
 | |
|   }
 | |
| 
 | |
|   DebugAssert(!ps.head && !ps.tail);
 | |
| }
 | |
| 
 | |
| void GPUTextureCache::InvalidatePageSources(u32 pn, const GSVector4i rc, bool remove_from_hash_cache)
 | |
| {
 | |
|   DebugAssert(pn < NUM_VRAM_PAGES);
 | |
| 
 | |
|   TList<Source>& ps = s_state.pages[pn].sources;
 | |
|   for (TListNode<Source>* n = ps.head; n;)
 | |
|   {
 | |
|     Source* src = n->ref;
 | |
|     n = n->next;
 | |
| 
 | |
|     // TODO: Make faster?
 | |
|     if (!src->texture_rect.rintersects(rc) &&
 | |
|         (src->key.mode == GPUTextureMode::Direct16Bit || !src->palette_rect.rintersects(rc)))
 | |
|     {
 | |
|       continue;
 | |
|     }
 | |
| 
 | |
|     GL_INS_FMT("Invalidate source {} in page {} due to overlapping with {}", SourceToString(src), pn, rc);
 | |
|     DestroySource(src, remove_from_hash_cache);
 | |
|   }
 | |
| }
 | |
| 
 | |
| void GPUTextureCache::DestroySource(Source* src, bool remove_from_hash_cache)
 | |
| {
 | |
|   GL_INS_FMT("Invalidate source {}", SourceToString(src));
 | |
| 
 | |
|   if (g_gpu_settings.texture_replacements.dump_textures && !src->active_uv_rect.eq(INVALID_RECT))
 | |
|   {
 | |
|     if (!s_state.config.dump_texture_pages)
 | |
|     {
 | |
|       // Find VRAM writes that overlap with this source
 | |
|       LoopRectPages(src->active_uv_rect, [src](const u32 pn) {
 | |
|         PageEntry& pg = s_state.pages[pn];
 | |
|         ListIterate(pg.writes, [src](VRAMWrite* vw) {
 | |
|           UpdateVRAMWriteSources(vw, src->key, src->active_uv_rect, src->palette_record_flags);
 | |
|         });
 | |
|         return true;
 | |
|       });
 | |
|     }
 | |
|     else
 | |
|     {
 | |
|       DumpTextureFromPage(src);
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   for (u32 i = 0; i < src->num_page_refs; i++)
 | |
|     ListUnlink(src->page_refs[i]);
 | |
| 
 | |
|   HashCacheEntry* hcentry = src->from_hash_cache;
 | |
|   DebugAssert(hcentry && hcentry->ref_count > 0);
 | |
|   ListUnlink(src->hash_cache_ref);
 | |
|   hcentry->ref_count--;
 | |
|   if (hcentry->ref_count == 0 && remove_from_hash_cache)
 | |
|     RemoveFromHashCache(hcentry, src->key, src->texture_hash, src->palette_hash);
 | |
| 
 | |
|   delete src;
 | |
| }
 | |
| 
 | |
| GPUTextureCache::Source* GPUTextureCache::CreateSource(SourceKey key)
 | |
| {
 | |
|   GL_INS_FMT("TC: Create source {}", SourceKeyToString(key));
 | |
| 
 | |
|   const HashType tex_hash = HashPage(key.page, key.mode);
 | |
|   const HashType pal_hash = (key.mode < GPUTextureMode::Direct16Bit) ? HashPalette(key.palette, key.mode) : 0;
 | |
|   HashCacheEntry* hcentry = LookupHashCache(key, tex_hash, pal_hash);
 | |
|   if (!hcentry)
 | |
|   {
 | |
|     GL_INS("TC: Hash cache lookup fail?!");
 | |
|     return nullptr;
 | |
|   }
 | |
| 
 | |
|   hcentry->ref_count++;
 | |
| 
 | |
|   Source* src = new Source();
 | |
|   src->key = key;
 | |
|   src->num_page_refs = 0;
 | |
|   src->texture = hcentry->texture.get();
 | |
|   src->from_hash_cache = hcentry;
 | |
|   ListAppend(&hcentry->sources, src, &src->hash_cache_ref);
 | |
|   src->texture_hash = tex_hash;
 | |
|   src->palette_hash = pal_hash;
 | |
| 
 | |
|   // Textures at front, CLUTs at back.
 | |
|   std::array<u32, MAX_PAGE_REFS_PER_SOURCE> page_refns;
 | |
|   const auto add_page_ref = [src, &page_refns](u32 pn) {
 | |
|     // Don't double up references
 | |
|     for (u32 i = 0; i < src->num_page_refs; i++)
 | |
|     {
 | |
|       if (page_refns[i] == pn)
 | |
|         return;
 | |
|     }
 | |
| 
 | |
|     const u32 ri = src->num_page_refs++;
 | |
|     page_refns[ri] = pn;
 | |
| 
 | |
|     ListPrepend(&s_state.pages[pn].sources, src, &src->page_refs[ri]);
 | |
|   };
 | |
|   const auto add_page_ref_back = [src, &page_refns](u32 pn) {
 | |
|     // Don't double up references
 | |
|     for (u32 i = 0; i < src->num_page_refs; i++)
 | |
|     {
 | |
|       if (page_refns[i] == pn)
 | |
|         return;
 | |
|     }
 | |
| 
 | |
|     const u32 ri = src->num_page_refs++;
 | |
|     page_refns[ri] = pn;
 | |
| 
 | |
|     ListAppend(&s_state.pages[pn].sources, src, &src->page_refs[ri]);
 | |
|   };
 | |
| 
 | |
|   src->texture_rect = GetTextureRect(key.page, key.mode);
 | |
|   src->active_uv_rect = INVALID_RECT;
 | |
|   LoopXWrappedPages(key.page, TexturePageCountForMode(key.mode), add_page_ref);
 | |
| 
 | |
|   if (key.mode < GPUTextureMode::Direct16Bit)
 | |
|   {
 | |
|     src->palette_rect = GetPaletteRect(key.palette, key.mode, true);
 | |
|     LoopXWrappedPages(PalettePageNumber(key.palette), PalettePageCountForMode(key.mode), add_page_ref_back);
 | |
|   }
 | |
| 
 | |
|   GL_INS_FMT("Appended new source {} to {} pages", SourceToString(src), src->num_page_refs);
 | |
|   return src;
 | |
| }
 | |
| 
 | |
| void GPUTextureCache::UpdateVRAMTrackingState()
 | |
| {
 | |
|   s_state.track_vram_writes = ShouldTrackVRAMWrites();
 | |
| }
 | |
| 
 | |
| std::pair<u32, u32> GPUTextureCache::ReducePaletteBounds(const GSVector4i rect, GPUTextureMode mode,
 | |
|                                                          GPUTexturePaletteReg palette)
 | |
| {
 | |
|   DebugAssert(TextureModeHasPalette(mode));
 | |
|   u32 pal_min = GetPaletteWidth(mode) - 1;
 | |
|   u32 pal_max = 0;
 | |
| 
 | |
|   const u32 rect_width = rect.width();
 | |
|   const u32 rect_height = rect.height();
 | |
| 
 | |
|   if (mode == GPUTextureMode::Palette4Bit)
 | |
|   {
 | |
|     const u16* row_ptr = &g_vram[rect.y * VRAM_WIDTH + rect.x];
 | |
|     for (u32 y = 0; y < rect_height; y++)
 | |
|     {
 | |
|       const u16* ptr = row_ptr;
 | |
|       row_ptr += VRAM_WIDTH;
 | |
| 
 | |
|       for (u32 x = 0; x < rect_width; x++)
 | |
|       {
 | |
|         const u16 val = *(ptr++);
 | |
|         const u32 p0 = val & 0xf;
 | |
|         const u32 p1 = (val >> 4) & 0xf;
 | |
|         const u32 p2 = (val >> 8) & 0xf;
 | |
|         const u32 p3 = (val >> 12) & 0xf;
 | |
|         pal_min = std::min(pal_min, std::min(p0, std::min(p1, std::min(p2, p3))));
 | |
|         pal_max = std::max(pal_max, std::max(p0, std::max(p1, std::max(p2, p3))));
 | |
|       }
 | |
|     }
 | |
|   }
 | |
|   else // if (mode == GPUTextureMode::Palette8Bit)
 | |
|   {
 | |
|     const u32 aligned_width = Common::AlignDownPow2(rect_width, 8);
 | |
|     const u16* row_ptr = &g_vram[rect.y * VRAM_WIDTH + rect.x];
 | |
|     for (u32 y = 0; y < rect_height; y++)
 | |
|     {
 | |
|       const u16* ptr = row_ptr;
 | |
|       row_ptr += VRAM_WIDTH;
 | |
| 
 | |
|       if (aligned_width > 0) [[likely]]
 | |
|       {
 | |
|         GSVector4i min = GSVector4i::load<false>(ptr);
 | |
|         GSVector4i max = min;
 | |
|         ptr += 8;
 | |
| 
 | |
|         for (u32 x = 8; x < aligned_width; x += 8)
 | |
|         {
 | |
|           const GSVector4i v = GSVector4i::load<false>(ptr);
 | |
|           ptr += 8;
 | |
| 
 | |
|           min = min.min_u8(v);
 | |
|           max = max.max_u8(v);
 | |
|         }
 | |
| 
 | |
|         pal_min = std::min<u32>(pal_min, min.minv_u8());
 | |
|         pal_max = std::max<u32>(pal_max, max.maxv_u8());
 | |
|       }
 | |
| 
 | |
|       for (u32 x = aligned_width; x < rect_width; x++)
 | |
|       {
 | |
|         const u16 val = *(ptr++);
 | |
|         const u32 p0 = (val & 0xFF);
 | |
|         const u32 p1 = (val >> 8);
 | |
|         pal_min = std::min<u32>(pal_min, std::min(p0, p1));
 | |
|         pal_max = std::max<u32>(pal_max, std::max(p0, p1));
 | |
|       }
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   // Clamp to VRAM bounds.
 | |
|   const u32 x_base = palette.GetXBase();
 | |
|   if ((x_base + pal_max) >= VRAM_WIDTH) [[unlikely]]
 | |
|   {
 | |
|     WARNING_LOG("Texture with CLUT at {},{} is outside of VRAM bounds, clamping.", x_base, palette.GetYBase());
 | |
|     pal_min = std::min(pal_min, VRAM_WIDTH - x_base - 1);
 | |
|     pal_max = std::min(pal_max, VRAM_WIDTH - x_base - 1);
 | |
|   }
 | |
| 
 | |
|   return std::make_pair(pal_min, pal_max);
 | |
| }
 | |
| 
 | |
| void GPUTextureCache::SyncVRAMWritePaletteRecords(VRAMWrite* entry)
 | |
| {
 | |
|   // Have to go through any sources that intersect this write, because they may not have been invalidated yet, in which
 | |
|   // case the active rect also will not have been updated.
 | |
|   if (IsDumpingVRAMWriteTextures())
 | |
|   {
 | |
|     LoopRectPages(entry->active_rect, [entry](const u32 pn) {
 | |
|       const PageEntry& page = s_state.pages[pn];
 | |
|       ListIterate(page.sources, [entry](const Source* src) {
 | |
|         if (!src->active_uv_rect.eq(INVALID_RECT))
 | |
|           UpdateVRAMWriteSources(entry, src->key, src->active_uv_rect, src->palette_record_flags);
 | |
|       });
 | |
| 
 | |
|       return true;
 | |
|     });
 | |
|   }
 | |
| }
 | |
| 
 | |
| void GPUTextureCache::UpdateVRAMWriteSources(VRAMWrite* entry, SourceKey source_key, const GSVector4i global_uv_rect,
 | |
|                                              PaletteRecordFlags flags)
 | |
| {
 | |
|   // convert to VRAM write space
 | |
|   const GSVector4i write_intersection = entry->active_rect.rintersect(global_uv_rect);
 | |
|   if (write_intersection.rempty())
 | |
|     return;
 | |
| 
 | |
|   // Add to the palette tracking list
 | |
|   std::vector<VRAMWrite::PaletteRecord>::iterator iter;
 | |
|   if (source_key.HasPalette())
 | |
|   {
 | |
|     // Palette requires exact match.
 | |
|     iter = std::find_if(entry->palette_records.begin(), entry->palette_records.end(),
 | |
|                         [&source_key](const auto& it) { return (it.key == source_key); });
 | |
|   }
 | |
|   else
 | |
|   {
 | |
|     // C16 only needs to match on the mode, palette is not used, and page doesn't matter.
 | |
|     // In theory we could extend the page skipping to palette textures too, but is it needed?
 | |
|     iter = std::find_if(entry->palette_records.begin(), entry->palette_records.end(),
 | |
|                         [&source_key](const auto& it) { return (it.key.mode == source_key.mode); });
 | |
|   }
 | |
| 
 | |
|   if (iter != entry->palette_records.end())
 | |
|   {
 | |
|     iter->rect = iter->rect.runion(write_intersection);
 | |
|     iter->flags |= flags;
 | |
|   }
 | |
|   else
 | |
|   {
 | |
|     InitializeVRAMWritePaletteRecord(&entry->palette_records.emplace_back(), source_key, write_intersection, flags);
 | |
|   }
 | |
| }
 | |
| 
 | |
| void GPUTextureCache::SplitVRAMWrite(VRAMWrite* entry, const GSVector4i written_rect)
 | |
| {
 | |
|   SyncVRAMWritePaletteRecords(entry);
 | |
| 
 | |
|   const s32 to_left = (written_rect.left - entry->active_rect.left);
 | |
|   const s32 to_right = (entry->active_rect.right - written_rect.right);
 | |
|   const s32 to_top = (written_rect.top - entry->active_rect.top);
 | |
|   const s32 to_bottom = (entry->active_rect.bottom - written_rect.bottom);
 | |
|   DebugAssert(to_left > 0 || to_right > 0 || to_top > 0 || to_bottom > 0);
 | |
| 
 | |
|   entry->num_splits++;
 | |
| 
 | |
|   GSVector4i rects[4];
 | |
| 
 | |
|   // TODO: more efficient vector swizzle
 | |
|   if (std::max(to_top, to_bottom) > std::max(to_left, to_right))
 | |
|   {
 | |
|     // split top/bottom, then left/right
 | |
|     rects[0] = GSVector4i(entry->active_rect.left, entry->active_rect.top, entry->active_rect.right, written_rect.top);
 | |
|     rects[1] =
 | |
|       GSVector4i(entry->active_rect.left, written_rect.bottom, entry->active_rect.right, entry->active_rect.bottom);
 | |
|     rects[2] = GSVector4i(entry->active_rect.left, entry->active_rect.top + to_top, entry->active_rect.left + to_left,
 | |
|                           entry->active_rect.bottom - to_bottom);
 | |
|     rects[3] = GSVector4i(entry->active_rect.right - to_right, entry->active_rect.top + to_top,
 | |
|                           entry->active_rect.right, entry->active_rect.bottom - to_bottom);
 | |
|   }
 | |
|   else
 | |
|   {
 | |
|     // split left/right, then top/bottom
 | |
|     rects[0] =
 | |
|       GSVector4i(entry->active_rect.left, entry->active_rect.top, written_rect.left, entry->active_rect.bottom);
 | |
|     rects[1] =
 | |
|       GSVector4i(written_rect.right, entry->active_rect.top, entry->active_rect.right, entry->active_rect.bottom);
 | |
|     rects[2] = GSVector4i(entry->active_rect.left + to_left, entry->active_rect.top + to_top,
 | |
|                           written_rect.right - to_right, entry->active_rect.top - to_top);
 | |
|     rects[3] = GSVector4i(entry->active_rect.left + to_left, entry->active_rect.bottom - to_bottom,
 | |
|                           written_rect.right - to_right, entry->active_rect.bottom);
 | |
|   }
 | |
| 
 | |
|   for (size_t i = 0; i < std::size(rects); i++)
 | |
|   {
 | |
|     const GSVector4i splitr = rects[i];
 | |
|     if (splitr.rempty())
 | |
|       continue;
 | |
| 
 | |
|     VRAMWrite* it = new VRAMWrite();
 | |
|     it->write_rect = entry->write_rect;
 | |
|     it->active_rect = splitr;
 | |
|     it->hash = entry->hash;
 | |
|     it->num_splits = entry->num_splits;
 | |
|     it->num_page_refs = 0;
 | |
| 
 | |
|     // TODO: We probably want to share this...
 | |
|     it->palette_records.reserve(entry->palette_records.size());
 | |
|     for (const VRAMWrite::PaletteRecord& prec : it->palette_records)
 | |
|     {
 | |
|       if (prec.rect.rintersects(splitr))
 | |
|         it->palette_records.push_back(prec);
 | |
|     }
 | |
| 
 | |
|     LoopRectPages(splitr, [it](u32 pn) {
 | |
|       DebugAssert(it->num_page_refs < MAX_PAGE_REFS_PER_WRITE);
 | |
|       ListAppend(&s_state.pages[pn].writes, it, &it->page_refs[it->num_page_refs++]);
 | |
|       return true;
 | |
|     });
 | |
| 
 | |
|     DEV_LOG("Split VRAM write {:016X} at {} in direction {} => {}", it->hash, entry->active_rect, i, splitr);
 | |
|   }
 | |
| 
 | |
|   for (u32 i = 0; i < entry->num_page_refs; i++)
 | |
|     ListUnlink(entry->page_refs[i]);
 | |
| 
 | |
|   delete entry;
 | |
| }
 | |
| 
 | |
| bool GPUTextureCache::TryMergeVRAMWrite(VRAMWrite* entry, const GSVector4i written_rect)
 | |
| {
 | |
|   // It shouldn't have been split. Don't want to update after it has been.
 | |
|   if (s_state.last_vram_write->num_splits != 0)
 | |
|     return false;
 | |
| 
 | |
|   // Check coalesce bounds/config.
 | |
|   const u32 coalesce_width = s_state.config.max_vram_write_coalesce_width;
 | |
|   const u32 coalesce_height = s_state.config.max_vram_write_coalesce_height;
 | |
|   const bool merge_vertical = (static_cast<u32>(written_rect.height()) <= coalesce_height &&
 | |
|                                s_state.last_vram_write->write_rect.left == written_rect.left &&
 | |
|                                s_state.last_vram_write->write_rect.right == written_rect.right &&
 | |
|                                s_state.last_vram_write->write_rect.bottom == written_rect.top);
 | |
|   const bool merge_horizontal = (static_cast<u32>(written_rect.width()) <= coalesce_width &&
 | |
|                                  s_state.last_vram_write->write_rect.top == written_rect.top &&
 | |
|                                  s_state.last_vram_write->write_rect.bottom == written_rect.bottom &&
 | |
|                                  s_state.last_vram_write->write_rect.right == written_rect.left);
 | |
|   if (!merge_vertical && !merge_horizontal)
 | |
|     return false;
 | |
| 
 | |
|   // Double-check that nothing has used this write as a source yet (i.e. drawn).
 | |
|   // Don't want to merge textures that are already completely uploaded...
 | |
|   if (!LoopRectPagesWithEarlyExit(entry->active_rect, [entry](const u32 pn) {
 | |
|         return ListIterateWithEarlyExit(s_state.pages[pn].sources, [entry](const Source* src) {
 | |
|           return (!src->active_uv_rect.eq(INVALID_RECT) || !src->active_uv_rect.rintersects(entry->active_rect));
 | |
|         });
 | |
|       }))
 | |
|   {
 | |
|     return false;
 | |
|   }
 | |
| 
 | |
|   // Remove from old pages, we'll re-add it.
 | |
|   for (u32 i = 0; i < entry->num_page_refs; i++)
 | |
|     ListUnlink(entry->page_refs[i]);
 | |
|   entry->num_page_refs = 0;
 | |
| 
 | |
|   // Expand the write.
 | |
|   const GSVector4i new_rect = entry->write_rect.runion(written_rect);
 | |
|   DEV_LOG("Expanding VRAM write {:016X} from {} to {}", entry->hash, entry->write_rect, new_rect);
 | |
|   entry->active_rect = new_rect;
 | |
|   entry->write_rect = new_rect;
 | |
|   entry->hash = HashRect(new_rect);
 | |
| 
 | |
|   // Re-add to pages.
 | |
|   LoopRectPages(new_rect, [entry](u32 pn) {
 | |
|     DebugAssert(entry->num_page_refs < MAX_PAGE_REFS_PER_WRITE);
 | |
|     ListAppend(&s_state.pages[pn].writes, entry, &entry->page_refs[entry->num_page_refs++]);
 | |
|     return true;
 | |
|   });
 | |
| 
 | |
|   return true;
 | |
| }
 | |
| 
 | |
| void GPUTextureCache::RemoveVRAMWrite(VRAMWrite* entry)
 | |
| {
 | |
|   DEV_LOG("Remove VRAM write {:016X} at {}", entry->hash, entry->write_rect);
 | |
| 
 | |
|   SyncVRAMWritePaletteRecords(entry);
 | |
| 
 | |
|   if (entry->num_splits > 0 && !entry->palette_records.empty())
 | |
|   {
 | |
|     // Combine palette records with another write.
 | |
|     VRAMWrite* other_write = nullptr;
 | |
|     LoopRectPagesWithEarlyExit(entry->write_rect, [&entry, &other_write](u32 pn) {
 | |
|       PageEntry& pg = s_state.pages[pn];
 | |
|       ListIterateWithEarlyExit(pg.writes, [&entry, &other_write](VRAMWrite* cur) {
 | |
|         if (cur == entry || cur->hash != entry->hash)
 | |
|           return true;
 | |
| 
 | |
|         other_write = cur;
 | |
|         return false;
 | |
|       });
 | |
|       return (other_write == nullptr);
 | |
|     });
 | |
|     if (other_write)
 | |
|     {
 | |
|       for (const VRAMWrite::PaletteRecord& prec : entry->palette_records)
 | |
|       {
 | |
|         const auto iter = std::find_if(other_write->palette_records.begin(), other_write->palette_records.end(),
 | |
|                                        [&prec](const VRAMWrite::PaletteRecord& it) { return it.key == prec.key; });
 | |
|         if (iter != other_write->palette_records.end())
 | |
|           iter->rect = iter->rect.runion(prec.rect);
 | |
|         else
 | |
|           other_write->palette_records.push_back(prec);
 | |
|       }
 | |
| 
 | |
|       // No dumping from here!
 | |
|       entry->palette_records.clear();
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   for (u32 i = 0; i < entry->num_page_refs; i++)
 | |
|     ListUnlink(entry->page_refs[i]);
 | |
| 
 | |
|   DumpTexturesFromVRAMWrite(entry);
 | |
| 
 | |
|   s_state.last_vram_write = (s_state.last_vram_write == entry) ? nullptr : s_state.last_vram_write;
 | |
|   delete entry;
 | |
| }
 | |
| 
 | |
| void GPUTextureCache::DumpTexturesFromVRAMWrite(VRAMWrite* entry)
 | |
| {
 | |
|   if (g_gpu_settings.texture_replacements.dump_textures && !s_state.config.dump_texture_pages)
 | |
|   {
 | |
|     for (const VRAMWrite::PaletteRecord& prec : entry->palette_records)
 | |
|     {
 | |
|       if (prec.key.mode == GPUTextureMode::Direct16Bit && !s_state.config.dump_c16_textures)
 | |
|         continue;
 | |
| 
 | |
|       HashType pal_hash =
 | |
|         (prec.key.mode < GPUTextureMode::Direct16Bit) ? HashPalette(prec.key.palette, prec.key.mode) : 0;
 | |
| 
 | |
|       // If it's 8-bit, try reducing the range of the palette.
 | |
|       u32 pal_min = 0, pal_max = prec.key.HasPalette() ? (GetPaletteWidth(prec.key.mode) - 1) : 0;
 | |
|       if (prec.key.HasPalette() && s_state.config.reduce_palette_range)
 | |
|       {
 | |
|         std::tie(pal_min, pal_max) = ReducePaletteBounds(prec.rect, prec.key.mode, prec.key.palette);
 | |
|         pal_hash = HashPartialPalette(prec.palette, pal_min, pal_max);
 | |
|       }
 | |
| 
 | |
|       const u32 offset_x = ApplyTextureModeShift(prec.key.mode, prec.rect.left - entry->write_rect.left);
 | |
|       const u32 offset_y = prec.rect.top - entry->write_rect.top;
 | |
| 
 | |
|       DumpTexture(TextureReplacementType::TextureFromVRAMWrite, offset_x, offset_y, entry->write_rect.width(),
 | |
|                   entry->write_rect.height(), prec.key.mode, entry->hash, pal_hash, pal_min, pal_max, prec.palette,
 | |
|                   prec.rect, prec.flags);
 | |
|     }
 | |
|   }
 | |
| }
 | |
| 
 | |
| void GPUTextureCache::DumpTextureFromPage(const Source* src)
 | |
| {
 | |
|   // C16 filter
 | |
|   if (!s_state.config.dump_c16_textures && src->key.mode >= GPUTextureMode::Direct16Bit)
 | |
|     return;
 | |
| 
 | |
|   const bool dump_full_page = s_state.config.dump_full_texture_pages;
 | |
| 
 | |
|   // Dump active area from page
 | |
|   HashType pal_hash = src->palette_hash;
 | |
|   const u16* pal_ptr = src->key.HasPalette() ? VRAMPalettePointer(src->key.palette) : nullptr;
 | |
| 
 | |
|   // We don't want to dump the wraparound
 | |
|   const GSVector4i unwrapped_texture_rect =
 | |
|     (TexturePageIsWrapping(src->key.mode, src->key.page) ?
 | |
|        GSVector4i(VRAMPageStartX(src->key.page), src->texture_rect.y, VRAM_WIDTH, src->texture_rect.w) :
 | |
|        src->texture_rect);
 | |
|   const GSVector4i dump_rect =
 | |
|     dump_full_page ? unwrapped_texture_rect : src->active_uv_rect.rintersect(unwrapped_texture_rect);
 | |
|   if (dump_rect.rempty())
 | |
|     return;
 | |
| 
 | |
|   // Need to hash only the active area.
 | |
|   const HashType tex_hash = HashRect(dump_rect);
 | |
| 
 | |
|   // Source rect needs the offset, but we still only want to hash the active area when replacing
 | |
|   const GSVector4i dump_offset_in_page = dump_rect.sub32(unwrapped_texture_rect);
 | |
| 
 | |
|   // If it's 8-bit, try reducing the range of the palette.
 | |
|   u32 pal_min = 0, pal_max = src->key.HasPalette() ? (GetPaletteWidth(src->key.mode) - 1) : 0;
 | |
|   if (src->key.HasPalette() && s_state.config.reduce_palette_range)
 | |
|   {
 | |
|     std::tie(pal_min, pal_max) = ReducePaletteBounds(dump_rect, src->key.mode, src->key.palette);
 | |
|     pal_hash = HashPartialPalette(pal_ptr, pal_min, pal_max);
 | |
|   }
 | |
| 
 | |
|   DumpTexture(TextureReplacementType::TextureFromPage, ApplyTextureModeShift(src->key.mode, dump_offset_in_page.x),
 | |
|               dump_offset_in_page.y, unwrapped_texture_rect.width(), unwrapped_texture_rect.height(), src->key.mode,
 | |
|               tex_hash, pal_hash, pal_min, pal_max, pal_ptr, dump_rect, src->palette_record_flags);
 | |
| }
 | |
| 
 | |
| GPUTextureCache::HashType GPUTextureCache::HashPage(u8 page, GPUTextureMode mode)
 | |
| {
 | |
|   XXH3_state_t state;
 | |
|   XXH3_64bits_reset(&state);
 | |
| 
 | |
|   // Pages aren't contiguous in memory :(
 | |
|   const u16* page_ptr = VRAMPagePointer(page);
 | |
| 
 | |
|   switch (mode)
 | |
|   {
 | |
|     case GPUTextureMode::Palette4Bit:
 | |
|     {
 | |
|       for (u32 y = 0; y < VRAM_PAGE_HEIGHT; y++)
 | |
|       {
 | |
|         XXH3_64bits_update(&state, page_ptr, VRAM_PAGE_WIDTH * sizeof(u16));
 | |
|         page_ptr += VRAM_WIDTH;
 | |
|       }
 | |
|     }
 | |
|     break;
 | |
| 
 | |
|     case GPUTextureMode::Palette8Bit:
 | |
|     {
 | |
|       for (u32 y = 0; y < VRAM_PAGE_HEIGHT; y++)
 | |
|       {
 | |
|         XXH3_64bits_update(&state, page_ptr, VRAM_PAGE_WIDTH * 2 * sizeof(u16));
 | |
|         page_ptr += VRAM_WIDTH;
 | |
|       }
 | |
|     }
 | |
|     break;
 | |
| 
 | |
|     case GPUTextureMode::Direct16Bit:
 | |
|     {
 | |
|       for (u32 y = 0; y < VRAM_PAGE_HEIGHT; y++)
 | |
|       {
 | |
|         XXH3_64bits_update(&state, page_ptr, VRAM_PAGE_WIDTH * 4 * sizeof(u16));
 | |
|         page_ptr += VRAM_WIDTH;
 | |
|       }
 | |
|     }
 | |
|     break;
 | |
| 
 | |
|       DefaultCaseIsUnreachable()
 | |
|   }
 | |
| 
 | |
|   return XXH3_64bits_digest(&state);
 | |
| }
 | |
| 
 | |
| GPUTextureCache::HashType GPUTextureCache::HashPalette(GPUTexturePaletteReg palette, GPUTextureMode mode)
 | |
| {
 | |
|   const u32 x_base = palette.GetXBase();
 | |
|   const u16* base = VRAMPalettePointer(palette);
 | |
| 
 | |
|   switch (mode)
 | |
|   {
 | |
|     case GPUTextureMode::Palette4Bit:
 | |
|       return XXH3_64bits(base, sizeof(u16) * 16);
 | |
| 
 | |
|     case GPUTextureMode::Palette8Bit:
 | |
|     {
 | |
|       // If the palette wraps around, chances are we aren't using those indices.
 | |
|       // Games that do this: Metal Gear Solid.
 | |
|       if ((x_base + 256) > VRAM_WIDTH) [[unlikely]]
 | |
|         return XXH3_64bits(base, sizeof(u16) * (VRAM_WIDTH - x_base));
 | |
|       else
 | |
|         return XXH3_64bits(base, sizeof(u16) * 256);
 | |
|     }
 | |
| 
 | |
|       DefaultCaseIsUnreachable()
 | |
|   }
 | |
| }
 | |
| 
 | |
| GPUTextureCache::HashType GPUTextureCache::HashPartialPalette(GPUTexturePaletteReg palette, GPUTextureMode mode,
 | |
|                                                               u32 min, u32 max)
 | |
| {
 | |
|   DebugAssert((palette.GetXBase() + max + 1) <= VRAM_WIDTH);
 | |
|   return HashPartialPalette(VRAMPalettePointer(palette), min, max);
 | |
| }
 | |
| 
 | |
| GPUTextureCache::HashType GPUTextureCache::HashPartialPalette(const u16* palette, u32 min, u32 max)
 | |
| {
 | |
|   const u32 size = max - min + 1;
 | |
|   return XXH3_64bits(palette, sizeof(u16) * size);
 | |
| }
 | |
| 
 | |
| GPUTextureCache::HashType GPUTextureCache::HashRect(const GSVector4i rc)
 | |
| {
 | |
|   XXH3_state_t state;
 | |
|   XXH3_64bits_reset(&state);
 | |
| 
 | |
|   const u32 width = rc.width();
 | |
|   const u32 height = rc.height();
 | |
|   const u16* ptr = &g_vram[rc.top * VRAM_WIDTH + rc.left];
 | |
|   for (u32 y = 0; y < height; y++)
 | |
|   {
 | |
|     XXH3_64bits_update(&state, ptr, width * sizeof(u16));
 | |
|     ptr += VRAM_WIDTH;
 | |
|   }
 | |
| 
 | |
|   return XXH3_64bits_digest(&state);
 | |
| }
 | |
| 
 | |
| void GPUTextureCache::InitializeVRAMWritePaletteRecord(VRAMWrite::PaletteRecord* record, SourceKey source_key,
 | |
|                                                        const GSVector4i rect, PaletteRecordFlags flags)
 | |
| {
 | |
|   record->rect = rect;
 | |
|   record->key = source_key;
 | |
|   record->flags = flags;
 | |
| 
 | |
|   switch (source_key.mode)
 | |
|   {
 | |
|     case GPUTextureMode::Palette4Bit:
 | |
|     {
 | |
|       // Always has 16 colours.
 | |
|       std::memcpy(record->palette, VRAMPalettePointer(source_key.palette), 16 * sizeof(u16));
 | |
|       record->palette_hash = XXH3_64bits(record->palette, 16 * sizeof(u16));
 | |
|     }
 | |
|     break;
 | |
| 
 | |
|     case GPUTextureMode::Palette8Bit:
 | |
|     {
 | |
|       // Might have less if we're extending over the edge. Clamp it.
 | |
|       const u32 pal_width = std::min<u32>(256, VRAM_WIDTH - source_key.palette.GetXBase());
 | |
|       if (pal_width != 256)
 | |
|       {
 | |
|         std::memcpy(record->palette, VRAMPalettePointer(source_key.palette), pal_width * sizeof(u16));
 | |
|         std::memset(&record->palette[pal_width], 0, sizeof(record->palette) - (pal_width * sizeof(u16)));
 | |
|         record->palette_hash = XXH3_64bits(record->palette, pal_width * sizeof(u16));
 | |
|       }
 | |
|       else
 | |
|       {
 | |
|         // Whole thing, 2ez.
 | |
|         std::memcpy(record->palette, VRAMPalettePointer(source_key.palette), 256 * sizeof(u16));
 | |
|         record->palette_hash = XXH3_64bits(record->palette, 256 * sizeof(u16));
 | |
|       }
 | |
|     }
 | |
|     break;
 | |
| 
 | |
|     case GPUTextureMode::Direct16Bit:
 | |
|     {
 | |
|       // No palette.
 | |
|       std::memset(record->palette, 0, sizeof(record->palette));
 | |
|       record->palette_hash = 0;
 | |
|     }
 | |
|     break;
 | |
| 
 | |
|       DefaultCaseIsUnreachable()
 | |
|   }
 | |
| }
 | |
| 
 | |
| GPUTextureCache::HashCacheKey GPUTextureCache::GetHashCacheKey(SourceKey key, HashType tex_hash, HashType pal_hash)
 | |
| {
 | |
|   return HashCacheKey{tex_hash, pal_hash, static_cast<HashType>(key.mode)};
 | |
| }
 | |
| 
 | |
| GPUTextureCache::HashCacheEntry* GPUTextureCache::LookupHashCache(SourceKey key, HashType tex_hash, HashType pal_hash)
 | |
| {
 | |
|   const HashCacheKey hkey = GetHashCacheKey(key, tex_hash, pal_hash);
 | |
| 
 | |
|   const auto it = s_state.hash_cache.find(hkey);
 | |
|   if (it != s_state.hash_cache.end())
 | |
|   {
 | |
|     GL_INS_FMT("TC: Hash cache hit {:X} {:X}", hkey.texture_hash, hkey.palette_hash);
 | |
|     return &it->second;
 | |
|   }
 | |
| 
 | |
|   GL_INS_FMT("TC: Hash cache miss {:X} {:X}", hkey.texture_hash, hkey.palette_hash);
 | |
| 
 | |
|   HashCacheEntry entry;
 | |
|   entry.ref_count = 0;
 | |
|   entry.last_used_frame = 0;
 | |
|   entry.sources = {};
 | |
|   entry.texture =
 | |
|     g_gpu_device->FetchTexture(TEXTURE_PAGE_WIDTH, TEXTURE_PAGE_HEIGHT, 1, 1, 1, GPUTexture::Type::Texture,
 | |
|                                s_state.hash_cache_texture_format, GPUTexture::Flags::None);
 | |
|   if (!entry.texture)
 | |
|   {
 | |
|     ERROR_LOG("Failed to create texture.");
 | |
|     return nullptr;
 | |
|   }
 | |
| 
 | |
|   DecodeTexture(key.page, key.palette, key.mode, entry.texture.get());
 | |
| 
 | |
|   if (g_gpu_settings.texture_replacements.enable_texture_replacements)
 | |
|     ApplyTextureReplacements(key, tex_hash, pal_hash, &entry);
 | |
| 
 | |
|   s_state.hash_cache_memory_usage += entry.texture->GetVRAMUsage();
 | |
| 
 | |
|   return &s_state.hash_cache.emplace(hkey, std::move(entry)).first->second;
 | |
| }
 | |
| 
 | |
| void GPUTextureCache::RemoveFromHashCache(HashCacheEntry* entry, SourceKey key, HashType tex_hash, HashType pal_hash)
 | |
| {
 | |
|   const HashCacheKey hckey = GetHashCacheKey(key, tex_hash, pal_hash);
 | |
|   const auto iter = s_state.hash_cache.find(hckey);
 | |
|   Assert(iter != s_state.hash_cache.end() && &iter->second == entry);
 | |
|   RemoveFromHashCache(iter);
 | |
| }
 | |
| 
 | |
| void GPUTextureCache::RemoveFromHashCache(HashCache::iterator it)
 | |
| {
 | |
|   ListIterate(it->second.sources, [](Source* source) { DestroySource(source); });
 | |
| 
 | |
|   const size_t vram_usage = it->second.texture->GetVRAMUsage();
 | |
|   DebugAssert(s_state.hash_cache_memory_usage >= vram_usage);
 | |
|   s_state.hash_cache_memory_usage -= vram_usage;
 | |
| 
 | |
|   g_gpu_device->RecycleTexture(std::move(it->second.texture));
 | |
|   s_state.hash_cache.erase(it);
 | |
| }
 | |
| 
 | |
| void GPUTextureCache::ClearHashCache()
 | |
| {
 | |
|   while (!s_state.hash_cache.empty())
 | |
|     RemoveFromHashCache(s_state.hash_cache.begin());
 | |
| }
 | |
| 
 | |
| void GPUTextureCache::Compact()
 | |
| {
 | |
|   // Number of frames before unused hash cache entries are evicted.
 | |
|   static constexpr u32 MAX_HASH_CACHE_AGE = 600;
 | |
| 
 | |
|   // Maximum number of textures which are permitted in the hash cache at the end of the frame.
 | |
|   const u32 max_hash_cache_size = s_state.config.max_hash_cache_entries;
 | |
|   const size_t max_hash_cache_memory = static_cast<size_t>(s_state.config.max_hash_cache_vram_usage_mb) * 1048576;
 | |
| 
 | |
|   bool might_need_cache_purge =
 | |
|     (s_state.hash_cache.size() > max_hash_cache_size || s_state.hash_cache_memory_usage >= max_hash_cache_memory);
 | |
|   if (might_need_cache_purge)
 | |
|     s_state.hash_cache_purge_list.clear();
 | |
| 
 | |
|   const u32 frame_number = System::GetFrameNumber();
 | |
|   const u32 min_frame_number = ((frame_number > MAX_HASH_CACHE_AGE) ? (frame_number - MAX_HASH_CACHE_AGE) : 0);
 | |
| 
 | |
|   for (auto it = s_state.hash_cache.begin(); it != s_state.hash_cache.end();)
 | |
|   {
 | |
|     HashCacheEntry& e = it->second;
 | |
|     if (e.ref_count == 0 && e.last_used_frame < min_frame_number)
 | |
|     {
 | |
|       RemoveFromHashCache(it++);
 | |
|       continue;
 | |
|     }
 | |
| 
 | |
|     // We might free up enough just with "normal" removals above.
 | |
|     if (might_need_cache_purge)
 | |
|     {
 | |
|       might_need_cache_purge =
 | |
|         (s_state.hash_cache.size() > max_hash_cache_size || s_state.hash_cache_memory_usage >= max_hash_cache_memory);
 | |
|       if (might_need_cache_purge)
 | |
|         s_state.hash_cache_purge_list.emplace_back(it, static_cast<s32>(e.last_used_frame));
 | |
|     }
 | |
| 
 | |
|     ++it;
 | |
|   }
 | |
| 
 | |
|   // Pushing to a list, sorting, and removing ends up faster than re-iterating the map.
 | |
|   if (might_need_cache_purge)
 | |
|   {
 | |
|     DEV_LOG("Force compacting hash cache, count = {}, size = {:.1f} MB", s_state.hash_cache.size(),
 | |
|             static_cast<float>(s_state.hash_cache_memory_usage) / 1048576.0f);
 | |
| 
 | |
|     std::sort(s_state.hash_cache_purge_list.begin(), s_state.hash_cache_purge_list.end(),
 | |
|               [](const auto& lhs, const auto& rhs) { return lhs.second < rhs.second; });
 | |
| 
 | |
|     size_t purge_index = 0;
 | |
|     while (s_state.hash_cache.size() > max_hash_cache_size || s_state.hash_cache_memory_usage >= max_hash_cache_memory)
 | |
|     {
 | |
|       if (purge_index == s_state.hash_cache_purge_list.size())
 | |
|       {
 | |
|         WARNING_LOG("Cannot find hash cache entries to purge, current hash cache size is {} MB in {} textures.",
 | |
|                     static_cast<double>(s_state.hash_cache_memory_usage) / 1048576.0, s_state.hash_cache.size());
 | |
|         break;
 | |
|       }
 | |
| 
 | |
|       RemoveFromHashCache(s_state.hash_cache_purge_list[purge_index++].first);
 | |
|     }
 | |
| 
 | |
|     DEV_LOG("Finished compacting hash cache, count = {}, size = {:.1f} MB", s_state.hash_cache.size(),
 | |
|             static_cast<float>(s_state.hash_cache_memory_usage) / 1048576.0f);
 | |
|   }
 | |
| 
 | |
|   CompactTextureReplacementGPUImages();
 | |
| }
 | |
| 
 | |
| size_t GPUTextureCache::HashCacheKeyHash::operator()(const HashCacheKey& k) const
 | |
| {
 | |
|   std::size_t h = 0;
 | |
|   hash_combine(h, k.texture_hash, k.palette_hash, k.mode);
 | |
|   return h;
 | |
| }
 | |
| 
 | |
| TinyString GPUTextureCache::VRAMReplacementName::ToString() const
 | |
| {
 | |
|   return TinyString::from_format("{:08X}{:08X}", high, low);
 | |
| }
 | |
| 
 | |
| bool GPUTextureCache::VRAMReplacementName::Parse(const std::string_view file_title)
 | |
| {
 | |
|   if (file_title.length() != 43)
 | |
|     return false;
 | |
| 
 | |
|   const std::optional<u64> high_value = StringUtil::FromChars<u64>(file_title.substr(11, 16), 16);
 | |
|   const std::optional<u64> low_value = StringUtil::FromChars<u64>(file_title.substr(11 + 16), 16);
 | |
|   if (!high_value.has_value() || !low_value.has_value())
 | |
|     return false;
 | |
| 
 | |
|   low = low_value.value();
 | |
|   high = high_value.value();
 | |
|   return true;
 | |
| }
 | |
| 
 | |
| size_t GPUTextureCache::VRAMReplacementNameHash::operator()(const VRAMReplacementName& name) const
 | |
| {
 | |
|   size_t seed = std::hash<u64>{}(name.low);
 | |
|   hash_combine(seed, name.high);
 | |
|   return seed;
 | |
| }
 | |
| 
 | |
// Mode tokens used in texture replacement file titles, indexed by TextureReplacementName::texture_mode.
// Entries 4-7 are the "ST"-prefixed counterparts of entries 0-3. "C16" and "STC16" each appear twice,
// so a by-name lookup that stops at the first match resolves them to indices 2 and 6 respectively.
static constexpr const char* s_texture_replacement_mode_names[] = {
  "P4",   "P8",   "C16",   "C16",   // 0-3
  "STP4", "STP8", "STC16", "STC16", // 4-7
};
 | |
| 
 | |
| TinyString GPUTextureCache::TextureReplacementName::ToString() const
 | |
| {
 | |
|   const char* type_str = (type == TextureReplacementType::TextureFromVRAMWrite) ? "texupload" : "texpage";
 | |
|   const char* mode_str = s_texture_replacement_mode_names[texture_mode];
 | |
|   if (GetTextureMode() < GPUTextureMode::Direct16Bit)
 | |
|   {
 | |
|     return TinyString::from_format("{}-{}-{:016X}-{:016X}-{}x{}-{}-{}-{}x{}-P{}-{}", type_str, mode_str, src_hash,
 | |
|                                    pal_hash, src_width, src_height, offset_x, offset_y, width, height, pal_min,
 | |
|                                    pal_max);
 | |
|   }
 | |
|   else
 | |
|   {
 | |
|     return TinyString::from_format("{}-{}-{:016X}-{}x{}-{}-{}-{}x{}", type_str, mode_str, src_hash, src_width,
 | |
|                                    src_height, offset_x, offset_y, width, height);
 | |
|   }
 | |
| }
 | |
| 
 | |
| bool GPUTextureCache::TextureReplacementName::Parse(const std::string_view file_title)
 | |
| {
 | |
|   // TODO: Swap to https://github.com/eliaskosunen/scnlib
 | |
| 
 | |
|   std::string_view::size_type start_pos = 0;
 | |
|   std::string_view::size_type end_pos = file_title.find("-", start_pos);
 | |
|   if (end_pos == std::string_view::npos)
 | |
|     return false;
 | |
| 
 | |
|   // type
 | |
|   std::string_view token = file_title.substr(start_pos, end_pos);
 | |
|   if (token == "texupload")
 | |
|     type = TextureReplacementType::TextureFromVRAMWrite;
 | |
|   else if (token == "texpage")
 | |
|     type = TextureReplacementType::TextureFromPage;
 | |
|   else
 | |
|     return false;
 | |
|   start_pos = end_pos + 1;
 | |
|   end_pos = file_title.find("-", start_pos + 1);
 | |
|   if (end_pos == std::string_view::npos)
 | |
|     return false;
 | |
| 
 | |
|   // mode
 | |
|   token = file_title.substr(start_pos, end_pos - start_pos);
 | |
|   std::optional<u8> mode_opt;
 | |
|   for (size_t i = 0; i < std::size(s_texture_replacement_mode_names); i++)
 | |
|   {
 | |
|     if (token == s_texture_replacement_mode_names[i])
 | |
|     {
 | |
|       mode_opt = static_cast<u8>(i);
 | |
|       break;
 | |
|     }
 | |
|   }
 | |
|   if (!mode_opt.has_value())
 | |
|     return false;
 | |
|   texture_mode = mode_opt.value();
 | |
|   start_pos = end_pos + 1;
 | |
|   end_pos = file_title.find("-", start_pos + 1);
 | |
|   if (end_pos == std::string_view::npos)
 | |
|     return false;
 | |
| 
 | |
|   // src_hash
 | |
|   token = file_title.substr(start_pos, end_pos - start_pos);
 | |
|   std::optional<u64> val64;
 | |
|   if (token.size() != 16 || !(val64 = StringUtil::FromChars<u64>(token, 16)).has_value())
 | |
|     return false;
 | |
|   src_hash = val64.value();
 | |
| 
 | |
|   if (GetTextureMode() < GPUTextureMode::Direct16Bit)
 | |
|   {
 | |
|     start_pos = end_pos + 1;
 | |
|     end_pos = file_title.find("-", start_pos + 1);
 | |
|     if (end_pos == std::string_view::npos)
 | |
|       return false;
 | |
| 
 | |
|     // pal_hash
 | |
|     token = file_title.substr(start_pos, end_pos - start_pos);
 | |
|     if (token.size() != 16 || !(val64 = StringUtil::FromChars<u64>(token, 16)).has_value())
 | |
|       return false;
 | |
|     pal_hash = val64.value();
 | |
|     start_pos = end_pos + 1;
 | |
|     end_pos = file_title.find("x", start_pos + 1);
 | |
|     if (end_pos == std::string_view::npos)
 | |
|       return false;
 | |
| 
 | |
|     // src_width
 | |
|     token = file_title.substr(start_pos, end_pos - start_pos);
 | |
|     std::optional<u16> val16;
 | |
|     if (!(val16 = StringUtil::FromChars<u16>(token)).has_value())
 | |
|       return false;
 | |
|     src_width = val16.value();
 | |
|     if (src_width == 0)
 | |
|       return false;
 | |
|     start_pos = end_pos + 1;
 | |
|     end_pos = file_title.find("-", start_pos + 1);
 | |
|     if (end_pos == std::string_view::npos)
 | |
|       return false;
 | |
| 
 | |
|     // src_height
 | |
|     token = file_title.substr(start_pos, end_pos - start_pos);
 | |
|     if (!(val16 = StringUtil::FromChars<u16>(token)).has_value())
 | |
|       return false;
 | |
|     src_height = val16.value();
 | |
|     if (src_height == 0)
 | |
|       return false;
 | |
|     start_pos = end_pos + 1;
 | |
|     end_pos = file_title.find("-", start_pos + 1);
 | |
|     if (end_pos == std::string_view::npos)
 | |
|       return false;
 | |
| 
 | |
|     // offset_x
 | |
|     token = file_title.substr(start_pos, end_pos - start_pos);
 | |
|     if (!(val16 = StringUtil::FromChars<u16>(token)).has_value())
 | |
|       return false;
 | |
|     offset_x = val16.value();
 | |
|     start_pos = end_pos + 1;
 | |
|     end_pos = file_title.find("-", start_pos + 1);
 | |
|     if (end_pos == std::string_view::npos)
 | |
|       return false;
 | |
| 
 | |
|     // offset_y
 | |
|     token = file_title.substr(start_pos, end_pos - start_pos);
 | |
|     if (!(val16 = StringUtil::FromChars<u16>(token)).has_value())
 | |
|       return false;
 | |
|     offset_y = val16.value();
 | |
|     start_pos = end_pos + 1;
 | |
|     end_pos = file_title.find("x", start_pos + 1);
 | |
|     if (end_pos == std::string_view::npos)
 | |
|       return false;
 | |
| 
 | |
|     // width
 | |
|     token = file_title.substr(start_pos, end_pos - start_pos);
 | |
|     if (!(val16 = StringUtil::FromChars<u16>(token)).has_value())
 | |
|       return false;
 | |
|     width = val16.value();
 | |
|     if (width == 0)
 | |
|       return false;
 | |
|     start_pos = end_pos + 1;
 | |
|     end_pos = file_title.find("-", start_pos + 1);
 | |
|     if (end_pos == std::string_view::npos)
 | |
|       return false;
 | |
| 
 | |
|     // height
 | |
|     token = file_title.substr(start_pos, end_pos - start_pos);
 | |
|     if (!(val16 = StringUtil::FromChars<u16>(token)).has_value())
 | |
|       return false;
 | |
|     height = val16.value();
 | |
|     if (height == 0)
 | |
|       return false;
 | |
|     start_pos = end_pos + 1;
 | |
|     end_pos = file_title.find("-", start_pos + 1);
 | |
|     if (end_pos == std::string_view::npos || file_title[start_pos] != 'P')
 | |
|       return false;
 | |
| 
 | |
|     // pal_min
 | |
|     token = file_title.substr(start_pos + 1, end_pos - start_pos - 1);
 | |
|     std::optional<u8> val8;
 | |
|     if (!(val8 = StringUtil::FromChars<u8>(token)).has_value())
 | |
|       return false;
 | |
|     pal_min = val8.value();
 | |
|     start_pos = end_pos + 1;
 | |
| 
 | |
|     // pal_max
 | |
|     token = file_title.substr(start_pos);
 | |
|     if (!(val8 = StringUtil::FromChars<u8>(token)).has_value())
 | |
|       return false;
 | |
|     pal_max = val8.value();
 | |
|     if (pal_min > pal_max)
 | |
|       return false;
 | |
|   }
 | |
|   else
 | |
|   {
 | |
|     start_pos = end_pos + 1;
 | |
|     end_pos = file_title.find("x", start_pos + 1);
 | |
|     if (end_pos == std::string_view::npos)
 | |
|       return false;
 | |
| 
 | |
|     // src_width
 | |
|     token = file_title.substr(start_pos, end_pos - start_pos);
 | |
|     std::optional<u16> val16;
 | |
|     if (!(val16 = StringUtil::FromChars<u16>(token)).has_value())
 | |
|       return false;
 | |
|     src_width = val16.value();
 | |
|     if (src_width == 0)
 | |
|       return false;
 | |
|     start_pos = end_pos + 1;
 | |
|     end_pos = file_title.find("-", start_pos + 1);
 | |
|     if (end_pos == std::string_view::npos)
 | |
|       return false;
 | |
| 
 | |
|     // src_height
 | |
|     token = file_title.substr(start_pos, end_pos - start_pos);
 | |
|     if (!(val16 = StringUtil::FromChars<u16>(token)).has_value())
 | |
|       return false;
 | |
|     src_height = val16.value();
 | |
|     if (src_height == 0)
 | |
|       return false;
 | |
|     start_pos = end_pos + 1;
 | |
|     end_pos = file_title.find("-", start_pos + 1);
 | |
|     if (end_pos == std::string_view::npos)
 | |
|       return false;
 | |
| 
 | |
|     // offset_x
 | |
|     token = file_title.substr(start_pos, end_pos - start_pos);
 | |
|     if (!(val16 = StringUtil::FromChars<u16>(token)).has_value())
 | |
|       return false;
 | |
|     offset_x = val16.value();
 | |
|     start_pos = end_pos + 1;
 | |
|     end_pos = file_title.find("-", start_pos + 1);
 | |
|     if (end_pos == std::string_view::npos)
 | |
|       return false;
 | |
| 
 | |
|     // offset_y
 | |
|     token = file_title.substr(start_pos, end_pos - start_pos);
 | |
|     if (!(val16 = StringUtil::FromChars<u16>(token)).has_value())
 | |
|       return false;
 | |
|     offset_y = val16.value();
 | |
|     start_pos = end_pos + 1;
 | |
|     end_pos = file_title.find("x", start_pos + 1);
 | |
|     if (end_pos == std::string_view::npos)
 | |
|       return false;
 | |
| 
 | |
|     // width
 | |
|     token = file_title.substr(start_pos, end_pos - start_pos);
 | |
|     if (!(val16 = StringUtil::FromChars<u16>(token)).has_value())
 | |
|       return false;
 | |
|     width = val16.value();
 | |
|     if (width == 0)
 | |
|       return false;
 | |
|     start_pos = end_pos + 1;
 | |
| 
 | |
|     // height
 | |
|     token = file_title.substr(start_pos);
 | |
|     if (!(val16 = StringUtil::FromChars<u16>(token)).has_value())
 | |
|       return false;
 | |
|     height = val16.value();
 | |
|     if (height == 0)
 | |
|       return false;
 | |
|   }
 | |
| 
 | |
|   return true;
 | |
| }
 | |
| 
 | |
| GPUTextureCache::TextureReplacementIndex GPUTextureCache::TextureReplacementName::GetIndex() const
 | |
| {
 | |
|   return {src_hash, GetTextureMode()};
 | |
| }
 | |
| 
 | |
| GPUTextureMode GPUTextureCache::TextureReplacementName::GetTextureMode() const
 | |
| {
 | |
|   return static_cast<GPUTextureMode>(texture_mode & 3u);
 | |
| }
 | |
| 
 | |
| bool GPUTextureCache::TextureReplacementName::IsSemitransparent() const
 | |
| {
 | |
|   return (texture_mode >= 4);
 | |
| }
 | |
| 
 | |
| size_t GPUTextureCache::TextureReplacementIndexHash::operator()(const TextureReplacementIndex& name) const
 | |
| {
 | |
|   // TODO: This sucks ass, do better.
 | |
|   size_t seed = std::hash<u64>{}(name.src_hash);
 | |
|   hash_combine(seed, static_cast<u8>(name.mode));
 | |
|   return seed;
 | |
| }
 | |
| 
 | |
| size_t GPUTextureCache::DumpedTextureKeyHash::operator()(const DumpedTextureKey& k) const
 | |
| {
 | |
|   // TODO: This is slow
 | |
|   std::size_t hash = 0;
 | |
|   hash_combine(hash, k.tex_hash, k.pal_hash, k.width, k.height, k.texture_mode);
 | |
|   return hash;
 | |
| }
 | |
| 
 | |
| void GPUTextureCache::GameSerialChanged()
 | |
| {
 | |
|   ReloadTextureReplacements(false);
 | |
| }
 | |
| 
 | |
| GPUTexture* GPUTextureCache::GetVRAMReplacement(u32 width, u32 height, const void* pixels)
 | |
| {
 | |
|   const VRAMReplacementName hash = GetVRAMWriteHash(width, height, pixels);
 | |
| 
 | |
|   const auto it = s_state.vram_replacements.find(hash);
 | |
|   if (it == s_state.vram_replacements.end())
 | |
|     return nullptr;
 | |
| 
 | |
|   return GetTextureReplacementGPUImage(it->second);
 | |
| }
 | |
| 
 | |
| bool GPUTextureCache::ShouldDumpVRAMWrite(u32 width, u32 height)
 | |
| {
 | |
|   return (g_gpu_settings.texture_replacements.dump_vram_writes &&
 | |
|           width >= s_state.config.vram_write_dump_width_threshold &&
 | |
|           height >= s_state.config.vram_write_dump_height_threshold);
 | |
| }
 | |
| 
 | |
| void GPUTextureCache::DumpVRAMWrite(u32 width, u32 height, const void* pixels)
 | |
| {
 | |
|   const VRAMReplacementName name = GetVRAMWriteHash(width, height, pixels);
 | |
|   if (s_state.dumped_vram_writes.find(name) != s_state.dumped_vram_writes.end())
 | |
|   {
 | |
|     DEV_COLOR_LOG(Green, "Not dumping {}", name.ToString());
 | |
|     return;
 | |
|   }
 | |
| 
 | |
|   s_state.dumped_vram_writes.insert(name);
 | |
| 
 | |
|   const std::string path = GetVRAMWriteDumpPath(name);
 | |
|   if (path.empty() || FileSystem::FileExists(path.c_str()))
 | |
|     return;
 | |
| 
 | |
|   Image image(width, height, ImageFormat::RGBA8);
 | |
| 
 | |
|   const u16* src_pixels = reinterpret_cast<const u16*>(pixels);
 | |
| 
 | |
|   for (u32 y = 0; y < height; y++)
 | |
|   {
 | |
|     u8* row_ptr = image.GetRowPixels(y);
 | |
|     for (u32 x = 0; x < width; x++)
 | |
|     {
 | |
|       const u32 pixel32 = VRAMRGBA5551ToRGBA8888(*(src_pixels++));
 | |
|       std::memcpy(row_ptr, &pixel32, sizeof(pixel32));
 | |
|       row_ptr += sizeof(pixel32);
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   if (s_state.config.dump_vram_write_force_alpha_channel)
 | |
|     image.SetAllPixelsOpaque();
 | |
| 
 | |
|   INFO_LOG("Dumping {}x{} VRAM write to '{}'", width, height, Path::GetFileName(path));
 | |
| 
 | |
|   Error error;
 | |
|   if (!image.SaveToFile(path.c_str(), Image::DEFAULT_SAVE_QUALITY, &error)) [[unlikely]]
 | |
|   {
 | |
|     ERROR_LOG("Failed to dump {}x{} VRAM write to '{}': {}", width, height, Path::GetFileName(path),
 | |
|               error.GetDescription());
 | |
|   }
 | |
| }
 | |
| 
 | |
| void GPUTextureCache::DumpTexture(TextureReplacementType type, u32 offset_x, u32 offset_y, u32 src_width,
 | |
|                                   u32 src_height, GPUTextureMode mode, HashType src_hash, HashType pal_hash,
 | |
|                                   u32 pal_min, u32 pal_max, const u16* palette_data, const GSVector4i rect,
 | |
|                                   PaletteRecordFlags flags)
 | |
| {
 | |
|   const u32 width = ApplyTextureModeShift(mode, rect.width());
 | |
|   const u32 height = rect.height();
 | |
| 
 | |
|   if (width < s_state.config.texture_dump_width_threshold || height < s_state.config.texture_dump_height_threshold)
 | |
|     return;
 | |
| 
 | |
|   const bool semitransparent = ((flags & PaletteRecordFlags::HasSemiTransparentDraws) != PaletteRecordFlags::None &&
 | |
|                                 !s_state.config.dump_texture_force_alpha_channel);
 | |
|   const u8 dumped_texture_mode = static_cast<u8>(mode) | (semitransparent ? 4 : 0);
 | |
| 
 | |
|   const TextureReplacementName name = {
 | |
|     .src_hash = src_hash,
 | |
|     .pal_hash = pal_hash,
 | |
|     .src_width = Truncate16(src_width),
 | |
|     .src_height = Truncate16(src_height),
 | |
|     .type = type,
 | |
|     .texture_mode = dumped_texture_mode,
 | |
|     .offset_x = Truncate16(offset_x),
 | |
|     .offset_y = Truncate16(offset_y),
 | |
|     .width = Truncate16(width),
 | |
|     .height = Truncate16(height),
 | |
|     .pal_min = Truncate8(pal_min),
 | |
|     .pal_max = Truncate8(pal_max),
 | |
|   };
 | |
| 
 | |
|   const DumpedTextureKey key = DumpedTextureKey::FromName(name);
 | |
|   if (s_state.dumped_textures.find(key) != s_state.dumped_textures.end())
 | |
|   {
 | |
|     DEV_COLOR_LOG(Green, "Not dumping {}", name.ToString());
 | |
|     return;
 | |
|   }
 | |
| 
 | |
|   if (!EnsureGameDirectoryExists())
 | |
|     return;
 | |
| 
 | |
|   const std::string dump_directory = GetTextureDumpDirectory();
 | |
|   if (!FileSystem::EnsureDirectoryExists(dump_directory.c_str(), false))
 | |
|     return;
 | |
| 
 | |
|   s_state.dumped_textures.insert(key);
 | |
| 
 | |
|   SmallString filename = name.ToString();
 | |
|   filename.append(".png");
 | |
| 
 | |
|   std::string path = Path::Combine(dump_directory, filename);
 | |
|   if (FileSystem::FileExists(path.c_str()))
 | |
|     return;
 | |
| 
 | |
|   DEV_LOG("Dumping VRAM write {:016X} [{}x{}] at {}", src_hash, width, height, rect);
 | |
| 
 | |
|   Image image(width, height, ImageFormat::RGBA8);
 | |
|   GPUTextureCache::DecodeTexture(mode, &g_vram[rect.top * VRAM_WIDTH + rect.left], palette_data, image.GetPixels(),
 | |
|                                  image.GetPitch(), width, height, GPUTexture::Format::RGBA8);
 | |
| 
 | |
|   System::QueueAsyncTask([path = std::move(path), image = std::move(image), width, height, semitransparent]() mutable {
 | |
|     // TODO: Vectorize this.
 | |
|     u32* image_pixels = reinterpret_cast<u32*>(image.GetPixels());
 | |
|     const u32* image_pixels_end = image_pixels + (width * height);
 | |
|     if (s_state.config.dump_texture_force_alpha_channel)
 | |
|     {
 | |
|       for (u32* pixel = image_pixels; pixel != image_pixels_end; pixel++)
 | |
|         *pixel |= 0xFF000000u;
 | |
|     }
 | |
|     else
 | |
|     {
 | |
|       if (semitransparent)
 | |
|       {
 | |
|         // Alpha channel should be inverted, because 0 means opaque, 1 is semitransparent.
 | |
|         // Pixel value of 0000 is still completely transparent.
 | |
|         for (u32* pixel = image_pixels; pixel != image_pixels_end; pixel++)
 | |
|         {
 | |
|           const u32 val = *pixel;
 | |
|           *pixel = (val == 0u) ? 0u : ((val & 0xFFFFFFFu) | ((val & 0x80000000u) ? 0x80000000u : 0xFF000000u));
 | |
|         }
 | |
|       }
 | |
|       else
 | |
|       {
 | |
|         // Only cut out 0000 pixels.
 | |
|         for (u32* pixel = image_pixels; pixel != image_pixels_end; pixel++)
 | |
|         {
 | |
|           const u32 val = *pixel;
 | |
|           *pixel = (val == 0u) ? 0u : (val | 0xFF000000u);
 | |
|         }
 | |
|       }
 | |
|     }
 | |
| 
 | |
|     if (!image.SaveToFile(path.c_str()))
 | |
|       ERROR_LOG("Failed to write texture dump to {}.", Path::GetFileName(path));
 | |
|   });
 | |
| }
 | |
| 
 | |
| bool GPUTextureCache::IsMatchingReplacementPalette(HashType full_palette_hash, GPUTextureMode mode,
 | |
|                                                    GPUTexturePaletteReg palette, const TextureReplacementName& name)
 | |
| {
 | |
|   if (!TextureModeHasPalette(mode))
 | |
|     return true;
 | |
| 
 | |
|   const u32 full_pal_max = GetPaletteWidth(mode) - 1;
 | |
|   if (name.pal_min == 0 && name.pal_max == full_pal_max)
 | |
|     return (name.pal_hash == full_palette_hash);
 | |
| 
 | |
|   // If the range goes off the edge of VRAM, it's not a match.
 | |
|   if ((palette.GetXBase() + name.pal_max) >= VRAM_WIDTH)
 | |
|     return false;
 | |
| 
 | |
|   // This needs to re-hash every lookup, which is a bit of a bummer.
 | |
|   // But at least there's the hash cache, so it shouldn't be too painful...
 | |
|   const HashType partial_hash = GPUTextureCache::HashPartialPalette(palette, mode, name.pal_min, name.pal_max);
 | |
|   return (partial_hash == name.pal_hash);
 | |
| }
 | |
| 
 | |
| bool GPUTextureCache::HasVRAMWriteTextureReplacements()
 | |
| {
 | |
|   return !s_state.vram_write_texture_replacements.empty();
 | |
| }
 | |
| 
 | |
| void GPUTextureCache::GetVRAMWriteTextureReplacements(std::vector<TextureReplacementSubImage>& replacements,
 | |
|                                                       HashType vram_write_hash, HashType palette_hash,
 | |
|                                                       GPUTextureMode mode, GPUTexturePaletteReg palette,
 | |
|                                                       const GSVector2i& offset_to_page)
 | |
| {
 | |
|   const TextureReplacementIndex index = {vram_write_hash, mode};
 | |
|   const auto& [begin, end] = s_state.vram_write_texture_replacements.equal_range(index);
 | |
|   if (begin == end)
 | |
|     return;
 | |
| 
 | |
|   const GSVector4i offset_to_page_v = GSVector4i(offset_to_page).xyxy();
 | |
| 
 | |
|   for (auto it = begin; it != end; ++it)
 | |
|   {
 | |
|     if (!IsMatchingReplacementPalette(palette_hash, mode, palette, it->second.first))
 | |
|       continue;
 | |
| 
 | |
|     const TextureReplacementName& name = it->second.first;
 | |
|     const GSVector4i rect_in_write_space = name.GetDestRect();
 | |
|     const GSVector4i rect_in_page_space = rect_in_write_space.sub32(offset_to_page_v);
 | |
| 
 | |
|     // zw <= 0 or zw >= TEXTURE_PAGE_SIZE
 | |
|     if (!(rect_in_page_space.le32(
 | |
|             GSVector4i::cxpr(std::numeric_limits<s32>::min(), std::numeric_limits<s32>::min(), 0, 0)) |
 | |
|           rect_in_page_space.ge32(GSVector4i::cxpr(TEXTURE_PAGE_WIDTH, TEXTURE_PAGE_HEIGHT,
 | |
|                                                    std::numeric_limits<s32>::max(), std::numeric_limits<s32>::max())))
 | |
|            .allfalse())
 | |
|     {
 | |
|       // Rect is out of bounds.
 | |
|       continue;
 | |
|     }
 | |
| 
 | |
|     GPUTexture* texture = GetTextureReplacementGPUImage(it->second.second);
 | |
|     if (!texture)
 | |
|       continue;
 | |
| 
 | |
|     // Especially for C16 textures, the write may span multiple pages. In this case, we need to offset
 | |
|     // the start of the page into the replacement texture.
 | |
|     const GSVector2i rect_in_page_space_start = rect_in_page_space.xy();
 | |
|     const GSVector2i src_offset =
 | |
|       GSVector2i::zero().sub32(rect_in_page_space_start) & rect_in_page_space_start.lt32(GSVector2i::zero());
 | |
|     const GSVector4i clamped_rect_in_page_space =
 | |
|       rect_in_page_space.add32(GSVector4i::xyxy(src_offset, GSVector2i::zero()))
 | |
|         .rintersect(GSVector4i::cxpr(0, 0, TEXTURE_PAGE_WIDTH, TEXTURE_PAGE_HEIGHT));
 | |
| 
 | |
|     // TODO: This fails in Wild Arms 2, writes that are wider than a page.
 | |
|     DebugAssert(rect_in_page_space.width() == name.width && rect_in_page_space.height() == name.height);
 | |
|     DebugAssert(clamped_rect_in_page_space.width() <= static_cast<s32>(TEXTURE_PAGE_WIDTH));
 | |
|     DebugAssert(clamped_rect_in_page_space.height() <= static_cast<s32>(TEXTURE_PAGE_HEIGHT));
 | |
| 
 | |
|     const GSVector2 scale = GSVector2(texture->GetSizeVec()) / GSVector2(name.GetSizeVec());
 | |
|     replacements.push_back(TextureReplacementSubImage{
 | |
|       clamped_rect_in_page_space, GSVector4i::xyxy(src_offset, src_offset.add32(clamped_rect_in_page_space.rsize())),
 | |
|       texture, scale.x, scale.y, name.IsSemitransparent()});
 | |
|   }
 | |
| }
 | |
| 
 | |
| bool GPUTextureCache::HasTexturePageTextureReplacements()
 | |
| {
 | |
|   return !s_state.texture_page_texture_replacements.empty();
 | |
| }
 | |
| 
 | |
| void GPUTextureCache::GetTexturePageTextureReplacements(std::vector<TextureReplacementSubImage>& replacements,
 | |
|                                                         u32 start_page_number, HashType page_hash,
 | |
|                                                         HashType palette_hash, GPUTextureMode mode,
 | |
|                                                         GPUTexturePaletteReg palette)
 | |
| {
 | |
|   // This is truely awful. Because we can dump a sub-page worth of texture, we need to examine the entire replacement
 | |
|   // list, because any of them could match up...
 | |
| 
 | |
|   const u8 shift = GetTextureModeShift(mode);
 | |
|   const GSVector4i page_start_in_vram =
 | |
|     GSVector4i(GSVector2i(VRAMPageStartX(start_page_number), VRAMPageStartY(start_page_number))).xyxy();
 | |
| 
 | |
|   for (TextureReplacementMap::const_iterator it = s_state.texture_page_texture_replacements.begin();
 | |
|        it != s_state.texture_page_texture_replacements.end(); ++it)
 | |
|   {
 | |
|     if (it->first.mode != mode)
 | |
|       continue;
 | |
| 
 | |
|     // Early-out if the palette mismatches, at least that'll save some cycles...
 | |
|     if (!IsMatchingReplacementPalette(palette_hash, mode, palette, it->second.first))
 | |
|       continue;
 | |
| 
 | |
|     const TextureReplacementName& name = it->second.first;
 | |
|     GSVector4i rect_in_page_space;
 | |
|     if (name.width == TEXTURE_PAGE_WIDTH && name.height == TEXTURE_PAGE_HEIGHT)
 | |
|     {
 | |
|       // This replacement is an entire page, so we can simply check the already-computed page hash.
 | |
|       DebugAssert(name.offset_x == 0 && name.offset_y == 0);
 | |
|       if (it->first.src_hash != page_hash)
 | |
|         continue;
 | |
| 
 | |
|       rect_in_page_space = GSVector4i::cxpr(0, 0, TEXTURE_PAGE_WIDTH, TEXTURE_PAGE_HEIGHT);
 | |
|     }
 | |
|     else
 | |
|     {
 | |
|       // Unlike write replacements, the
 | |
|       // Replacement is part of a page, need to re-hash.
 | |
|       rect_in_page_space = name.GetDestRect();
 | |
|       const GSVector4i hash_rect =
 | |
|         rect_in_page_space.blend32<0x5>(rect_in_page_space.srl32(shift)).add32(page_start_in_vram);
 | |
|       const GPUTextureCache::HashType hash = GPUTextureCache::HashRect(hash_rect);
 | |
|       if (it->first.src_hash != hash)
 | |
|         continue;
 | |
|     }
 | |
| 
 | |
|     GPUTexture* texture = GetTextureReplacementGPUImage(it->second.second);
 | |
|     if (!texture)
 | |
|       continue;
 | |
| 
 | |
|     const GSVector2 scale = GSVector2(texture->GetSizeVec()) / GSVector2(name.GetSizeVec());
 | |
|     replacements.push_back(TextureReplacementSubImage{rect_in_page_space, GSVector4i::loadh(name.GetSizeVec()), texture,
 | |
|                                                       scale.x, scale.y, name.IsSemitransparent()});
 | |
|   }
 | |
| }
 | |
| 
 | |
| std::optional<GPUTextureCache::TextureReplacementType>
 | |
| GPUTextureCache::GetTextureReplacementTypeFromFileTitle(const std::string_view path)
 | |
| {
 | |
|   if (path.starts_with("vram-write-"))
 | |
|     return TextureReplacementType::VRAMReplacement;
 | |
| 
 | |
|   if (path.starts_with("texupload-"))
 | |
|     return TextureReplacementType::TextureFromVRAMWrite;
 | |
| 
 | |
|   if (path.starts_with("texpage-"))
 | |
|     return TextureReplacementType::TextureFromPage;
 | |
| 
 | |
|   return std::nullopt;
 | |
| }
 | |
| 
 | |
| bool GPUTextureCache::HasValidReplacementExtension(const std::string_view path)
 | |
| {
 | |
|   const std::string_view extension = Path::GetExtension(path);
 | |
|   for (const char* test_extension : {"png", "jpg", "webp"})
 | |
|   {
 | |
|     if (StringUtil::EqualNoCase(extension, test_extension))
 | |
|       return true;
 | |
|   }
 | |
| 
 | |
|   return false;
 | |
| }
 | |
| 
 | |
| void GPUTextureCache::FindTextureReplacements(bool load_vram_write_replacements, bool load_texture_replacements,
 | |
|                                               bool prefill_dumped_texture_list, bool prefill_dumped_vram_list)
 | |
| {
 | |
|   if (GPUThread::GetGameSerial().empty())
 | |
|     return;
 | |
| 
 | |
|   FileSystem::FindResultsArray files;
 | |
|   FileSystem::FindFiles(GetTextureReplacementDirectory().c_str(), "*",
 | |
|                         FILESYSTEM_FIND_FILES | FILESYSTEM_FIND_RECURSIVE, &files);
 | |
| 
 | |
|   const bool add_texture_replacements_to_dumped =
 | |
|     prefill_dumped_texture_list && !g_gpu_settings.texture_replacements.dump_replaced_textures;
 | |
|   const bool add_vram_replacements_to_dumped =
 | |
|     prefill_dumped_vram_list && !g_gpu_settings.texture_replacements.dump_replaced_textures;
 | |
| 
 | |
|   for (FILESYSTEM_FIND_DATA& fd : files)
 | |
|   {
 | |
|     if ((fd.Attributes & FILESYSTEM_FILE_ATTRIBUTE_DIRECTORY) || !HasValidReplacementExtension(fd.FileName))
 | |
|       continue;
 | |
| 
 | |
|     const std::string_view file_title = Path::GetFileTitle(fd.FileName);
 | |
|     const std::optional<TextureReplacementType> type = GetTextureReplacementTypeFromFileTitle(file_title);
 | |
|     if (!type.has_value())
 | |
|       continue;
 | |
| 
 | |
|     switch (type.value())
 | |
|     {
 | |
|       case TextureReplacementType::VRAMReplacement:
 | |
|       {
 | |
|         VRAMReplacementName name;
 | |
|         if (!name.Parse(file_title))
 | |
|           continue;
 | |
| 
 | |
|         if (add_vram_replacements_to_dumped)
 | |
|           s_state.dumped_vram_writes.insert(name);
 | |
| 
 | |
|         if (load_vram_write_replacements)
 | |
|         {
 | |
|           if (const auto it = s_state.vram_replacements.find(name); it != s_state.vram_replacements.end())
 | |
|           {
 | |
|             WARNING_LOG("Duplicate VRAM replacement: '{}' and '{}'", Path::GetFileName(it->second),
 | |
|                         Path::GetFileName(fd.FileName));
 | |
|             continue;
 | |
|           }
 | |
| 
 | |
|           s_state.vram_replacements.emplace(name, std::move(fd.FileName));
 | |
|         }
 | |
|       }
 | |
|       break;
 | |
| 
 | |
|       case TextureReplacementType::TextureFromVRAMWrite:
 | |
|       case TextureReplacementType::TextureFromPage:
 | |
|       {
 | |
|         TextureReplacementName name;
 | |
|         if (!name.Parse(file_title))
 | |
|           continue;
 | |
| 
 | |
|         if (add_texture_replacements_to_dumped)
 | |
|           s_state.dumped_textures.insert(DumpedTextureKey::FromName(name));
 | |
| 
 | |
|         if (load_texture_replacements)
 | |
|         {
 | |
|           DebugAssert(name.type == type.value());
 | |
| 
 | |
|           const TextureReplacementIndex index = name.GetIndex();
 | |
|           TextureReplacementMap& dest_map = (type.value() == TextureReplacementType::TextureFromVRAMWrite) ?
 | |
|                                               s_state.vram_write_texture_replacements :
 | |
|                                               s_state.texture_page_texture_replacements;
 | |
| 
 | |
|           // Multiple replacements in the same write are fine. But they should have different rects.
 | |
|           const auto range = dest_map.equal_range(index);
 | |
|           bool duplicate = false;
 | |
|           for (auto it = range.first; it != range.second; ++it)
 | |
|           {
 | |
|             if (it->second.first == name) [[unlikely]]
 | |
|             {
 | |
|               WARNING_LOG("Duplicate texture replacement: '{}' and '{}'", Path::GetFileName(it->second.second),
 | |
|                           Path::GetFileName(fd.FileName));
 | |
|               duplicate = true;
 | |
|             }
 | |
|           }
 | |
|           if (duplicate) [[unlikely]]
 | |
|             continue;
 | |
| 
 | |
|           dest_map.emplace(index, std::make_pair(name, std::move(fd.FileName)));
 | |
|         }
 | |
|       }
 | |
|       break;
 | |
| 
 | |
|         DefaultCaseIsUnreachable()
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   if (g_gpu_settings.texture_replacements.enable_texture_replacements)
 | |
|   {
 | |
|     INFO_LOG("Found {} replacement upload textures for '{}'", s_state.vram_write_texture_replacements.size(),
 | |
|              GPUThread::GetGameSerial());
 | |
|     INFO_LOG("Found {} replacement page textures for '{}'", s_state.texture_page_texture_replacements.size(),
 | |
|              GPUThread::GetGameSerial());
 | |
|   }
 | |
| 
 | |
|   if (g_gpu_settings.texture_replacements.enable_vram_write_replacements)
 | |
|     INFO_LOG("Found {} replacement VRAM for '{}'", s_state.vram_replacements.size(), GPUThread::GetGameSerial());
 | |
| 
 | |
|   // if we're dumping, need to prefill the dumped list with those in the dumps directory as well
 | |
|   if (prefill_dumped_texture_list || prefill_dumped_vram_list)
 | |
|   {
 | |
|     FileSystem::FindFiles(GetTextureDumpDirectory().c_str(), "*", FILESYSTEM_FIND_FILES | FILESYSTEM_FIND_RECURSIVE,
 | |
|                           &files);
 | |
| 
 | |
|     for (FILESYSTEM_FIND_DATA& fd : files)
 | |
|     {
 | |
|       if ((fd.Attributes & FILESYSTEM_FILE_ATTRIBUTE_DIRECTORY) || !HasValidReplacementExtension(fd.FileName))
 | |
|         continue;
 | |
| 
 | |
|       const std::string_view file_title = Path::GetFileTitle(fd.FileName);
 | |
|       const std::optional<TextureReplacementType> type = GetTextureReplacementTypeFromFileTitle(file_title);
 | |
|       if (!type.has_value())
 | |
|         continue;
 | |
| 
 | |
|       switch (type.value())
 | |
|       {
 | |
|         case TextureReplacementType::VRAMReplacement:
 | |
|         {
 | |
|           VRAMReplacementName name;
 | |
|           if (!name.Parse(file_title))
 | |
|             continue;
 | |
| 
 | |
|           if (prefill_dumped_vram_list)
 | |
|             s_state.dumped_vram_writes.insert(name);
 | |
|         }
 | |
|         break;
 | |
| 
 | |
|         case TextureReplacementType::TextureFromVRAMWrite:
 | |
|         case TextureReplacementType::TextureFromPage:
 | |
|         {
 | |
|           TextureReplacementName name;
 | |
|           if (!name.Parse(file_title))
 | |
|             continue;
 | |
| 
 | |
|           if (prefill_dumped_texture_list)
 | |
|             s_state.dumped_textures.insert(DumpedTextureKey::FromName(name));
 | |
|         }
 | |
|         break;
 | |
| 
 | |
|           DefaultCaseIsUnreachable()
 | |
|       }
 | |
|     }
 | |
|   }
 | |
| }
 | |
| 
 | |
| void GPUTextureCache::LoadTextureReplacementAliases(const ryml::ConstNodeRef& root,
 | |
|                                                     bool load_vram_write_replacement_aliases,
 | |
|                                                     bool load_texture_replacement_aliases)
 | |
| {
 | |
|   if (GPUThread::GetGameSerial().empty())
 | |
|     return;
 | |
| 
 | |
|   const std::string source_dir = GetTextureReplacementDirectory();
 | |
| 
 | |
|   for (const ryml::ConstNodeRef& current : root.cchildren())
 | |
|   {
 | |
|     const std::string_view key = to_stringview(current.key());
 | |
|     const std::optional<TextureReplacementType> type = GetTextureReplacementTypeFromFileTitle(key);
 | |
|     if (!type.has_value())
 | |
|       continue;
 | |
| 
 | |
|     const std::string_view replacement_filename = to_stringview(current.val());
 | |
|     std::string replacement_path = Path::Combine(source_dir, replacement_filename);
 | |
|     if (!FileSystem::FileExists(replacement_path.c_str()))
 | |
|     {
 | |
|       ERROR_LOG("File '{}' for alias '{}' does not exist.", key, replacement_filename);
 | |
|       continue;
 | |
|     }
 | |
| 
 | |
|     switch (type.value())
 | |
|     {
 | |
|       case TextureReplacementType::VRAMReplacement:
 | |
|       {
 | |
|         VRAMReplacementName name;
 | |
|         if (!load_vram_write_replacement_aliases || !name.Parse(key))
 | |
|           continue;
 | |
| 
 | |
|         if (const auto it = s_state.vram_replacements.find(name); it != s_state.vram_replacements.end())
 | |
|         {
 | |
|           WARNING_LOG("Duplicate VRAM replacement alias: '{}' and '{}'", Path::GetFileName(it->second),
 | |
|                       replacement_filename);
 | |
|           continue;
 | |
|         }
 | |
| 
 | |
|         s_state.vram_replacements.emplace(name, std::move(replacement_path));
 | |
|       }
 | |
|       break;
 | |
| 
 | |
|       case TextureReplacementType::TextureFromVRAMWrite:
 | |
|       case TextureReplacementType::TextureFromPage:
 | |
|       {
 | |
|         TextureReplacementName name;
 | |
|         if (!load_texture_replacement_aliases || !name.Parse(key))
 | |
|           continue;
 | |
| 
 | |
|         DebugAssert(name.type == type.value());
 | |
| 
 | |
|         const TextureReplacementIndex index = name.GetIndex();
 | |
|         TextureReplacementMap& dest_map = (type.value() == TextureReplacementType::TextureFromVRAMWrite) ?
 | |
|                                             s_state.vram_write_texture_replacements :
 | |
|                                             s_state.texture_page_texture_replacements;
 | |
| 
 | |
|         // Multiple replacements in the same write are fine. But they should have different rects.
 | |
|         const auto range = dest_map.equal_range(index);
 | |
|         bool duplicate = false;
 | |
|         for (auto it = range.first; it != range.second; ++it)
 | |
|         {
 | |
|           if (it->second.first == name) [[unlikely]]
 | |
|           {
 | |
|             WARNING_LOG("Duplicate texture replacement alias: '{}' and '{}'", Path::GetFileName(it->second.second),
 | |
|                         replacement_filename);
 | |
|             duplicate = true;
 | |
|           }
 | |
|         }
 | |
|         if (duplicate) [[unlikely]]
 | |
|           continue;
 | |
| 
 | |
|         dest_map.emplace(index, std::make_pair(name, std::move(replacement_path)));
 | |
|       }
 | |
|       break;
 | |
| 
 | |
|         DefaultCaseIsUnreachable()
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   if (g_gpu_settings.texture_replacements.enable_texture_replacements)
 | |
|   {
 | |
|     INFO_LOG("Found {} replacement upload textures after applying aliases for '{}'",
 | |
|              s_state.vram_write_texture_replacements.size(), GPUThread::GetGameSerial());
 | |
|     INFO_LOG("Found {} replacement page textures after applying aliases for '{}'",
 | |
|              s_state.texture_page_texture_replacements.size(), GPUThread::GetGameSerial());
 | |
|   }
 | |
| 
 | |
|   if (g_gpu_settings.texture_replacements.enable_vram_write_replacements)
 | |
|   {
 | |
|     INFO_LOG("Found {} replacement VRAM after applying aliases for '{}'", s_state.vram_replacements.size(),
 | |
|              GPUThread::GetGameSerial());
 | |
|   }
 | |
| }
 | |
| 
 | |
| const GPUTextureCache::TextureReplacementImage* GPUTextureCache::GetTextureReplacementImage(const std::string& path)
 | |
| {
 | |
|   auto it = s_state.replacement_image_cache.find(path);
 | |
|   if (it != s_state.replacement_image_cache.end())
 | |
|     return &it->second;
 | |
| 
 | |
|   Image image;
 | |
|   Error error;
 | |
|   if (!image.LoadFromFile(path.c_str(), &error))
 | |
|   {
 | |
|     ERROR_LOG("Failed to load '{}': {}", Path::GetFileName(path), error.GetDescription());
 | |
|     return nullptr;
 | |
|   }
 | |
| 
 | |
|   VERBOSE_LOG("Loaded '{}': {}x{} {}", Path::GetFileName(path), image.GetWidth(), image.GetHeight(),
 | |
|               Image::GetFormatName(image.GetFormat()));
 | |
|   it = s_state.replacement_image_cache.emplace(path, std::move(image)).first;
 | |
|   return &it->second;
 | |
| }
 | |
| 
 | |
| GPUTexture* GPUTextureCache::GetTextureReplacementGPUImage(const std::string& path)
 | |
| {
 | |
|   // Already in cache?
 | |
|   const auto git = s_state.gpu_replacement_image_cache.find(path);
 | |
|   if (git != s_state.gpu_replacement_image_cache.end())
 | |
|   {
 | |
|     git->second.second = System::GetFrameNumber();
 | |
|     return git->second.first.get();
 | |
|   }
 | |
| 
 | |
|   // Need to upload it.
 | |
|   Error error;
 | |
|   std::unique_ptr<GPUTexture> tex;
 | |
| 
 | |
|   // Check CPU cache first.
 | |
|   const auto it = s_state.replacement_image_cache.find(path);
 | |
|   if (it != s_state.replacement_image_cache.end())
 | |
|   {
 | |
|     tex = g_gpu_device->FetchAndUploadTextureImage(it->second, GPUTexture::Flags::None, &error);
 | |
|   }
 | |
|   else
 | |
|   {
 | |
|     // Need to load it.
 | |
|     Image cpu_image;
 | |
|     if (cpu_image.LoadFromFile(path.c_str(), &error))
 | |
|       tex = g_gpu_device->FetchAndUploadTextureImage(cpu_image, GPUTexture::Flags::None, &error);
 | |
|   }
 | |
| 
 | |
|   if (!tex)
 | |
|   {
 | |
|     ERROR_LOG("Failed to load/upload '{}': {}", Path::GetFileName(path), error.GetDescription());
 | |
|     return nullptr;
 | |
|   }
 | |
| 
 | |
|   const size_t vram_usage = tex->GetVRAMUsage();
 | |
|   s_state.gpu_replacement_image_cache_vram_usage += vram_usage;
 | |
| 
 | |
|   VERBOSE_LOG("Uploaded '{}': {}x{} {} {:.2f} KB", Path::GetFileName(path), tex->GetWidth(), tex->GetHeight(),
 | |
|               GPUTexture::GetFormatName(tex->GetFormat()), static_cast<float>(vram_usage) / 1024.0f);
 | |
| 
 | |
|   return s_state.gpu_replacement_image_cache.emplace(path, std::make_pair(std::move(tex), System::GetFrameNumber()))
 | |
|     .first->second.first.get();
 | |
| }
 | |
| 
 | |
| void GPUTextureCache::CompactTextureReplacementGPUImages()
 | |
| {
 | |
|   // Instead of compacting to exactly the maximum, let's go down to the maximum less 16MB.
 | |
|   // That way we can hopefully avoid compacting again for a few frames.
 | |
|   static constexpr size_t EXTRA_COMPACT_SIZE = 16 * 1024 * 1024;
 | |
| 
 | |
|   const size_t max_usage = static_cast<size_t>(s_state.config.max_replacement_cache_vram_usage_mb) * 1048576;
 | |
|   if (s_state.gpu_replacement_image_cache_vram_usage <= max_usage)
 | |
|     return;
 | |
| 
 | |
|   DEV_LOG("Compacting replacement GPU image cache, count = {}, size = {:.1f} MB",
 | |
|           s_state.gpu_replacement_image_cache.size(),
 | |
|           static_cast<float>(s_state.gpu_replacement_image_cache_vram_usage) / 1048576.0f);
 | |
| 
 | |
|   const u32 frame_number = System::GetFrameNumber();
 | |
|   s_state.gpu_replacement_image_cache_purge_list.reserve(s_state.gpu_replacement_image_cache.size());
 | |
|   for (auto it = s_state.gpu_replacement_image_cache.begin(); it != s_state.gpu_replacement_image_cache.end(); ++it)
 | |
|     s_state.gpu_replacement_image_cache_purge_list.emplace_back(it, frame_number - it->second.second);
 | |
| 
 | |
|   // Reverse sort, put the oldest on the end.
 | |
|   std::sort(s_state.gpu_replacement_image_cache_purge_list.begin(),
 | |
|             s_state.gpu_replacement_image_cache_purge_list.end(),
 | |
|             [](const auto& lhs, const auto& rhs) { return lhs.second > rhs.second; });
 | |
| 
 | |
|   // See first comment above.
 | |
|   const size_t target_size = (max_usage < EXTRA_COMPACT_SIZE) ? max_usage : (max_usage - EXTRA_COMPACT_SIZE);
 | |
|   while (s_state.gpu_replacement_image_cache_vram_usage > target_size &&
 | |
|          !s_state.gpu_replacement_image_cache_purge_list.empty())
 | |
|   {
 | |
|     GPUReplacementImageCache::iterator iter = s_state.gpu_replacement_image_cache_purge_list.back().first;
 | |
|     s_state.gpu_replacement_image_cache_purge_list.pop_back();
 | |
| 
 | |
|     std::unique_ptr<GPUTexture> tex = std::move(iter->second.first);
 | |
|     s_state.gpu_replacement_image_cache.erase(iter);
 | |
|     s_state.gpu_replacement_image_cache_vram_usage -= tex->GetVRAMUsage();
 | |
|     g_gpu_device->RecycleTexture(std::move(tex));
 | |
|   }
 | |
| 
 | |
|   s_state.gpu_replacement_image_cache_purge_list.clear();
 | |
| 
 | |
|   DEV_LOG("Finished compacting replacement GPU image cache, count = {}, size = {:.1f} MB",
 | |
|           s_state.gpu_replacement_image_cache.size(),
 | |
|           static_cast<float>(s_state.gpu_replacement_image_cache_vram_usage) / 1048576.0f);
 | |
| }
 | |
| 
 | |
| void GPUTextureCache::PreloadReplacementTextures()
 | |
| {
 | |
|   static constexpr float UPDATE_INTERVAL = 1.0f;
 | |
| 
 | |
|   Timer last_update_time;
 | |
|   u32 num_textures_loaded = 0;
 | |
|   const size_t total_textures = s_state.vram_replacements.size() + s_state.vram_write_texture_replacements.size() +
 | |
|                                 s_state.texture_page_texture_replacements.size();
 | |
| 
 | |
| #define UPDATE_PROGRESS()                                                                                              \
 | |
|   if (last_update_time.GetTimeSeconds() >= UPDATE_INTERVAL)                                                            \
 | |
|   {                                                                                                                    \
 | |
|     ImGuiFullscreen::RenderLoadingScreen(ImGuiManager::LOGO_IMAGE_NAME, "Preloading replacement textures...", 0,       \
 | |
|                                          static_cast<int>(total_textures), static_cast<int>(num_textures_loaded));     \
 | |
|     last_update_time.Reset();                                                                                          \
 | |
|   }
 | |
| 
 | |
|   for (const auto& it : s_state.vram_replacements)
 | |
|   {
 | |
|     UPDATE_PROGRESS();
 | |
|     GetTextureReplacementImage(it.second);
 | |
|     num_textures_loaded++;
 | |
|   }
 | |
| 
 | |
| #define PROCESS_MAP(map)                                                                                               \
 | |
|   for (const auto& it : map)                                                                                           \
 | |
|   {                                                                                                                    \
 | |
|     UPDATE_PROGRESS();                                                                                                 \
 | |
|     GetTextureReplacementImage(it.second.second);                                                                      \
 | |
|     num_textures_loaded++;                                                                                             \
 | |
|   }
 | |
| 
 | |
|   PROCESS_MAP(s_state.vram_write_texture_replacements);
 | |
|   PROCESS_MAP(s_state.texture_page_texture_replacements);
 | |
| #undef PROCESS_MAP
 | |
| #undef UPDATE_PROGRESS
 | |
| }
 | |
| 
 | |
| bool GPUTextureCache::EnsureGameDirectoryExists()
 | |
| {
 | |
|   if (GPUThread::GetGameSerial().empty())
 | |
|     return false;
 | |
| 
 | |
|   const std::string game_directory = Path::Combine(EmuFolders::Textures, GPUThread::GetGameSerial());
 | |
|   if (FileSystem::DirectoryExists(game_directory.c_str()))
 | |
|     return true;
 | |
| 
 | |
|   Error error;
 | |
|   if (!FileSystem::CreateDirectory(game_directory.c_str(), false, &error))
 | |
|   {
 | |
|     ERROR_LOG("Failed to create game directory: {}", error.GetDescription());
 | |
|     return false;
 | |
|   }
 | |
| 
 | |
|   if (const std::string config_path = Path::Combine(game_directory, LOCAL_CONFIG_FILENAME);
 | |
|       !FileSystem::FileExists(config_path.c_str()) &&
 | |
|       !FileSystem::WriteStringToFile(config_path.c_str(),
 | |
|                                      Settings::TextureReplacementSettings().config.ExportToYAML(true), &error))
 | |
|   {
 | |
|     ERROR_LOG("Failed to write configuration template: {}", error.GetDescription());
 | |
|     return false;
 | |
|   }
 | |
| 
 | |
|   if (!FileSystem::CreateDirectory(Path::Combine(game_directory, "dumps").c_str(), false, &error))
 | |
|   {
 | |
|     ERROR_LOG("Failed to create dumps directory: {}", error.GetDescription());
 | |
|     return false;
 | |
|   }
 | |
| 
 | |
|   if (!FileSystem::CreateDirectory(Path::Combine(game_directory, "replacements").c_str(), false, &error))
 | |
|   {
 | |
|     ERROR_LOG("Failed to create replacements directory: {}", error.GetDescription());
 | |
|     return false;
 | |
|   }
 | |
| 
 | |
|   return true;
 | |
| }
 | |
| 
 | |
| std::string GPUTextureCache::GetTextureReplacementDirectory()
 | |
| {
 | |
|   const std::string& serial = GPUThread::GetGameSerial();
 | |
|   std::string dir =
 | |
|     Path::Combine(EmuFolders::Textures, SmallString::from_format("{}" FS_OSPATH_SEPARATOR_STR "replacements", serial));
 | |
|   if (!FileSystem::DirectoryExists(dir.c_str()))
 | |
|   {
 | |
|     // Check for the old directory structure without a replacements subdirectory.
 | |
|     std::string altdir = Path::Combine(EmuFolders::Textures, serial);
 | |
|     if (FileSystem::DirectoryExists(altdir.c_str()))
 | |
|     {
 | |
|       WARNING_LOG("Using deprecated texture replacement directory {}", altdir);
 | |
|       dir = std::move(altdir);
 | |
|     }
 | |
|     else
 | |
|     {
 | |
|       // If this is a multi-disc game, try the first disc.
 | |
|       const GameDatabase::Entry* dbentry = GameDatabase::GetEntryForSerial(serial);
 | |
|       if (dbentry && !dbentry->disc_set_serials.empty() && serial != dbentry->disc_set_serials.front())
 | |
|       {
 | |
|         altdir =
 | |
|           Path::Combine(EmuFolders::Textures, SmallString::from_format("{}" FS_OSPATH_SEPARATOR_STR "replacements",
 | |
|                                                                        dbentry->disc_set_serials.front()));
 | |
|         if (FileSystem::DirectoryExists(altdir.c_str()))
 | |
|         {
 | |
|           WARNING_LOG("Using texture replacements from first disc {}", dbentry->disc_set_serials.front());
 | |
|           dir = std::move(altdir);
 | |
|         }
 | |
|       }
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   return dir;
 | |
| }
 | |
| 
 | |
| std::string GPUTextureCache::GetTextureDumpDirectory()
 | |
| {
 | |
|   return Path::Combine(EmuFolders::Textures,
 | |
|                        SmallString::from_format("{}" FS_OSPATH_SEPARATOR_STR "dumps", GPUThread::GetGameSerial()));
 | |
| }
 | |
| 
 | |
| GPUTextureCache::VRAMReplacementName GPUTextureCache::GetVRAMWriteHash(u32 width, u32 height, const void* pixels)
 | |
| {
 | |
|   const XXH128_hash_t hash = XXH3_128bits(pixels, width * height * sizeof(u16));
 | |
|   return {hash.low64, hash.high64};
 | |
| }
 | |
| 
 | |
| std::string GPUTextureCache::GetVRAMWriteDumpPath(const VRAMReplacementName& name)
 | |
| {
 | |
|   std::string ret;
 | |
|   if (!EnsureGameDirectoryExists())
 | |
|     return ret;
 | |
| 
 | |
|   const std::string dump_directory = GetTextureDumpDirectory();
 | |
|   if (!FileSystem::EnsureDirectoryExists(dump_directory.c_str(), false))
 | |
|     return ret;
 | |
| 
 | |
|   return Path::Combine(dump_directory, SmallString::from_format("vram-write-{}.png", name.ToString()));
 | |
| }
 | |
| 
 | |
| bool GPUTextureCache::LoadLocalConfiguration(bool load_vram_write_replacement_aliases,
 | |
|                                              bool load_texture_replacement_aliases)
 | |
| {
 | |
|   const Settings::TextureReplacementSettings::Configuration old_config = s_state.config;
 | |
| 
 | |
|   // load settings from ini
 | |
|   s_state.config = g_gpu_settings.texture_replacements.config;
 | |
| 
 | |
|   const std::string& game_serial = GPUThread::GetGameSerial();
 | |
|   if (game_serial.empty())
 | |
|     return (s_state.config != old_config);
 | |
| 
 | |
|   const std::optional<std::string> ini_data = FileSystem::ReadFileToString(
 | |
|     Path::Combine(EmuFolders::Textures,
 | |
|                   SmallString::from_format("{}" FS_OSPATH_SEPARATOR_STR "{}", game_serial, LOCAL_CONFIG_FILENAME))
 | |
|       .c_str());
 | |
|   if (!ini_data.has_value() || ini_data->empty())
 | |
|     return (s_state.config != old_config);
 | |
| 
 | |
|   const ryml::Tree tree = ryml::parse_in_arena(LOCAL_CONFIG_FILENAME, to_csubstr(ini_data.value()));
 | |
|   const ryml::ConstNodeRef root = tree.rootref();
 | |
| 
 | |
|   // This is false if all we have are comments
 | |
|   if (!root.is_map())
 | |
|     return (s_state.config != old_config);
 | |
| 
 | |
|   s_state.config.dump_texture_pages = GetOptionalTFromObject<bool>(root, "DumpTexturePages")
 | |
|                                         .value_or(static_cast<bool>(s_state.config.dump_texture_pages));
 | |
|   s_state.config.dump_full_texture_pages = GetOptionalTFromObject<bool>(root, "DumpFullTexturePages")
 | |
|                                              .value_or(static_cast<bool>(s_state.config.dump_full_texture_pages));
 | |
|   s_state.config.dump_texture_force_alpha_channel =
 | |
|     GetOptionalTFromObject<bool>(root, "DumpTextureForceAlphaChannel")
 | |
|       .value_or(static_cast<bool>(s_state.config.dump_texture_force_alpha_channel));
 | |
|   s_state.config.dump_vram_write_force_alpha_channel =
 | |
|     GetOptionalTFromObject<bool>(root, "DumpVRAMWriteForceAlphaChannel")
 | |
|       .value_or(static_cast<bool>(s_state.config.dump_vram_write_force_alpha_channel));
 | |
|   s_state.config.dump_c16_textures =
 | |
|     GetOptionalTFromObject<bool>(root, "DumpC16Textures").value_or(static_cast<bool>(s_state.config.dump_c16_textures));
 | |
|   s_state.config.reduce_palette_range = GetOptionalTFromObject<bool>(root, "ReducePaletteRange")
 | |
|                                           .value_or(static_cast<bool>(s_state.config.reduce_palette_range));
 | |
|   s_state.config.convert_copies_to_writes = GetOptionalTFromObject<bool>(root, "ConvertCopiesToWrites")
 | |
|                                               .value_or(static_cast<bool>(s_state.config.convert_copies_to_writes));
 | |
|   s_state.config.max_vram_write_splits =
 | |
|     GetOptionalTFromObject<bool>(root, "MaxVRAMWriteSplits").value_or(s_state.config.max_vram_write_splits);
 | |
|   s_state.config.max_vram_write_coalesce_width = GetOptionalTFromObject<u16>(root, "MaxVRAMWriteCoalesceWidth")
 | |
|                                                    .value_or(s_state.config.max_vram_write_coalesce_width);
 | |
|   s_state.config.max_vram_write_coalesce_height = GetOptionalTFromObject<u16>(root, "MaxVRAMWriteCoalesceHeight")
 | |
|                                                     .value_or(s_state.config.max_vram_write_coalesce_height);
 | |
|   s_state.config.texture_dump_width_threshold = GetOptionalTFromObject<u16>(root, "DumpTextureWidthThreshold")
 | |
|                                                   .value_or(s_state.config.texture_dump_width_threshold);
 | |
|   s_state.config.texture_dump_height_threshold = GetOptionalTFromObject<u16>(root, "DumpTextureHeightThreshold")
 | |
|                                                    .value_or(s_state.config.texture_dump_height_threshold);
 | |
|   s_state.config.vram_write_dump_width_threshold = GetOptionalTFromObject<u16>(root, "DumpVRAMWriteWidthThreshold")
 | |
|                                                      .value_or(s_state.config.vram_write_dump_width_threshold);
 | |
|   s_state.config.vram_write_dump_height_threshold = GetOptionalTFromObject<u16>(root, "DumpVRAMWriteHeightThreshold")
 | |
|                                                       .value_or(s_state.config.vram_write_dump_height_threshold);
 | |
|   s_state.config.max_hash_cache_entries =
 | |
|     GetOptionalTFromObject<u32>(root, "MaxHashCacheEntries").value_or(s_state.config.max_hash_cache_entries);
 | |
|   s_state.config.max_hash_cache_vram_usage_mb =
 | |
|     GetOptionalTFromObject<u32>(root, "MaxHashCacheVRAMUsageMB").value_or(s_state.config.max_hash_cache_vram_usage_mb);
 | |
|   s_state.config.max_replacement_cache_vram_usage_mb = GetOptionalTFromObject<u32>(root, "MaxReplacementCacheVRAMUsage")
 | |
|                                                          .value_or(s_state.config.max_replacement_cache_vram_usage_mb);
 | |
|   s_state.config.replacement_scale_linear_filter =
 | |
|     GetOptionalTFromObject<bool>(root, "ReplacementScaleLinearFilter")
 | |
|       .value_or(static_cast<bool>(s_state.config.replacement_scale_linear_filter));
 | |
| 
 | |
|   if (load_vram_write_replacement_aliases || load_texture_replacement_aliases)
 | |
|   {
 | |
|     const ryml::ConstNodeRef aliases = root.find_child("Aliases");
 | |
|     if (aliases.valid() && aliases.has_children())
 | |
|       LoadTextureReplacementAliases(aliases, load_vram_write_replacement_aliases, load_texture_replacement_aliases);
 | |
|   }
 | |
| 
 | |
|   // Any change?
 | |
|   return (s_state.config != old_config);
 | |
| }
 | |
| 
 | |
| void GPUTextureCache::ReloadTextureReplacements(bool show_info)
 | |
| {
 | |
|   s_state.dumped_textures.clear();
 | |
|   s_state.dumped_vram_writes.clear();
 | |
|   s_state.vram_replacements.clear();
 | |
|   s_state.vram_write_texture_replacements.clear();
 | |
|   s_state.texture_page_texture_replacements.clear();
 | |
| 
 | |
|   const bool load_vram_write_replacements = (g_gpu_settings.texture_replacements.enable_vram_write_replacements);
 | |
|   const bool load_texture_replacements =
 | |
|     (g_gpu_settings.gpu_texture_cache && g_gpu_settings.texture_replacements.enable_texture_replacements);
 | |
|   const bool prefill_dumped_texture_list =
 | |
|     (g_gpu_settings.texture_replacements.dump_vram_writes || g_gpu_settings.texture_replacements.dump_textures);
 | |
|   const bool prefill_dumped_vram_list =
 | |
|     (g_gpu_settings.texture_replacements.dump_vram_writes || g_gpu_settings.texture_replacements.dump_textures);
 | |
|   if (load_vram_write_replacements || load_texture_replacements || prefill_dumped_texture_list ||
 | |
|       prefill_dumped_vram_list)
 | |
|   {
 | |
|     FindTextureReplacements(load_vram_write_replacements, load_texture_replacements, prefill_dumped_texture_list,
 | |
|                             prefill_dumped_vram_list);
 | |
|   }
 | |
| 
 | |
|   LoadLocalConfiguration(load_vram_write_replacements, load_texture_replacements);
 | |
| 
 | |
|   if (g_gpu_settings.texture_replacements.preload_textures)
 | |
|     PreloadReplacementTextures();
 | |
| 
 | |
|   PurgeUnreferencedTexturesFromCache();
 | |
| 
 | |
|   UpdateVRAMTrackingState();
 | |
|   InvalidateSources();
 | |
| 
 | |
|   if (show_info)
 | |
|   {
 | |
|     const int total =
 | |
|       static_cast<int>(s_state.vram_replacements.size() + s_state.vram_write_texture_replacements.size() +
 | |
|                        s_state.texture_page_texture_replacements.size());
 | |
|     Host::AddIconOSDMessage("ReloadTextureReplacements", ICON_EMOJI_REFRESH,
 | |
|                             (total > 0) ? TRANSLATE_PLURAL_STR("GPU_HW", "%n replacement textures found.",
 | |
|                                                                "Replacement texture count", total) :
 | |
|                                           TRANSLATE_STR("GPU_HW", "No replacement textures found."),
 | |
|                             Host::OSD_INFO_DURATION);
 | |
|   }
 | |
| }
 | |
| 
 | |
| void GPUTextureCache::PurgeUnreferencedTexturesFromCache()
 | |
| {
 | |
|   ReplacementImageCache old_map = std::move(s_state.replacement_image_cache);
 | |
|   GPUReplacementImageCache old_gpu_map = std::move(s_state.gpu_replacement_image_cache);
 | |
|   s_state.replacement_image_cache = ReplacementImageCache();
 | |
|   s_state.gpu_replacement_image_cache = GPUReplacementImageCache();
 | |
| 
 | |
|   const auto reinsert_texture = [&old_map, &old_gpu_map](const std::string& name) {
 | |
|     const auto it2 = old_map.find(name);
 | |
|     if (it2 != old_map.end())
 | |
|     {
 | |
|       s_state.replacement_image_cache.emplace(name, std::move(it2->second));
 | |
|       old_map.erase(it2);
 | |
|     }
 | |
| 
 | |
|     const auto it3 = old_gpu_map.find(name);
 | |
|     if (it3 != old_gpu_map.end())
 | |
|     {
 | |
|       s_state.gpu_replacement_image_cache.emplace(name, std::move(it3->second));
 | |
|       old_gpu_map.erase(it3);
 | |
|     }
 | |
|   };
 | |
| 
 | |
|   for (const auto& it : s_state.vram_replacements)
 | |
|     reinsert_texture(it.second);
 | |
| 
 | |
|   for (const auto& it : s_state.vram_write_texture_replacements)
 | |
|     reinsert_texture(it.second.second);
 | |
| 
 | |
|   for (const auto& it : s_state.texture_page_texture_replacements)
 | |
|     reinsert_texture(it.second.second);
 | |
| }
 | |
| 
 | |
| void GPUTextureCache::ApplyTextureReplacements(SourceKey key, HashType tex_hash, HashType pal_hash,
 | |
|                                                HashCacheEntry* entry)
 | |
| {
 | |
|   std::vector<TextureReplacementSubImage> subimages;
 | |
|   if (HasTexturePageTextureReplacements())
 | |
|   {
 | |
|     GetTexturePageTextureReplacements(subimages, key.page, tex_hash, pal_hash, key.mode, key.palette);
 | |
|   }
 | |
| 
 | |
|   if (HasVRAMWriteTextureReplacements())
 | |
|   {
 | |
|     // Wrapping around the edge for replacement testing breaks 8/16-bit textures in the rightmost page.
 | |
|     const GSVector4i page_rect = GetTextureRectWithoutWrap(key.page, key.mode);
 | |
|     LoopRectPages(page_rect, [&key, &pal_hash, &subimages, &page_rect](u32 pn) {
 | |
|       const PageEntry& page = s_state.pages[pn];
 | |
|       ListIterate(page.writes, [&key, &pal_hash, &subimages, &page_rect](const VRAMWrite* vrw) {
 | |
|         // TODO: Is this needed?
 | |
|         if (!vrw->write_rect.rintersects(page_rect))
 | |
|           return;
 | |
| 
 | |
|         // Map VRAM write to the start of the page.
 | |
|         GSVector2i offset_to_page = page_rect.sub32(vrw->write_rect).xy();
 | |
| 
 | |
|         // Need to apply the texture shift on the X dimension, not Y. No SLLV on SSE4.. :(
 | |
|         offset_to_page.x = ApplyTextureModeShift(key.mode, offset_to_page.x);
 | |
| 
 | |
|         GetVRAMWriteTextureReplacements(subimages, vrw->hash, pal_hash, key.mode, key.palette, offset_to_page);
 | |
|       });
 | |
|     });
 | |
|   }
 | |
| 
 | |
|   if (subimages.empty())
 | |
|     return;
 | |
| 
 | |
|   float max_scale_x = subimages[0].scale_x, max_scale_y = subimages[0].scale_y;
 | |
|   for (size_t i = 0; i < subimages.size(); i++)
 | |
|   {
 | |
|     max_scale_x = std::max(max_scale_x, subimages[i].scale_x);
 | |
|     max_scale_y = std::max(max_scale_y, subimages[i].scale_y);
 | |
|   }
 | |
| 
 | |
|   // Clamp to max texture size
 | |
|   const float max_possible_scale =
 | |
|     static_cast<float>(g_gpu_device->GetMaxTextureSize()) / static_cast<float>(TEXTURE_PAGE_WIDTH);
 | |
|   max_scale_x = std::min(max_scale_x, max_possible_scale);
 | |
|   max_scale_y = std::min(max_scale_y, max_possible_scale);
 | |
| 
 | |
|   const GSVector4 max_scale_v = GSVector4(max_scale_x, max_scale_y).xyxy();
 | |
|   const u32 new_width = static_cast<u32>(std::ceil(static_cast<float>(TEXTURE_PAGE_WIDTH) * max_scale_x));
 | |
|   const u32 new_height = static_cast<u32>(std::ceil(static_cast<float>(TEXTURE_PAGE_HEIGHT) * max_scale_y));
 | |
|   if (!s_state.replacement_texture_render_target || s_state.replacement_texture_render_target->GetWidth() < new_width ||
 | |
|       s_state.replacement_texture_render_target->GetHeight() < new_height)
 | |
|   {
 | |
|     // NOTE: Not recycled, it's unlikely to be reused.
 | |
|     s_state.replacement_texture_render_target.reset();
 | |
|     if (!(s_state.replacement_texture_render_target =
 | |
|             g_gpu_device->CreateTexture(new_width, new_height, 1, 1, 1, GPUTexture::Type::RenderTarget,
 | |
|                                         REPLACEMENT_TEXTURE_FORMAT, GPUTexture::Flags::None)))
 | |
|     {
 | |
|       ERROR_LOG("Failed to create {}x{} render target.", new_width, new_height);
 | |
|       return;
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   // Grab the actual texture beforehand, in case we OOM.
 | |
|   std::unique_ptr<GPUTexture> replacement_tex = g_gpu_device->FetchTexture(
 | |
|     new_width, new_height, 1, 1, 1, GPUTexture::Type::Texture, REPLACEMENT_TEXTURE_FORMAT, GPUTexture::Flags::None);
 | |
|   if (!replacement_tex)
 | |
|   {
 | |
|     ERROR_LOG("Failed to create {}x{} texture.", new_width, new_height);
 | |
|     return;
 | |
|   }
 | |
| 
 | |
|   GL_SCOPE_FMT("ApplyTextureReplacements({:016X}, {:016X}) => {}x{}", tex_hash, pal_hash, replacement_tex->GetWidth(),
 | |
|                replacement_tex->GetHeight());
 | |
| 
 | |
|   // TODO: Use rects instead of fullscreen tris, maybe avoid the copy..
 | |
|   g_gpu_device->InvalidateRenderTarget(s_state.replacement_texture_render_target.get());
 | |
|   g_gpu_device->SetRenderTarget(s_state.replacement_texture_render_target.get());
 | |
| 
 | |
|   GL_INS("Upscale Texture Page");
 | |
|   alignas(VECTOR_ALIGNMENT) float uniforms[8];
 | |
|   GSVector2 texture_size = GSVector2(GSVector2i(entry->texture->GetWidth(), entry->texture->GetHeight()));
 | |
|   GSVector4::store<true>(&uniforms[0], GSVector4::cxpr(0.0f, 0.0f, 1.0f, 1.0f));
 | |
|   GSVector2::store<true>(&uniforms[4], texture_size);
 | |
|   GSVector2::store<true>(&uniforms[6], GSVector2::cxpr(1.0f) / texture_size);
 | |
|   g_gpu_device->SetViewportAndScissor(0, 0, new_width, new_height);
 | |
|   g_gpu_device->SetPipeline(s_state.replacement_upscale_pipeline.get());
 | |
|   g_gpu_device->PushUniformBuffer(uniforms, sizeof(uniforms));
 | |
|   g_gpu_device->SetTextureSampler(0, entry->texture.get(), g_gpu_device->GetNearestSampler());
 | |
|   g_gpu_device->Draw(3, 0);
 | |
| 
 | |
|   for (const TextureReplacementSubImage& si : subimages)
 | |
|   {
 | |
|     GL_INS_FMT("Blit {}x{} replacement from {} to {}", si.texture->GetWidth(), si.texture->GetHeight(), si.src_rect,
 | |
|                si.dst_rect);
 | |
| 
 | |
|     const GSVector4 src_rect = (GSVector4(GSVector4i::xyxy(si.src_rect.xy(), si.src_rect.rsize())) *
 | |
|                                 GSVector4::xyxy(GSVector2(si.scale_x, si.scale_y))) /
 | |
|                                GSVector4(GSVector4i::xyxy(si.texture->GetSizeVec()));
 | |
|     const GSVector4i dst_rect = GSVector4i(GSVector4(si.dst_rect) * max_scale_v);
 | |
|     texture_size = GSVector2(si.texture->GetSizeVec());
 | |
|     GSVector4::store<true>(&uniforms[0], src_rect);
 | |
|     GSVector2::store<true>(&uniforms[4], texture_size);
 | |
|     GSVector2::store<true>(&uniforms[6], GSVector2::cxpr(1.0f) / texture_size);
 | |
|     g_gpu_device->SetViewportAndScissor(dst_rect);
 | |
|     g_gpu_device->SetTextureSampler(0, si.texture,
 | |
|                                     s_state.config.replacement_scale_linear_filter ? g_gpu_device->GetLinearSampler() :
 | |
|                                                                                      g_gpu_device->GetNearestSampler());
 | |
|     g_gpu_device->SetPipeline(si.invert_alpha ? s_state.replacement_semitransparent_draw_pipeline.get() :
 | |
|                                                 s_state.replacement_draw_pipeline.get());
 | |
|     g_gpu_device->PushUniformBuffer(uniforms, sizeof(uniforms));
 | |
|     g_gpu_device->Draw(3, 0);
 | |
|   }
 | |
| 
 | |
|   g_gpu_device->CopyTextureRegion(replacement_tex.get(), 0, 0, 0, 0, s_state.replacement_texture_render_target.get(), 0,
 | |
|                                   0, 0, 0, new_width, new_height);
 | |
|   g_gpu_device->RecycleTexture(std::move(entry->texture));
 | |
|   entry->texture = std::move(replacement_tex);
 | |
| 
 | |
|   s_state.hw_backend->RestoreDeviceContext();
 | |
| } |