| 
						
						
							
								
							
						
						
					 | 
				
			
			 | 
			 | 
			
				@ -1519,6 +1519,237 @@ constinit const DrawTriangleFunctionTable DrawTriangleFunctions = {
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				   {{&DrawTriangle<true, true, false, false>, &DrawTriangle<true, true, false, true>},
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				    {&DrawTriangle<true, true, true, false>, &DrawTriangle<true, true, true, true>}}}};
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				static void FillVRAMImpl(u32 x, u32 y, u32 width, u32 height, u32 color, bool interlaced, u8 active_line_lsb)
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				{
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				#ifdef USE_VECTOR
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				  const u16 color16 = VRAMRGBA8888ToRGBA5551(color);
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				  const GSVector4i fill = GSVector4i(color16, color16, color16, color16, color16, color16, color16, color16);
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				  constexpr u32 vector_width = 8;
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				  const u32 aligned_width = Common::AlignDownPow2(width, vector_width);
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				  if ((x + width) <= VRAM_WIDTH && !interlaced)
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				  {
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				    for (u32 yoffs = 0; yoffs < height; yoffs++)
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				    {
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				      const u32 row = (y + yoffs) % VRAM_HEIGHT;
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				      u16* row_ptr = &g_vram[row * VRAM_WIDTH + x];
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				      u32 xoffs = 0;
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				      for (; xoffs < aligned_width; xoffs += vector_width, row_ptr += vector_width)
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				        GSVector4i::store<false>(row_ptr, fill);
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				      for (; xoffs < width; xoffs++)
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				        *(row_ptr++) = color16;
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				    }
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				  }
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				  else if (interlaced)
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				  {
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				    // Hardware tests show that fills seem to break on the first two lines when the offset matches the displayed field.
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				    const u32 active_field = active_line_lsb;
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				    if ((x + width) <= VRAM_WIDTH)
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				    {
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				      for (u32 yoffs = 0; yoffs < height; yoffs++)
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				      {
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				        const u32 row = (y + yoffs) % VRAM_HEIGHT;
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				        if ((row & u32(1)) == active_field)
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				          continue;
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				        u16* row_ptr = &g_vram[row * VRAM_WIDTH + x];
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				        u32 xoffs = 0;
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				        for (; xoffs < aligned_width; xoffs += vector_width, row_ptr += vector_width)
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				          GSVector4i::store<false>(row_ptr, fill);
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				        for (; xoffs < width; xoffs++)
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				          *(row_ptr++) = color16;
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				      }
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				    }
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				    else
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				    {
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				      for (u32 yoffs = 0; yoffs < height; yoffs++)
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				      {
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				        const u32 row = (y + yoffs) % VRAM_HEIGHT;
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				        if ((row & u32(1)) == active_field)
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				          continue;
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				        u16* row_ptr = &g_vram[row * VRAM_WIDTH];
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				        for (u32 xoffs = 0; xoffs < width; xoffs++)
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				        {
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				          const u32 col = (x + xoffs) % VRAM_WIDTH;
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				          row_ptr[col] = color16;
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				        }
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				      }
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				    }
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				  }
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				  else
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				  {
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				    for (u32 yoffs = 0; yoffs < height; yoffs++)
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				    {
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				      const u32 row = (y + yoffs) % VRAM_HEIGHT;
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				      u16* row_ptr = &g_vram[row * VRAM_WIDTH];
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				      for (u32 xoffs = 0; xoffs < width; xoffs++)
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				      {
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				        const u32 col = (x + xoffs) % VRAM_WIDTH;
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				        row_ptr[col] = color16;
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				      }
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				    }
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				  }
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				#else
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				  const u16 color16 = VRAMRGBA8888ToRGBA5551(color);
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				  if ((x + width) <= VRAM_WIDTH && !interlaced)
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				  {
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				    for (u32 yoffs = 0; yoffs < height; yoffs++)
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				    {
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				      const u32 row = (y + yoffs) % VRAM_HEIGHT;
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				      std::fill_n(&g_vram[row * VRAM_WIDTH + x], width, color16);
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				    }
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				  }
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				  else if (interlaced)
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				  {
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				    // Hardware tests show that fills seem to break on the first two lines when the offset matches the displayed field.
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				    const u32 active_field = active_line_lsb;
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				    for (u32 yoffs = 0; yoffs < height; yoffs++)
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				    {
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				      const u32 row = (y + yoffs) % VRAM_HEIGHT;
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				      if ((row & u32(1)) == active_field)
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				        continue;
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				      u16* row_ptr = &g_vram[row * VRAM_WIDTH];
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				      for (u32 xoffs = 0; xoffs < width; xoffs++)
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				      {
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				        const u32 col = (x + xoffs) % VRAM_WIDTH;
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				        row_ptr[col] = color16;
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				      }
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				    }
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				  }
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				  else
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				  {
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				    for (u32 yoffs = 0; yoffs < height; yoffs++)
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				    {
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				      const u32 row = (y + yoffs) % VRAM_HEIGHT;
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				      u16* row_ptr = &g_vram[row * VRAM_WIDTH];
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				      for (u32 xoffs = 0; xoffs < width; xoffs++)
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				      {
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				        const u32 col = (x + xoffs) % VRAM_WIDTH;
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				        row_ptr[col] = color16;
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				      }
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				    }
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				  }
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				#endif
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				}
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				static void WriteVRAMImpl(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask)
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				{
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				  // TODO: Vector implementation
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				  // Fast path when the copy is not oversized.
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				  if ((x + width) <= VRAM_WIDTH && (y + height) <= VRAM_HEIGHT && !set_mask && !check_mask)
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				  {
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				    const u16* src_ptr = static_cast<const u16*>(data);
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				    u16* dst_ptr = &g_vram[y * VRAM_WIDTH + x];
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				    for (u32 yoffs = 0; yoffs < height; yoffs++)
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				    {
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				      std::copy_n(src_ptr, width, dst_ptr);
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				      src_ptr += width;
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				      dst_ptr += VRAM_WIDTH;
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				    }
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				  }
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				  else
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				  {
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				    // Slow path when we need to handle wrap-around.
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				    // During transfer/render operations, if ((dst_pixel & mask_and) == 0) { pixel = src_pixel | mask_or }
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				    const u16* src_ptr = static_cast<const u16*>(data);
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				    const u16 mask_and = check_mask ? 0x8000u : 0x0000u;
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				    const u16 mask_or = set_mask ? 0x8000u : 0x0000u;
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				    for (u32 row = 0; row < height;)
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				    {
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				      u16* dst_row_ptr = &g_vram[((y + row++) % VRAM_HEIGHT) * VRAM_WIDTH];
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				      for (u32 col = 0; col < width;)
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				      {
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				        // TODO: Handle unaligned reads...
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				        u16* pixel_ptr = &dst_row_ptr[(x + col++) % VRAM_WIDTH];
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				        if (((*pixel_ptr) & mask_and) == 0)
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				          *pixel_ptr = *(src_ptr++) | mask_or;
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				      }
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				    }
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				  }
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				}
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				static void CopyVRAMImpl(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height, bool set_mask,
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				                         bool check_mask)
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				{
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				  // TODO: Vector implementation.
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				  // Break up oversized copies. This behavior has not been verified on console.
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				  if ((src_x + width) > VRAM_WIDTH || (dst_x + width) > VRAM_WIDTH)
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				  {
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				    u32 remaining_rows = height;
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				    u32 current_src_y = src_y;
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				    u32 current_dst_y = dst_y;
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				    while (remaining_rows > 0)
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				    {
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				      const u32 rows_to_copy =
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				        std::min<u32>(remaining_rows, std::min<u32>(VRAM_HEIGHT - current_src_y, VRAM_HEIGHT - current_dst_y));
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				      u32 remaining_columns = width;
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				      u32 current_src_x = src_x;
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				      u32 current_dst_x = dst_x;
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				      while (remaining_columns > 0)
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				      {
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				        const u32 columns_to_copy =
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				          std::min<u32>(remaining_columns, std::min<u32>(VRAM_WIDTH - current_src_x, VRAM_WIDTH - current_dst_x));
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				        CopyVRAM(current_src_x, current_src_y, current_dst_x, current_dst_y, columns_to_copy, rows_to_copy, set_mask,
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				                 check_mask);
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				        current_src_x = (current_src_x + columns_to_copy) % VRAM_WIDTH;
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				        current_dst_x = (current_dst_x + columns_to_copy) % VRAM_WIDTH;
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				        remaining_columns -= columns_to_copy;
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				      }
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				      current_src_y = (current_src_y + rows_to_copy) % VRAM_HEIGHT;
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				      current_dst_y = (current_dst_y + rows_to_copy) % VRAM_HEIGHT;
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				      remaining_rows -= rows_to_copy;
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				    }
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				    return;
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				  }
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				  // This doesn't have a fast path, but do we really need one? It's not common.
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				  const u16 mask_and = check_mask ? 0x8000u : 0x0000u;
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				  const u16 mask_or = set_mask ? 0x8000u : 0x0000u;
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				  // Copy in reverse when src_x < dst_x, this is verified on console.
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				  if (src_x < dst_x || ((src_x + width - 1) % VRAM_WIDTH) < ((dst_x + width - 1) % VRAM_WIDTH))
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				  {
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				    for (u32 row = 0; row < height; row++)
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				    {
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				      const u16* src_row_ptr = &g_vram[((src_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				      u16* dst_row_ptr = &g_vram[((dst_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				      for (s32 col = static_cast<s32>(width - 1); col >= 0; col--)
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				      {
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				        const u16 src_pixel = src_row_ptr[(src_x + static_cast<u32>(col)) % VRAM_WIDTH];
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				        u16* dst_pixel_ptr = &dst_row_ptr[(dst_x + static_cast<u32>(col)) % VRAM_WIDTH];
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				        *dst_pixel_ptr = ((*dst_pixel_ptr & mask_and) == 0) ? (src_pixel | mask_or) : *dst_pixel_ptr;
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				      }
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				    }
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				  }
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				  else
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				  {
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				    for (u32 row = 0; row < height; row++)
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				    {
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				      const u16* src_row_ptr = &g_vram[((src_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				      u16* dst_row_ptr = &g_vram[((dst_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				      for (u32 col = 0; col < width; col++)
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				      {
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				        const u16 src_pixel = src_row_ptr[(src_x + col) % VRAM_WIDTH];
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				        u16* dst_pixel_ptr = &dst_row_ptr[(dst_x + col) % VRAM_WIDTH];
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				        *dst_pixel_ptr = ((*dst_pixel_ptr & mask_and) == 0) ? (src_pixel | mask_or) : *dst_pixel_ptr;
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				      }
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				    }
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				  }
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				}
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				#ifdef __INTELLISENSE__
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				}
 | 
			
		
		
	
		
			
				 | 
				 | 
			
			 | 
			 | 
			
				#endif
 | 
			
		
		
	
	
		
			
				
					| 
						
						
						
					 | 
				
			
			 | 
			 | 
			
				
 
 |