GPU: Small optimization to lanczos shader

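Apparently the Adreno shader compiler can't convert the early-return branches in lanczos() into selects on its own, so compute the weight unconditionally and select the result by hand.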

~23% performance improvement.
pull/3372/head
Stenzek 2 weeks ago
parent 958fd0049c
commit ce8b1f0996
No known key found for this signature in database

@@ -108,15 +108,13 @@ CONSTANT float PI = 3.14159265359;
 float lanczos(float x)
 {
   x = abs(x);
-  if (x < 0.0001)
-    return 1.0;
-  if (x > float(KERNEL_SIZE))
-    return 0.0;
-
-  float px = PI * x;
-  return (float(KERNEL_SIZE) * sin(px) * sin(px / float(KERNEL_SIZE))) / (px * px);
+  float px = PI * x;
+  float v = (float(KERNEL_SIZE) * sin(px) * sin(px / float(KERNEL_SIZE))) / (px * px);
+  v = (x < 0.0001) ? 1.0 : v;
+  v = (x > float(KERNEL_SIZE)) ? 0.0 : v;
+  return v;
 }
 )";
@@ -133,16 +131,16 @@ float lanczos(float x)
for (int i = -KERNEL_SIZE; i <= KERNEL_SIZE; i++) for (int i = -KERNEL_SIZE; i <= KERNEL_SIZE; i++)
{ {
for (int j = -KERNEL_SIZE; j <= KERNEL_SIZE; j++) for (int j = -KERNEL_SIZE; j <= KERNEL_SIZE; j++)
{ {
float2 offset = float2(int2(i, j)); float2 offset = float2(int2(i, j));
float2 sample_pos = (src + offset) * u_src_size.zw; float2 sample_pos = (src + offset) * u_src_size.zw;
float2 dxdy = src_pixel - (src + offset); float2 dxdy = src_pixel - (src + offset);
float weight = lanczos(dxdy.x) * lanczos(dxdy.y); float weight = lanczos(dxdy.x) * lanczos(dxdy.y);
color += SAMPLE_TEXTURE_LEVEL(samp0, ClampUV(sample_pos), 0.0).rgb * weight; color += SAMPLE_TEXTURE_LEVEL(samp0, ClampUV(sample_pos), 0.0).rgb * weight;
total_weight += weight; total_weight += weight;
} }
} }
o_col0 = float4(color / total_weight, 1.0); o_col0 = float4(color / total_weight, 1.0);

Loading…
Cancel
Save