aboutsummaryrefslogtreecommitdiff
path: root/src/GPU3D_Soft.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/GPU3D_Soft.cpp')
-rw-r--r--src/GPU3D_Soft.cpp149
1 files changed, 107 insertions, 42 deletions
diff --git a/src/GPU3D_Soft.cpp b/src/GPU3D_Soft.cpp
index 3cbb71e..edc4ce9 100644
--- a/src/GPU3D_Soft.cpp
+++ b/src/GPU3D_Soft.cpp
@@ -155,49 +155,82 @@ void Reset()
// interpolation, avoiding precision loss from the aforementioned approximation.
// Which is desirable when using the GPU to draw 2D graphics.
+template<int dir>
class Interpolator
{
public:
Interpolator() {}
- Interpolator(s32 x0, s32 x1, s32 w0, s32 w1, int shift)
+ Interpolator(s32 x0, s32 x1, s32 w0, s32 w1)
{
- Setup(x0, x1, w0, w1, shift);
+ Setup(x0, x1, w0, w1);
}
- void Setup(s32 x0, s32 x1, s32 w0, s32 w1, int shift)
+ void Setup(s32 x0, s32 x1, s32 w0, s32 w1)
{
this->x0 = x0;
this->x1 = x1;
this->xdiff = x1 - x0;
- this->shift = shift;
- this->w0factor = (s64)w0 * xdiff;
- this->w1factor = (s64)w1 * xdiff;
- this->wdiff = w1 - w0;
+ // calculate reciprocals for linear mode and Z interpolation
+ // TODO eventually: use a faster reciprocal function?
+ if (this->xdiff != 0)
+ this->xrecip = (1<<30) / this->xdiff;
+ else
+ this->xrecip = 0;
+ this->xrecip_z = this->xrecip >> 8;
+
+ // linear mode is used if both W values are equal and have
+ // low-order bits cleared (0-6 along X, 1-6 along Y)
+ u32 mask = dir ? 0x7E : 0x7F;
+ if ((w0 == w1) && !(w0 & mask) && !(w1 & mask))
+ this->linear = true;
+ else
+ this->linear = false;
+
+ if (dir)
+ {
+ // along Y
+
+ if ((w0 & 0x1) && !(w1 & 0x1))
+ {
+ this->w0n = w0 - 1;
+ this->w0d = w0 + 1;
+ this->w1d = w1;
+ }
+ else
+ {
+ this->w0n = w0 & 0xFFFE;
+ this->w0d = w0 & 0xFFFE;
+ this->w1d = w1 & 0xFFFE;
+ }
+
+ this->shift = 9;
+ }
+ else
+ {
+ // along X
+
+ this->w0n = w0;
+ this->w0d = w0;
+ this->w1d = w1;
+
+ this->shift = 8;
+ }
}
void SetX(s32 x)
{
x -= x0;
this->x = x;
- if (xdiff != 0 && wdiff != 0)
+ if (xdiff != 0 && !linear)
{
- // TODO: hardware tests show that this method is too precise
- // I haven't yet figured out what the hardware does, though
-
- if (w1factor==0 || w0factor==0) { yfactor = 0; return; }
+ s64 num = ((s64)x * w0n) << shift;
+ s32 den = (x * w0d) + ((xdiff-x) * w1d);
- s64 num = ((s64)x << (shift + 40)) / w1factor;
- s64 denw0 = ((s64)(xdiff-x) << 40) / w0factor;
- s64 denw1 = num >> shift;
-
- s64 denom = denw0 + denw1;
- if (denom == 0)
- yfactor = 0;
- else
- {
- yfactor = (s32)(num / denom);
- }
+ // this seems to be a proper division on hardware :/
+ // I haven't been able to find cases that produce imperfect output
+ if (den == 0) yfactor = 0;
+ else yfactor = (s32)(num / den);
}
}
@@ -205,7 +238,7 @@ public:
{
if (xdiff == 0 || y0 == y1) return y0;
- if (wdiff != 0)
+ if (!linear)
{
// perspective-correct approx. interpolation
if (y0 < y1)
@@ -216,10 +249,11 @@ public:
else
{
// linear interpolation
+ // checkme: the rounding bias there (3<<24) is a guess
if (y0 < y1)
- return y0 + (((y1-y0) * x) / xdiff);
+ return y0 + ((((s64)(y1-y0) * x * xrecip) + (3<<24)) >> 30);
else
- return y1 + (((y0-y1) * (xdiff-x)) / xdiff);
+ return y1 + ((((s64)(y0-y1) * (xdiff-x) * xrecip) + (3<<24)) >> 30);
}
}
@@ -227,9 +261,9 @@ public:
{
if (xdiff == 0 || z0 == z1) return z0;
- if ((wdiff != 0) && wbuffer)
+ if (wbuffer)
{
- // perspective-correct approx. interpolation
+ // W-buffering: perspective-correct approx. interpolation
if (z0 < z1)
return z0 + (((s64)(z1-z0) * yfactor) >> shift);
else
@@ -237,21 +271,52 @@ public:
}
else
{
- // linear interpolation
+ // Z-buffering: linear interpolation
+ // still doesn't quite match hardware...
+ s32 base, disp, factor;
+
if (z0 < z1)
- return z0 + (((s64)(z1-z0) * x) / xdiff);
+ {
+ base = z0;
+ disp = z1 - z0;
+ factor = x;
+ }
+ else
+ {
+ base = z1;
+ disp = z0 - z1,
+ factor = xdiff - x;
+ }
+
+ if (dir)
+ {
+ int shift = 0;
+ while (disp > 0x3FF)
+ {
+ disp >>= 1;
+ shift++;
+ }
+
+ return base + ((((s64)disp * factor * xrecip_z) >> 22) << shift);
+ }
else
- return z1 + (((s64)(z0-z1) * (xdiff-x)) / xdiff);
+ {
+ disp >>= 9;
+ return base + (((s64)disp * factor * xrecip_z) >> 13);
+ }
}
}
private:
s32 x0, x1, xdiff, x;
- s64 w0factor, w1factor;
- s32 wdiff;
+
int shift;
+ bool linear;
+
+ s32 xrecip, xrecip_z;
+ s32 w0n, w0d, w1d;
- s32 yfactor;
+ u32 yfactor;
};
@@ -280,7 +345,7 @@ public:
Increment = 0;
XMajor = false;
- Interp.Setup(0, 0, 0, 0, 9);
+ Interp.Setup(0, 0, 0, 0);
Interp.SetX(0);
return x0;
@@ -347,8 +412,8 @@ public:
if (XMajor)
{
- if (side) Interp.Setup(x0-1, x1-1, w0, w1, 9); // checkme
- else Interp.Setup(x0, x1, w0, w1, 9);
+ if (side) Interp.Setup(x0-1, x1-1, w0, w1); // checkme
+ else Interp.Setup(x0, x1, w0, w1);
Interp.SetX(x);
// used for calculating AA coverage
@@ -356,7 +421,7 @@ public:
}
else
{
- Interp.Setup(y0, y1, w0, w1, 9);
+ Interp.Setup(y0, y1, w0, w1);
Interp.SetX(y);
//ycov_incr = Increment >> 2;
@@ -434,7 +499,7 @@ public:
s32 Increment;
bool Negative;
bool XMajor;
- Interpolator Interp;
+ Interpolator<1> Interp;
private:
s32 x0, xmin, xmax;
@@ -1009,8 +1074,8 @@ void RenderPolygonScanline(RendererPolygon* rp, s32 y)
bool l_filledge, r_filledge;
s32 l_edgelen, r_edgelen;
s32 l_edgecov, r_edgecov;
- Interpolator* interp_start;
- Interpolator* interp_end;
+ Interpolator<1>* interp_start;
+ Interpolator<1>* interp_end;
xstart = rp->XL;
xend = rp->XR;
@@ -1103,7 +1168,7 @@ void RenderPolygonScanline(RendererPolygon* rp, s32 y)
int edge;
s32 x = xstart;
- Interpolator interpX(xstart, xend+1, wl, wr, 8);
+ Interpolator<0> interpX(xstart, xend+1, wl, wr);
if (x < 0) x = 0;
s32 xlimit;