Skip to content

Commit 28a7912

Browse files
committed
Add comments, re-enable remaining tests (they do work, just forgot to add expected values)
1 parent 8d40684 commit 28a7912

File tree

4 files changed

+49
-12
lines changed

4 files changed

+49
-12
lines changed

GPU/Common/GPUStateUtils.cpp

+36-3
Original file line numberDiff line numberDiff line change
@@ -502,22 +502,55 @@ ReplaceBlendType ReplaceBlendWithShader(GEBufferFormat bufferFormat) {
502502
static const float DEPTH_SLICE_FACTOR_HIGH = 4.0f;
503503
static const float DEPTH_SLICE_FACTOR_16BIT = 256.0f;
504504

505+
// The supported flag combinations
506+
//
507+
// 0 - "Old"-style GL depth.
508+
// Or "Non-accurate depth" : effectively ignore minz / maxz. Map Z values based on viewport, which clamps.
509+
// This skews depth in many instances. Depth can be inverted in this mode if viewport says.
510+
// This is completely wrong, but works in some cases (probably because some game devs assumed it was how it worked)
511+
// and avoids some depth clamp issues.
512+
//
513+
// GPU_USE_ACCURATE_DEPTH:
514+
// Accurate depth: Z in the framebuffer matches the range of Z used on the PSP linearly in some way. We choose
515+
// a centered range, to simulate clamping by letting otherwise out-of-range pixels survive the 0 and 1 cutoffs.
516+
// Clip depth based on minz/maxz, and viewport is just a means to scale and center the value, not clipping or mapping to stored values.
517+
//
518+
// GPU_USE_ACCURATE_DEPTH | GPU_USE_DEPTH_CLAMP:
519+
// Variant of GPU_USE_ACCURATE_DEPTH, just the range is the nice and convenient 0-1 since we can use
520+
// hardware depth clamp. Only viable in accurate depth mode: clamps depth, and can therefore use the full 0-1 range. Using the full 0-1 range is not what "accurate" implies by itself; it's implied by depth clamp (which also means we're clamping).
521+
//
522+
// GPU_USE_ACCURATE_DEPTH | GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT:
523+
// GPU_USE_ACCURATE_DEPTH | GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT | GPU_USE_DEPTH_CLAMP:
524+
// Only viable in accurate depth mode, means to use a range of the 24-bit depth values available
525+
// from the GPU to represent the 16-bit values the PSP had, to try to make everything round and
526+
// z-fight (close to) the same way as on hardware, cheaply (cheaper than rounding depth in fragment shader).
527+
// We automatically switch to this if Z tests for equality are used.
528+
// Depth clamp has no noticeable effect here if set.
529+
//
530+
// Any other combinations of these particular flags are bogus (like for example a lonely GPU_USE_DEPTH_CLAMP).
531+
505532
float DepthSliceFactor(u32 useFlags) {
506533
if (!(useFlags & GPU_USE_ACCURATE_DEPTH)) {
534+
// Old style depth.
507535
return 1.0f;
508536
}
509537
if (useFlags & GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT) {
538+
// Accurate depth but 16-bit resolution, so squish.
510539
return DEPTH_SLICE_FACTOR_16BIT;
511540
}
512541
if (useFlags & GPU_USE_DEPTH_CLAMP) {
542+
// Accurate depth, but we can use the full range since clamping is available.
513543
return 1.0f;
514544
}
545+
546+
// Standard accurate depth.
515547
return DEPTH_SLICE_FACTOR_HIGH;
516548
}
517549

518550
// This is used for float values which might not be integers, but are in the integer scale of 0-65535.
519551
float ToScaledDepthFromIntegerScale(u32 useFlags, float z) {
520552
if (!(useFlags & GPU_USE_ACCURATE_DEPTH)) {
553+
// Old style depth, shortcut.
521554
return z * (1.0f / 65535.0f);
522555
}
523556

@@ -538,9 +571,9 @@ DepthScaleFactors GetDepthScaleFactors(u32 useFlags) {
538571
return DepthScaleFactors(0.0f, 65535.0f);
539572
}
540573

541-
const float depthSliceFactor = DepthSliceFactor(useFlags);
542-
const float offset = 0.5f * (depthSliceFactor - 1.0f) * (1.0f / depthSliceFactor);
543-
return DepthScaleFactors(offset, depthSliceFactor * 65535.0f);
574+
const double depthSliceFactor = DepthSliceFactor(useFlags);
575+
const double offset = 0.5f * (depthSliceFactor - 1.0f) * (1.0f / depthSliceFactor);
576+
return DepthScaleFactors(offset, (float)(depthSliceFactor * 65535.0));
544577
}
545578

546579
void ConvertViewportAndScissor(bool useBufferedRendering, float renderWidth, float renderHeight, int bufferWidth, int bufferHeight, ViewportAndScissor &out) {

GPU/Common/GPUStateUtils.h

+6-2
Original file line numberDiff line numberDiff line change
@@ -90,32 +90,36 @@ struct ViewportAndScissor {
9090
void ConvertViewportAndScissor(bool useBufferedRendering, float renderWidth, float renderHeight, int bufferWidth, int bufferHeight, ViewportAndScissor &out);
9191
void UpdateCachedViewportState(const ViewportAndScissor &vpAndScissor);
9292

93+
// NOTE: See the .cpp file for detailed comment about how the use flags are interpreted.
9394
class DepthScaleFactors {
9495
public:
9596
// This should only be used from GetDepthScaleFactors.
9697
DepthScaleFactors(double offset, double scale) : offset_(offset), scale_(scale) {}
9798

9899
// Decodes a value from a depth buffer to a value in the range 0..65535
99100
float DecodeToU16(float z) const {
100-
return (z - offset_) * scale_;
101+
return (float)((z - offset_) * scale_);
101102
}
102103

103104
// Encodes a value from the range 0..65535 to a normalized depth value (0-1), in the
104105
// range that we write to the depth buffer.
105106
float EncodeFromU16(float z_u16) const {
106-
return (z_u16 / scale_) + offset_;
107+
return (float)(((double)z_u16 / scale_) + offset_);
107108
}
108109

109110
float Offset() const { return (float)offset_; }
110111
float ScaleU16() const { return (float)scale_; }
111112
// float Scale() const { return scale_ / 65535.0f; }
112113

113114
private:
115+
// Doubles hardly cost anything these days, and precision matters here.
114116
double offset_;
115117
double scale_;
116118
};
117119

118120
DepthScaleFactors GetDepthScaleFactors(u32 useFlags);
121+
122+
// These two will be replaced with just DepthScaleFactors.
119123
float ToScaledDepthFromIntegerScale(u32 useFlags, float z);
120124
float DepthSliceFactor(u32 useFlags);
121125

unittest/UnitTest.cpp

+6-6
Original file line numberDiff line numberDiff line change
@@ -806,17 +806,17 @@ static bool TestDepthMath() {
806806
// TODO: What about GPU_USE_DEPTH_CLAMP? It basically overrides GPU_USE_ACCURATE_DEPTH?
807807

808808
// These are in normalized space.
809-
static const volatile float testValues[] = { 0.0f, 0.1f, 0.5f, 0.9f, 1.0f };
809+
static const volatile float testValues[] = { 0.0f, 0.1f, 0.5f, M_PI / 4.0f, 0.9f, 1.0f };
810810

811811
static const u32 useFlagsArray[] = {
812812
0,
813813
GPU_USE_ACCURATE_DEPTH,
814814
GPU_USE_ACCURATE_DEPTH | GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT,
815-
// GPU_USE_DEPTH_CLAMP | GPU_USE_ACCURATE_DEPTH, fails
816-
// GPU_USE_DEPTH_CLAMP | GPU_USE_ACCURATE_DEPTH | GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT, fails
815+
GPU_USE_DEPTH_CLAMP | GPU_USE_ACCURATE_DEPTH,
816+
GPU_USE_DEPTH_CLAMP | GPU_USE_ACCURATE_DEPTH | GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT, // Here, GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT should take precedence over GPU_USE_DEPTH_CLAMP.
817817
};
818-
static const float expectedScale[] = { 65535.0f, 262140.0f, 16776960.0f };
819-
static const float expectedOffset[] = { 0.0f, 0.375f, 0.498047f };
818+
static const float expectedScale[] = { 65535.0f, 262140.0f, 16776960.0f, 65535.0f, 16776960.0f, };
819+
static const float expectedOffset[] = { 0.0f, 0.375f, 0.498047f, 0.0f, 0.498047f, };
820820

821821
EXPECT_REL_EQ_FLOAT(100000.0f, 100001.0f, 0.00001f);
822822

@@ -835,7 +835,7 @@ static bool TestDepthMath() {
835835
float encoded = factors.EncodeFromU16(testValue);
836836
float decodedU16 = factors.DecodeToU16(encoded);
837837
EXPECT_REL_EQ_FLOAT(decodedU16, testValue, 0.0001f);
838-
EXPECT_REL_EQ_FLOAT(encoded, ToScaledDepthFromIntegerScale(useFlags, testValue), 0.00001f);
838+
EXPECT_REL_EQ_FLOAT(encoded, ToScaledDepthFromIntegerScale(useFlags, testValue), 0.000001f);
839839
}
840840
}
841841

unittest/UnitTest.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ inline bool rel_equal(float a, float b, float precision) {
2020
#define EXPECT_EQ_HEX(a, b) if ((a) != (b)) { printf("%s:%i: Test Fail\n%x\nvs\n%x\n", __FUNCTION__, __LINE__, a, b); return false; }
2121
#define EXPECT_EQ_FLOAT(a, b) if ((a) != (b)) { printf("%s:%i: Test Fail\n%0.7f\nvs\n%0.7f\n", __FUNCTION__, __LINE__, a, b); return false; }
2222
#define EXPECT_APPROX_EQ_FLOAT(a, b) if (fabsf((a)-(b))>0.00001f) { printf("%s:%i: Test Fail\n%f\nvs\n%f\n", __FUNCTION__, __LINE__, a, b); /*return false;*/ }
23-
#define EXPECT_REL_EQ_FLOAT(a, b, precision) if (!rel_equal(a, b, precision)) { printf("%s:%i: Test Fail\n%0.7f\nvs\n%0.7f\n", __FUNCTION__, __LINE__, a, b); /*return false;*/ }
23+
#define EXPECT_REL_EQ_FLOAT(a, b, precision) if (!rel_equal(a, b, precision)) { printf("%s:%i: Test Fail\n%0.9f\nvs\n%0.9f\n", __FUNCTION__, __LINE__, a, b); /*return false;*/ }
2424
#define EXPECT_EQ_STR(a, b) if (a != b) { printf("%s: Test Fail\n%s\nvs\n%s\n", __FUNCTION__, a.c_str(), b.c_str()); return false; }
2525

2626
#define RET(a) if (!(a)) { return false; }

0 commit comments

Comments
 (0)