Skip to content

Commit 28a7912

Browse files
committed
Add comments, re-enable remaining tests (they do work, just forgot to add expected values)
1 parent 8d40684 commit 28a7912

File tree

4 files changed

+49
-12
lines changed

4 files changed

+49
-12
lines changed

GPU/Common/GPUStateUtils.cpp

+36-3
Original file line numberDiff line numberDiff line change
@@ -502,22 +502,55 @@ ReplaceBlendType ReplaceBlendWithShader(GEBufferFormat bufferFormat) {
502502
static const float DEPTH_SLICE_FACTOR_HIGH = 4.0f;
503503
static const float DEPTH_SLICE_FACTOR_16BIT = 256.0f;
504504

505+
// The supported flag combinations
506+
//
507+
// 0 - "Old"-style GL depth.
508+
// Or "Non-accurate depth" : effectively ignore minz / maxz. Map Z values based on viewport, which clamps.
509+
// This skews depth in many instances. Depth can be inverted in this mode if viewport says.
510+
// This is completely wrong, but works in some cases (probably because some game devs assumed it was how it worked)
511+
// and avoids some depth clamp issues.
512+
//
513+
// GPU_USE_ACCURATE_DEPTH:
514+
// Accurate depth: Z in the framebuffer matches the range of Z used on the PSP linearly in some way. We choose
515+
// a centered range, to simulate clamping by letting otherwise out-of-range pixels survive the 0 and 1 cutoffs.
516+
// Clip depth based on minz/maxz, and viewport is just a means to scale and center the value, not clipping or mapping to stored values.
517+
//
518+
// GPU_USE_ACCURATE_DEPTH | GPU_USE_DEPTH_CLAMP:
519+
// Variant of GPU_USE_ACCURATE_DEPTH, just the range is the nice and convenient 0-1 since we can use
520+
// hardware depth clamp. Only viable in accurate depth mode: clamps depth, and can therefore use the full 0-1 range. Using the full 0-1 range is not what "accurate" implies by itself; it's implied by depth clamp (which also means we're clamping).
521+
//
522+
// GPU_USE_ACCURATE_DEPTH | GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT:
523+
// GPU_USE_ACCURATE_DEPTH | GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT | GPU_USE_DEPTH_CLAMP:
524+
// Only viable in accurate depth mode, means to use a range of the 24-bit depth values available
525+
// from the GPU to represent the 16-bit values the PSP had, to try to make everything round and
526+
// z-fight (close to) the same way as on hardware, cheaply (cheaper than rounding depth in fragment shader).
527+
// We automatically switch to this if Z tests for equality are used.
528+
// Depth clamp has no noticeable effect here if set.
529+
//
530+
// Any other combinations of these particular flags are bogus (like for example a lonely GPU_USE_DEPTH_CLAMP).
531+
505532
float DepthSliceFactor(u32 useFlags) {
506533
if (!(useFlags & GPU_USE_ACCURATE_DEPTH)) {
534+
// Old style depth.
507535
return 1.0f;
508536
}
509537
if (useFlags & GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT) {
538+
// Accurate depth but 16-bit resolution, so squish.
510539
return DEPTH_SLICE_FACTOR_16BIT;
511540
}
512541
if (useFlags & GPU_USE_DEPTH_CLAMP) {
542+
// Accurate depth, but we can use the full range since clamping is available.
513543
return 1.0f;
514544
}
545+
546+
// Standard accurate depth.
515547
return DEPTH_SLICE_FACTOR_HIGH;
516548
}
517549

518550
// This is used for float values which might not be integers, but are in the integer scale of 0-65535.
519551
float ToScaledDepthFromIntegerScale(u32 useFlags, float z) {
520552
if (!(useFlags & GPU_USE_ACCURATE_DEPTH)) {
553+
// Old style depth, shortcut.
521554
return z * (1.0f / 65535.0f);
522555
}
523556

@@ -538,9 +571,9 @@ DepthScaleFactors GetDepthScaleFactors(u32 useFlags) {
538571
return DepthScaleFactors(0.0f, 65535.0f);
539572
}
540573

541-
const float depthSliceFactor = DepthSliceFactor(useFlags);
542-
const float offset = 0.5f * (depthSliceFactor - 1.0f) * (1.0f / depthSliceFactor);
543-
return DepthScaleFactors(offset, depthSliceFactor * 65535.0f);
574+
const double depthSliceFactor = DepthSliceFactor(useFlags);
575+
const double offset = 0.5f * (depthSliceFactor - 1.0f) * (1.0f / depthSliceFactor);
576+
return DepthScaleFactors(offset, (float)(depthSliceFactor * 65535.0));
544577
}
545578

546579
void ConvertViewportAndScissor(bool useBufferedRendering, float renderWidth, float renderHeight, int bufferWidth, int bufferHeight, ViewportAndScissor &out) {

GPU/Common/GPUStateUtils.h

+6-2
Original file line numberDiff line numberDiff line change
@@ -90,32 +90,36 @@ struct ViewportAndScissor {
9090
void ConvertViewportAndScissor(bool useBufferedRendering, float renderWidth, float renderHeight, int bufferWidth, int bufferHeight, ViewportAndScissor &out);
9191
void UpdateCachedViewportState(const ViewportAndScissor &vpAndScissor);
9292

93+
// NOTE: See the .cpp file for detailed comment about how the use flags are interpreted.
9394
class DepthScaleFactors {
9495
public:
9596
// This should only be used from GetDepthScaleFactors.
9697
DepthScaleFactors(double offset, double scale) : offset_(offset), scale_(scale) {}
9798

9899
// Decodes a value from a depth buffer to a value in the range 0..65535
99100
float DecodeToU16(float z) const {
100-
return (z - offset_) * scale_;
101+
return (float)((z - offset_) * scale_);
101102
}
102103

103104
// Encodes a value from the range 0..65535 to a normalized depth value (0-1), in the
104105
// range that we write to the depth buffer.
105106
float EncodeFromU16(float z_u16) const {
106-
return (z_u16 / scale_) + offset_;
107+
return (float)(((double)z_u16 / scale_) + offset_);
107108
}
108109

109110
float Offset() const { return (float)offset_; }
110111
float ScaleU16() const { return (float)scale_; }
111112
// float Scale() const { return scale_ / 65535.0f; }
112113

113114
private:
115+
// Doubles hardly cost anything these days, and precision matters here.
114116
double offset_;
115117
double scale_;
116118
};
117119

118120
DepthScaleFactors GetDepthScaleFactors(u32 useFlags);
121+
122+
// These two will be replaced with just DepthScaleFactors.
119123
float ToScaledDepthFromIntegerScale(u32 useFlags, float z);
120124
float DepthSliceFactor(u32 useFlags);
121125

unittest/UnitTest.cpp

+6-6
Original file line numberDiff line numberDiff line change
@@ -806,17 +806,17 @@ static bool TestDepthMath() {
806806
// TODO: What about GPU_USE_DEPTH_CLAMP? It basically overrides GPU_USE_ACCURATE_DEPTH?
807807

808808
// These are in normalized space.
809-
static const volatile float testValues[] = { 0.0f, 0.1f, 0.5f, 0.9f, 1.0f };
809+
static const volatile float testValues[] = { 0.0f, 0.1f, 0.5f, M_PI / 4.0f, 0.9f, 1.0f };
810810

811811
static const u32 useFlagsArray[] = {
812812
0,
813813
GPU_USE_ACCURATE_DEPTH,
814814
GPU_USE_ACCURATE_DEPTH | GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT,
815-
// GPU_USE_DEPTH_CLAMP | GPU_USE_ACCURATE_DEPTH, fails
816-
// GPU_USE_DEPTH_CLAMP | GPU_USE_ACCURATE_DEPTH | GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT, fails
815+
GPU_USE_DEPTH_CLAMP | GPU_USE_ACCURATE_DEPTH,
816+
GPU_USE_DEPTH_CLAMP | GPU_USE_ACCURATE_DEPTH | GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT, // Here, GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT should take precedence over GPU_USE_DEPTH_CLAMP.
817817
};
818-
static const float expectedScale[] = { 65535.0f, 262140.0f, 16776960.0f };
819-
static const float expectedOffset[] = { 0.0f, 0.375f, 0.498047f };
818+
static const float expectedScale[] = { 65535.0f, 262140.0f, 16776960.0f, 65535.0f, 16776960.0f, };
819+
static const float expectedOffset[] = { 0.0f, 0.375f, 0.498047f, 0.0f, 0.498047f, };
820820

821821
EXPECT_REL_EQ_FLOAT(100000.0f, 100001.0f, 0.00001f);
822822

@@ -835,7 +835,7 @@ static bool TestDepthMath() {
835835
float encoded = factors.EncodeFromU16(testValue);
836836
float decodedU16 = factors.DecodeToU16(encoded);
837837
EXPECT_REL_EQ_FLOAT(decodedU16, testValue, 0.0001f);
838-
EXPECT_REL_EQ_FLOAT(encoded, ToScaledDepthFromIntegerScale(useFlags, testValue), 0.00001f);
838+
EXPECT_REL_EQ_FLOAT(encoded, ToScaledDepthFromIntegerScale(useFlags, testValue), 0.000001f);
839839
}
840840
}
841841

unittest/UnitTest.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ inline bool rel_equal(float a, float b, float precision) {
2020
#define EXPECT_EQ_HEX(a, b) if ((a) != (b)) { printf("%s:%i: Test Fail\n%x\nvs\n%x\n", __FUNCTION__, __LINE__, a, b); return false; }
2121
#define EXPECT_EQ_FLOAT(a, b) if ((a) != (b)) { printf("%s:%i: Test Fail\n%0.7f\nvs\n%0.7f\n", __FUNCTION__, __LINE__, a, b); return false; }
2222
#define EXPECT_APPROX_EQ_FLOAT(a, b) if (fabsf((a)-(b))>0.00001f) { printf("%s:%i: Test Fail\n%f\nvs\n%f\n", __FUNCTION__, __LINE__, a, b); /*return false;*/ }
23-
#define EXPECT_REL_EQ_FLOAT(a, b, precision) if (!rel_equal(a, b, precision)) { printf("%s:%i: Test Fail\n%0.7f\nvs\n%0.7f\n", __FUNCTION__, __LINE__, a, b); /*return false;*/ }
23+
#define EXPECT_REL_EQ_FLOAT(a, b, precision) if (!rel_equal(a, b, precision)) { printf("%s:%i: Test Fail\n%0.9f\nvs\n%0.9f\n", __FUNCTION__, __LINE__, a, b); /*return false;*/ }
2424
#define EXPECT_EQ_STR(a, b) if (a != b) { printf("%s: Test Fail\n%s\nvs\n%s\n", __FUNCTION__, a.c_str(), b.c_str()); return false; }
2525

2626
#define RET(a) if (!(a)) { return false; }

0 commit comments

Comments
 (0)