Skip to content

Commit 9d3ba25

Browse files
fix various issues with DDS decompression (scp-fs2open#7022)
Fix heap corruption with smaller mipmaps due to decompression output always being a 4x4 block rather than actual mipmap size. It's necessary to pad the allocated data size to allow for the overflow. Fix heap corruption with DXT1 due to using improper data offsets for the type. Fix data offsets not always taking the depth value into account. Adjust code for slight increase in decode performance. Update bcdec to 0.98
1 parent fc10205 commit 9d3ba25

File tree

2 files changed

+247
-51
lines changed

2 files changed

+247
-51
lines changed

code/ddsutils/bcdec.h

Lines changed: 190 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/* bcdec.h - v0.96
1+
/* bcdec.h - v0.98
22
provides functions to decompress blocks of BC compressed images
33
written by Sergii "iOrange" Kudlai in 2022
44
@@ -23,13 +23,22 @@
2323
2424
For more info, issues and suggestions please visit https://github.com/iOrange/bcdec
2525
26+
Configuration:
27+
#define BCDEC_BC4BC5_PRECISE:
28+
enables more precise but slower BC4/BC5 decoding + signed/unsigned mode
29+
2630
CREDITS:
2731
Aras Pranckevicius (@aras-p) - BC1/BC3 decoders optimizations (up to 3x the speed)
2832
- BC6H/BC7 bits pulling routines optimizations
2933
- optimized BC6H by moving unquantize out of the loop
3034
- Split BC6H decompression function into 'half' and
3135
'float' variants
3236
37+
Michael Schmidt (@RunDevelopment) - Found better "magic" coefficients for integer interpolation
38+
of reference colors in BC1 color block, that match with
39+
the floating point interpolation. This also made it faster
40+
than integer division by 3!
41+
3342
bugfixes:
3443
@linkmauve
3544
@@ -39,6 +48,9 @@
3948
#ifndef BCDEC_HEADER_INCLUDED
4049
#define BCDEC_HEADER_INCLUDED
4150

51+
#define BCDEC_VERSION_MAJOR 0
52+
#define BCDEC_VERSION_MINOR 98
53+
4254
/* if BCDEC_STATIC causes problems, try defining BCDECDEF to 'inline' or 'static inline' */
4355
#ifndef BCDECDEF
4456
#ifdef BCDEC_STATIC
@@ -90,12 +102,20 @@
90102
BCDECDEF void bcdec_bc1(const void* compressedBlock, void* decompressedBlock, int destinationPitch);
91103
BCDECDEF void bcdec_bc2(const void* compressedBlock, void* decompressedBlock, int destinationPitch);
92104
BCDECDEF void bcdec_bc3(const void* compressedBlock, void* decompressedBlock, int destinationPitch);
105+
#ifndef BCDEC_BC4BC5_PRECISE
93106
BCDECDEF void bcdec_bc4(const void* compressedBlock, void* decompressedBlock, int destinationPitch);
94107
BCDECDEF void bcdec_bc5(const void* compressedBlock, void* decompressedBlock, int destinationPitch);
108+
#else
109+
BCDECDEF void bcdec_bc4(const void* compressedBlock, void* decompressedBlock, int destinationPitch, int isSigned);
110+
BCDECDEF void bcdec_bc5(const void* compressedBlock, void* decompressedBlock, int destinationPitch, int isSigned);
111+
BCDECDEF void bcdec_bc4_float(const void* compressedBlock, void* decompressedBlock, int destinationPitch, int isSigned);
112+
BCDECDEF void bcdec_bc5_float(const void* compressedBlock, void* decompressedBlock, int destinationPitch, int isSigned);
113+
#endif
95114
BCDECDEF void bcdec_bc6h_float(const void* compressedBlock, void* decompressedBlock, int destinationPitch, int isSigned);
96115
BCDECDEF void bcdec_bc6h_half(const void* compressedBlock, void* decompressedBlock, int destinationPitch, int isSigned);
97116
BCDECDEF void bcdec_bc7(const void* compressedBlock, void* decompressedBlock, int destinationPitch);
98117

118+
#endif /* BCDEC_HEADER_INCLUDED */
99119

100120
#ifdef BCDEC_IMPLEMENTATION
101121

@@ -110,35 +130,44 @@ static void bcdec__color_block(const void* compressedBlock, void* decompressedBl
110130
c0 = ((unsigned short*)compressedBlock)[0];
111131
c1 = ((unsigned short*)compressedBlock)[1];
112132

133+
/* Unpack 565 ref colors */
134+
r0 = (c0 >> 11) & 0x1F;
135+
g0 = (c0 >> 5) & 0x3F;
136+
b0 = c0 & 0x1F;
137+
138+
r1 = (c1 >> 11) & 0x1F;
139+
g1 = (c1 >> 5) & 0x3F;
140+
b1 = c1 & 0x1F;
141+
113142
/* Expand 565 ref colors to 888 */
114-
r0 = (((c0 >> 11) & 0x1F) * 527 + 23) >> 6;
115-
g0 = (((c0 >> 5) & 0x3F) * 259 + 33) >> 6;
116-
b0 = ((c0 & 0x1F) * 527 + 23) >> 6;
117-
refColors[0] = 0xFF000000 | (b0 << 16) | (g0 << 8) | r0;
143+
r = (r0 * 527 + 23) >> 6;
144+
g = (g0 * 259 + 33) >> 6;
145+
b = (b0 * 527 + 23) >> 6;
146+
refColors[0] = 0xFF000000 | (b << 16) | (g << 8) | r;
118147

119-
r1 = (((c1 >> 11) & 0x1F) * 527 + 23) >> 6;
120-
g1 = (((c1 >> 5) & 0x3F) * 259 + 33) >> 6;
121-
b1 = ((c1 & 0x1F) * 527 + 23) >> 6;
122-
refColors[1] = 0xFF000000 | (b1 << 16) | (g1 << 8) | r1;
148+
r = (r1 * 527 + 23) >> 6;
149+
g = (g1 * 259 + 33) >> 6;
150+
b = (b1 * 527 + 23) >> 6;
151+
refColors[1] = 0xFF000000 | (b << 16) | (g << 8) | r;
123152

124153
if (c0 > c1 || onlyOpaqueMode) { /* Standard BC1 mode (also BC3 color block uses ONLY this mode) */
125154
/* color_2 = 2/3*color_0 + 1/3*color_1
126155
color_3 = 1/3*color_0 + 2/3*color_1 */
127-
r = (2 * r0 + r1 + 1) / 3;
128-
g = (2 * g0 + g1 + 1) / 3;
129-
b = (2 * b0 + b1 + 1) / 3;
156+
r = ((2 * r0 + r1) * 351 + 61) >> 7;
157+
g = ((2 * g0 + g1) * 2763 + 1039) >> 11;
158+
b = ((2 * b0 + b1) * 351 + 61) >> 7;
130159
refColors[2] = 0xFF000000 | (b << 16) | (g << 8) | r;
131160

132-
r = (r0 + 2 * r1 + 1) / 3;
133-
g = (g0 + 2 * g1 + 1) / 3;
134-
b = (b0 + 2 * b1 + 1) / 3;
161+
r = ((r0 + r1 * 2) * 351 + 61) >> 7;
162+
g = ((g0 + g1 * 2) * 2763 + 1039) >> 11;
163+
b = ((b0 + b1 * 2) * 351 + 61) >> 7;
135164
refColors[3] = 0xFF000000 | (b << 16) | (g << 8) | r;
136165
} else { /* Quite rare BC1A mode */
137166
/* color_2 = 1/2*color_0 + 1/2*color_1;
138167
color_3 = 0; */
139-
r = (r0 + r1 + 1) >> 1;
140-
g = (g0 + g1 + 1) >> 1;
141-
b = (b0 + b1 + 1) >> 1;
168+
r = ((r0 + r1) * 1053 + 125) >> 8;
169+
g = ((g0 + g1) * 4145 + 1019) >> 11;
170+
b = ((b0 + b1) * 1053 + 125) >> 8;
142171
refColors[2] = 0xFF000000 | (b << 16) | (g << 8) | r;
143172

144173
refColors[3] = 0x00000000;
@@ -190,19 +219,19 @@ static void bcdec__smooth_alpha_block(const void* compressedBlock, void* decompr
190219

191220
if (alpha[0] > alpha[1]) {
192221
/* 6 interpolated alpha values. */
193-
alpha[2] = (6 * alpha[0] + alpha[1] + 1) / 7; /* 6/7*alpha_0 + 1/7*alpha_1 */
194-
alpha[3] = (5 * alpha[0] + 2 * alpha[1] + 1) / 7; /* 5/7*alpha_0 + 2/7*alpha_1 */
195-
alpha[4] = (4 * alpha[0] + 3 * alpha[1] + 1) / 7; /* 4/7*alpha_0 + 3/7*alpha_1 */
196-
alpha[5] = (3 * alpha[0] + 4 * alpha[1] + 1) / 7; /* 3/7*alpha_0 + 4/7*alpha_1 */
197-
alpha[6] = (2 * alpha[0] + 5 * alpha[1] + 1) / 7; /* 2/7*alpha_0 + 5/7*alpha_1 */
198-
alpha[7] = ( alpha[0] + 6 * alpha[1] + 1) / 7; /* 1/7*alpha_0 + 6/7*alpha_1 */
222+
alpha[2] = (6 * alpha[0] + alpha[1]) / 7; /* 6/7*alpha_0 + 1/7*alpha_1 */
223+
alpha[3] = (5 * alpha[0] + 2 * alpha[1]) / 7; /* 5/7*alpha_0 + 2/7*alpha_1 */
224+
alpha[4] = (4 * alpha[0] + 3 * alpha[1]) / 7; /* 4/7*alpha_0 + 3/7*alpha_1 */
225+
alpha[5] = (3 * alpha[0] + 4 * alpha[1]) / 7; /* 3/7*alpha_0 + 4/7*alpha_1 */
226+
alpha[6] = (2 * alpha[0] + 5 * alpha[1]) / 7; /* 2/7*alpha_0 + 5/7*alpha_1 */
227+
alpha[7] = ( alpha[0] + 6 * alpha[1]) / 7; /* 1/7*alpha_0 + 6/7*alpha_1 */
199228
}
200229
else {
201230
/* 4 interpolated alpha values. */
202-
alpha[2] = (4 * alpha[0] + alpha[1] + 1) / 5; /* 4/5*alpha_0 + 1/5*alpha_1 */
203-
alpha[3] = (3 * alpha[0] + 2 * alpha[1] + 1) / 5; /* 3/5*alpha_0 + 2/5*alpha_1 */
204-
alpha[4] = (2 * alpha[0] + 3 * alpha[1] + 1) / 5; /* 2/5*alpha_0 + 3/5*alpha_1 */
205-
alpha[5] = ( alpha[0] + 4 * alpha[1] + 1) / 5; /* 1/5*alpha_0 + 4/5*alpha_1 */
231+
alpha[2] = (4 * alpha[0] + alpha[1]) / 5; /* 4/5*alpha_0 + 1/5*alpha_1 */
232+
alpha[3] = (3 * alpha[0] + 2 * alpha[1]) / 5; /* 3/5*alpha_0 + 2/5*alpha_1 */
233+
alpha[4] = (2 * alpha[0] + 3 * alpha[1]) / 5; /* 2/5*alpha_0 + 3/5*alpha_1 */
234+
alpha[5] = ( alpha[0] + 4 * alpha[1]) / 5; /* 1/5*alpha_0 + 4/5*alpha_1 */
206235
alpha[6] = 0x00;
207236
alpha[7] = 0xFF;
208237
}
@@ -218,6 +247,117 @@ static void bcdec__smooth_alpha_block(const void* compressedBlock, void* decompr
218247
}
219248
}
220249

250+
#ifdef BCDEC_BC4BC5_PRECISE
251+
/*  Decodes one 8-byte BC4 block into a 4x4 grid of 8-bit samples (precise path).

    compressedBlock   - 8 bytes: two endpoint bytes followed by sixteen 3-bit indices
    decompressedBlock - address of the top-left destination sample
    destinationPitch  - distance in bytes between two destination rows
    pixelSize         - distance in bytes between two horizontally adjacent samples
    isSigned          - non-zero: endpoints and output are signed (-127..127),
                        zero:     endpoints and output are unsigned (0..255)

    NOTE(review): the block is read with a single native 64-bit load, so this
    presumably relies on a little-endian host and on compressedBlock being
    readable as an unsigned long long - confirm for the targets you care about. */
static void bcdec__bc4_block(const void* compressedBlock, void* decompressedBlock, int destinationPitch, int pixelSize, int isSigned) {
    /* Interpolation weights, approximately k/5 and k/7 scaled by 65536 */
    static const int aWeights4[4] = { 13107, 26215, 39321, 52429 };
    static const int aWeights6[6] = { 9363, 18724, 28086, 37450, 46812, 56173 };

    signed char* sblock;
    unsigned char* ublock;
    int alpha[8];
    int i, j;
    unsigned long long block, indices;

    block = *(const unsigned long long*)compressedBlock;

    if (isSigned) {
        /* 'signed char' rather than plain 'char': the signedness of plain 'char' is
           implementation-defined, and an unsigned 'char' would never sign-extend here */
        alpha[0] = (signed char)(block & 0xFF);
        alpha[1] = (signed char)((block >> 8) & 0xFF);
        if (alpha[0] < -127) alpha[0] = -127;     /* -128 clamps to -127 */
        if (alpha[1] < -127) alpha[1] = -127;     /* -128 clamps to -127 */
    } else {
        alpha[0] = (int)(block & 0xFF);
        alpha[1] = (int)((block >> 8) & 0xFF);
    }

    if (alpha[0] > alpha[1]) {
        /* 6 interpolated alpha values. */
        alpha[2] = (aWeights6[5] * alpha[0] + aWeights6[0] * alpha[1] + 32768) >> 16;   /* 6/7*alpha_0 + 1/7*alpha_1 */
        alpha[3] = (aWeights6[4] * alpha[0] + aWeights6[1] * alpha[1] + 32768) >> 16;   /* 5/7*alpha_0 + 2/7*alpha_1 */
        alpha[4] = (aWeights6[3] * alpha[0] + aWeights6[2] * alpha[1] + 32768) >> 16;   /* 4/7*alpha_0 + 3/7*alpha_1 */
        alpha[5] = (aWeights6[2] * alpha[0] + aWeights6[3] * alpha[1] + 32768) >> 16;   /* 3/7*alpha_0 + 4/7*alpha_1 */
        alpha[6] = (aWeights6[1] * alpha[0] + aWeights6[4] * alpha[1] + 32768) >> 16;   /* 2/7*alpha_0 + 5/7*alpha_1 */
        alpha[7] = (aWeights6[0] * alpha[0] + aWeights6[5] * alpha[1] + 32768) >> 16;   /* 1/7*alpha_0 + 6/7*alpha_1 */
    } else {
        /* 4 interpolated alpha values. */
        alpha[2] = (aWeights4[3] * alpha[0] + aWeights4[0] * alpha[1] + 32768) >> 16;   /* 4/5*alpha_0 + 1/5*alpha_1 */
        alpha[3] = (aWeights4[2] * alpha[0] + aWeights4[1] * alpha[1] + 32768) >> 16;   /* 3/5*alpha_0 + 2/5*alpha_1 */
        alpha[4] = (aWeights4[1] * alpha[0] + aWeights4[2] * alpha[1] + 32768) >> 16;   /* 2/5*alpha_0 + 3/5*alpha_1 */
        alpha[5] = (aWeights4[0] * alpha[0] + aWeights4[3] * alpha[1] + 32768) >> 16;   /* 1/5*alpha_0 + 4/5*alpha_1 */
        /* the last two palette entries are the range extremes */
        alpha[6] = isSigned ? -127 : 0;
        alpha[7] = isSigned ?  127 : 255;
    }

    /* the 48 bits above the two endpoints hold the 3-bit palette indices, row by row */
    indices = block >> 16;
    if (isSigned) {
        sblock = (signed char*)decompressedBlock;
        for (i = 0; i < 4; ++i) {
            for (j = 0; j < 4; ++j) {
                sblock[j * pixelSize] = (signed char)alpha[indices & 0x07];
                indices >>= 3;
            }
            sblock += destinationPitch;
        }
    } else {
        ublock = (unsigned char*)decompressedBlock;
        for (i = 0; i < 4; ++i) {
            for (j = 0; j < 4; ++j) {
                ublock[j * pixelSize] = (unsigned char)alpha[indices & 0x07];
                indices >>= 3;
            }
            ublock += destinationPitch;
        }
    }
}
312+
313+
/*  Decodes one 8-byte BC4 block into a 4x4 grid of float samples (precise path).

    compressedBlock   - 8 bytes: two endpoint bytes followed by sixteen 3-bit indices
    decompressedBlock - address of the top-left destination float
    destinationPitch  - distance between two destination rows, counted in floats
    pixelSize         - distance between two horizontally adjacent samples, counted in floats
    isSigned          - non-zero: endpoints are signed bytes mapped to -1.0..1.0,
                        zero:     endpoints are unsigned bytes mapped to 0.0..1.0

    NOTE(review): the block is read with a single native 64-bit load, so this
    presumably relies on a little-endian host and on compressedBlock being
    readable as an unsigned long long - confirm for the targets you care about. */
static void bcdec__bc4_block_float(const void* compressedBlock, void* decompressedBlock, int destinationPitch, int pixelSize, int isSigned) {
    float* decompressed;
    float alpha[8];
    int i, j;
    unsigned long long block, indices;

    block = *(const unsigned long long*)compressedBlock;
    decompressed = (float*)decompressedBlock;

    if (isSigned) {
        /* 'signed char' rather than plain 'char': the signedness of plain 'char' is
           implementation-defined, and an unsigned 'char' would never sign-extend here */
        alpha[0] = (float)((signed char)(block & 0xFF)) / 127.0f;
        alpha[1] = (float)((signed char)((block >> 8) & 0xFF)) / 127.0f;
        if (alpha[0] < -1.0f) alpha[0] = -1.0f;     /* -128 clamps to -127 */
        if (alpha[1] < -1.0f) alpha[1] = -1.0f;     /* -128 clamps to -127 */
    } else {
        alpha[0] = (float)(block & 0xFF) / 255.0f;
        alpha[1] = (float)((block >> 8) & 0xFF) / 255.0f;
    }

    if (alpha[0] > alpha[1]) {
        /* 6 interpolated alpha values. */
        alpha[2] = (6.0f * alpha[0] +        alpha[1]) / 7.0f;   /* 6/7*alpha_0 + 1/7*alpha_1 */
        alpha[3] = (5.0f * alpha[0] + 2.0f * alpha[1]) / 7.0f;   /* 5/7*alpha_0 + 2/7*alpha_1 */
        alpha[4] = (4.0f * alpha[0] + 3.0f * alpha[1]) / 7.0f;   /* 4/7*alpha_0 + 3/7*alpha_1 */
        alpha[5] = (3.0f * alpha[0] + 4.0f * alpha[1]) / 7.0f;   /* 3/7*alpha_0 + 4/7*alpha_1 */
        alpha[6] = (2.0f * alpha[0] + 5.0f * alpha[1]) / 7.0f;   /* 2/7*alpha_0 + 5/7*alpha_1 */
        alpha[7] = (       alpha[0] + 6.0f * alpha[1]) / 7.0f;   /* 1/7*alpha_0 + 6/7*alpha_1 */
    } else {
        /* 4 interpolated alpha values. */
        alpha[2] = (4.0f * alpha[0] +        alpha[1]) / 5.0f;   /* 4/5*alpha_0 + 1/5*alpha_1 */
        alpha[3] = (3.0f * alpha[0] + 2.0f * alpha[1]) / 5.0f;   /* 3/5*alpha_0 + 2/5*alpha_1 */
        alpha[4] = (2.0f * alpha[0] + 3.0f * alpha[1]) / 5.0f;   /* 2/5*alpha_0 + 3/5*alpha_1 */
        alpha[5] = (       alpha[0] + 4.0f * alpha[1]) / 5.0f;   /* 1/5*alpha_0 + 4/5*alpha_1 */
        /* the last two palette entries are the range extremes */
        alpha[6] = isSigned ? -1.0f : 0.0f;
        alpha[7] = 1.0f;
    }

    /* the 48 bits above the two endpoints hold the 3-bit palette indices, row by row */
    indices = block >> 16;
    for (i = 0; i < 4; ++i) {
        for (j = 0; j < 4; ++j) {
            decompressed[j * pixelSize] = alpha[indices & 0x07];
            indices >>= 3;
        }
        decompressed += destinationPitch;
    }
}
359+
#endif /* BCDEC_BC4BC5_PRECISE */
360+
221361
typedef struct bcdec__bitstream {
222362
unsigned long long low;
223363
unsigned long long high;
@@ -270,15 +410,37 @@ BCDECDEF void bcdec_bc3(const void* compressedBlock, void* decompressedBlock, in
270410
bcdec__smooth_alpha_block(compressedBlock, ((char*)decompressedBlock) + 3, destinationPitch, 4);
271411
}
272412

413+
#ifndef BCDEC_BC4BC5_PRECISE
273414
BCDECDEF void bcdec_bc4(const void* compressedBlock, void* decompressedBlock, int destinationPitch) {
274415
bcdec__smooth_alpha_block(compressedBlock, decompressedBlock, destinationPitch, 1);
416+
#else
417+
BCDECDEF void bcdec_bc4(const void* compressedBlock, void* decompressedBlock, int destinationPitch, int isSigned) {
418+
bcdec__bc4_block(compressedBlock, decompressedBlock, destinationPitch, 1, isSigned);
419+
#endif
275420
}
276421

422+
#ifndef BCDEC_BC4BC5_PRECISE
277423
BCDECDEF void bcdec_bc5(const void* compressedBlock, void* decompressedBlock, int destinationPitch) {
278424
bcdec__smooth_alpha_block(compressedBlock, decompressedBlock, destinationPitch, 2);
279425
bcdec__smooth_alpha_block(((char*)compressedBlock) + 8, ((char*)decompressedBlock) + 1, destinationPitch, 2);
426+
#else
427+
BCDECDEF void bcdec_bc5(const void* compressedBlock, void* decompressedBlock, int destinationPitch, int isSigned) {
428+
bcdec__bc4_block(compressedBlock, decompressedBlock, destinationPitch, 2, isSigned);
429+
bcdec__bc4_block(((char*)compressedBlock) + 8, ((char*)decompressedBlock) + 1, destinationPitch, 2, isSigned);
430+
#endif
431+
}
432+
433+
#ifdef BCDEC_BC4BC5_PRECISE
434+
/*  Decodes one BC4 block into a 4x4 grid of floats by forwarding to
    bcdec__bc4_block_float with a sample stride of 1.
    isSigned is passed through unchanged. */
BCDECDEF void bcdec_bc4_float(const void* compressedBlock, void* decompressedBlock, int destinationPitch, int isSigned) {
    const int sampleStride = 1;

    bcdec__bc4_block_float(compressedBlock, decompressedBlock, destinationPitch, sampleStride, isSigned);
}
281437

438+
/*  Decodes one BC5 block into a 4x4 grid of two-float pixels.
    The block is two consecutive 8-byte BC4-style channels: the first goes to
    float 0 of every pixel, the second (8 bytes further in) to float 1.
    isSigned is passed through unchanged to both channel decodes. */
BCDECDEF void bcdec_bc5_float(const void* compressedBlock, void* decompressedBlock, int destinationPitch, int isSigned) {
    const char* secondChannelIn = ((const char*)compressedBlock) + 8;
    float* secondChannelOut = ((float*)decompressedBlock) + 1;

    bcdec__bc4_block_float(compressedBlock, decompressedBlock, destinationPitch, 2, isSigned);
    bcdec__bc4_block_float(secondChannelIn, secondChannelOut, destinationPitch, 2, isSigned);
}
442+
#endif /* BCDEC_BC4BC5_PRECISE */
443+
282444
/* http://graphics.stanford.edu/~seander/bithacks.html#VariableSignExtend */
283445
static int bcdec__extend_sign(int val, int bits) {
284446
return (val << (32 - bits)) >> (32 - bits);
@@ -1269,8 +1431,6 @@ BCDECDEF void bcdec_bc7(const void* compressedBlock, void* decompressedBlock, in
12691431

12701432
#endif /* BCDEC_IMPLEMENTATION */
12711433

1272-
#endif /* BCDEC_HEADER_INCLUDED */
1273-
12741434
/* LICENSE:
12751435
12761436
This software is available under 2 licenses -- choose whichever you prefer.

0 commit comments

Comments
 (0)