Skip to content

Commit 3cbcf7b

Browse files
authored
Merge pull request #5 from pygame-community/blit-premul-avx2
Add an AVX2 version of the BLEND_PREMULTIPLIED blend mode
2 parents 0449cf5 + 3b4b8f4 commit 3cbcf7b

File tree

4 files changed

+318
-95
lines changed

4 files changed

+318
-95
lines changed

src_c/alphablit.c

Lines changed: 20 additions & 95 deletions
Original file line number | Diff line number | Diff line change
@@ -109,10 +109,6 @@ blit_blend_rgba_max(SDL_BlitInfo *info);
109109

110110
static void
111111
blit_blend_premultiplied(SDL_BlitInfo *info);
112-
#ifdef __MMX__
113-
static void
114-
blit_blend_premultiplied_mmx(SDL_BlitInfo *info);
115-
#endif /* __MMX__ */
116112

117113
static int
118114
SoftBlitPyGame(SDL_Surface *src, SDL_Rect *srcrect, SDL_Surface *dst,
@@ -567,27 +563,33 @@ SoftBlitPyGame(SDL_Surface *src, SDL_Rect *srcrect, SDL_Surface *dst,
567563
break;
568564
}
569565
case PYGAME_BLEND_PREMULTIPLIED: {
566+
#if !defined(__EMSCRIPTEN__)
567+
#if SDL_BYTEORDER == SDL_LIL_ENDIAN
570568
if (src->format->BytesPerPixel == 4 &&
571569
dst->format->BytesPerPixel == 4 &&
572570
src->format->Rmask == dst->format->Rmask &&
573571
src->format->Gmask == dst->format->Gmask &&
574572
src->format->Bmask == dst->format->Bmask &&
575-
info.src_blend != SDL_BLENDMODE_NONE) {
576-
#if defined(__MMX__) || defined(__SSE2__) || defined(PG_ENABLE_ARM_NEON)
573+
info.src_blend != SDL_BLENDMODE_NONE &&
574+
pg_has_avx2() && (src != dst)) {
575+
blit_blend_premultiplied_avx2(&info);
576+
break;
577+
}
577578
#if PG_ENABLE_SSE_NEON
578-
if (pg_HasSSE_NEON()) {
579-
blit_blend_premultiplied_sse2(&info);
580-
break;
581-
}
582-
#endif /* PG_ENABLE_SSE_NEON */
583-
#ifdef __MMX__
584-
if (SDL_HasMMX() == SDL_TRUE) {
585-
blit_blend_premultiplied_mmx(&info);
586-
break;
587-
}
588-
#endif /*__MMX__*/
589-
#endif /*__MMX__ || __SSE2__ || PG_ENABLE_ARM_NEON*/
579+
if (src->format->BytesPerPixel == 4 &&
580+
dst->format->BytesPerPixel == 4 &&
581+
src->format->Rmask == dst->format->Rmask &&
582+
src->format->Gmask == dst->format->Gmask &&
583+
src->format->Bmask == dst->format->Bmask &&
584+
src->format->Amask == 0xFF000000 &&
585+
info.src_blend != SDL_BLENDMODE_NONE &&
586+
pg_HasSSE_NEON() && (src != dst)) {
587+
blit_blend_premultiplied_sse2(&info);
588+
break;
590589
}
590+
#endif /* PG_ENABLE_SSE_NEON */
591+
#endif /* SDL_BYTEORDER == SDL_LIL_ENDIAN */
592+
#endif /* __EMSCRIPTEN__ */
591593

592594
blit_blend_premultiplied(&info);
593595
break;
@@ -1262,83 +1264,6 @@ blit_blend_rgba_max(SDL_BlitInfo *info)
12621264
}
12631265
}
12641266

1265-
#ifdef __MMX__
1266-
/* fast ARGB888->(A)RGB888 blending with pixel alpha */
1267-
static void
1268-
blit_blend_premultiplied_mmx(SDL_BlitInfo *info)
1269-
{
1270-
int n;
1271-
int width = info->width;
1272-
int height = info->height;
1273-
Uint32 *srcp = (Uint32 *)info->s_pixels;
1274-
int srcskip = info->s_skip >> 2;
1275-
Uint32 *dstp = (Uint32 *)info->d_pixels;
1276-
int dstskip = info->d_skip >> 2;
1277-
SDL_PixelFormat *srcfmt = info->src;
1278-
Uint32 amask = srcfmt->Amask;
1279-
Uint32 ashift = srcfmt->Ashift;
1280-
Uint64 multmask2;
1281-
1282-
__m64 src1, dst1, mm_alpha, mm_zero, mm_alpha2;
1283-
1284-
mm_zero = _mm_setzero_si64(); /* 0 -> mm_zero */
1285-
multmask2 = 0x00FF00FF00FF00FFULL;
1286-
1287-
while (height--) {
1288-
/* *INDENT-OFF* */
1289-
LOOP_UNROLLED4(
1290-
{
1291-
Uint32 alpha = *srcp & amask;
1292-
if (alpha == 0) {
1293-
/* do nothing */
1294-
}
1295-
else if (alpha == amask) {
1296-
*dstp = *srcp;
1297-
}
1298-
else {
1299-
src1 = _mm_cvtsi32_si64(
1300-
*srcp); /* src(ARGB) -> src1 (0000ARGB) */
1301-
src1 =
1302-
_mm_unpacklo_pi8(src1, mm_zero); /* 0A0R0G0B -> src1 */
1303-
1304-
dst1 = _mm_cvtsi32_si64(
1305-
*dstp); /* dst(ARGB) -> dst1 (0000ARGB) */
1306-
dst1 =
1307-
_mm_unpacklo_pi8(dst1, mm_zero); /* 0A0R0G0B -> dst1 */
1308-
1309-
mm_alpha = _mm_cvtsi32_si64(
1310-
alpha); /* alpha -> mm_alpha (0000000A) */
1311-
mm_alpha = _mm_srli_si64(
1312-
mm_alpha,
1313-
ashift); /* mm_alpha >> ashift -> mm_alpha(0000000A) */
1314-
mm_alpha = _mm_unpacklo_pi16(
1315-
mm_alpha, mm_alpha); /* 00000A0A -> mm_alpha */
1316-
mm_alpha2 = _mm_unpacklo_pi32(
1317-
mm_alpha, mm_alpha); /* 0A0A0A0A -> mm_alpha2 */
1318-
mm_alpha2 = _mm_xor_si64(
1319-
mm_alpha2,
1320-
*(__m64 *)&multmask2); /* 255 - mm_alpha -> mm_alpha */
1321-
1322-
/* pre-multiplied alpha blend */
1323-
dst1 = _mm_mullo_pi16(dst1, mm_alpha2);
1324-
dst1 = _mm_srli_pi16(dst1, 8);
1325-
dst1 = _mm_add_pi16(src1, dst1);
1326-
dst1 = _mm_packs_pu16(dst1, mm_zero);
1327-
1328-
*dstp = _mm_cvtsi64_si32(dst1); /* dst1 -> pixel */
1329-
}
1330-
++srcp;
1331-
++dstp;
1332-
},
1333-
n, width);
1334-
/* *INDENT-ON* */
1335-
srcp += srcskip;
1336-
dstp += dstskip;
1337-
}
1338-
_mm_empty();
1339-
}
1340-
#endif /*__MMX__*/
1341-
13421267
static void
13431268
blit_blend_premultiplied(SDL_BlitInfo *info)
13441269
{

src_c/simd_blitters.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -82,3 +82,5 @@ void
8282
blit_blend_rgba_min_avx2(SDL_BlitInfo *info);
8383
void
8484
blit_blend_rgb_min_avx2(SDL_BlitInfo *info);
85+
void
86+
blit_blend_premultiplied_avx2(SDL_BlitInfo *info);

0 commit comments

Comments
 (0)