@@ -109,10 +109,6 @@ blit_blend_rgba_max(SDL_BlitInfo *info);
109
109
110
110
static void
111
111
blit_blend_premultiplied (SDL_BlitInfo * info );
112
- #ifdef __MMX__
113
- static void
114
- blit_blend_premultiplied_mmx (SDL_BlitInfo * info );
115
- #endif /* __MMX__ */
116
112
117
113
static int
118
114
SoftBlitPyGame (SDL_Surface * src , SDL_Rect * srcrect , SDL_Surface * dst ,
@@ -567,27 +563,33 @@ SoftBlitPyGame(SDL_Surface *src, SDL_Rect *srcrect, SDL_Surface *dst,
567
563
break ;
568
564
}
569
565
case PYGAME_BLEND_PREMULTIPLIED : {
566
+ #if !defined(__EMSCRIPTEN__ )
567
+ #if SDL_BYTEORDER == SDL_LIL_ENDIAN
570
568
if (src -> format -> BytesPerPixel == 4 &&
571
569
dst -> format -> BytesPerPixel == 4 &&
572
570
src -> format -> Rmask == dst -> format -> Rmask &&
573
571
src -> format -> Gmask == dst -> format -> Gmask &&
574
572
src -> format -> Bmask == dst -> format -> Bmask &&
575
- info .src_blend != SDL_BLENDMODE_NONE ) {
576
- #if defined(__MMX__ ) || defined(__SSE2__ ) || defined(PG_ENABLE_ARM_NEON )
573
+ info .src_blend != SDL_BLENDMODE_NONE &&
574
+ pg_has_avx2 () && (src != dst )) {
575
+ blit_blend_premultiplied_avx2 (& info );
576
+ break ;
577
+ }
577
578
#if PG_ENABLE_SSE_NEON
578
- if (pg_HasSSE_NEON ()) {
579
- blit_blend_premultiplied_sse2 (& info );
580
- break ;
581
- }
582
- #endif /* PG_ENABLE_SSE_NEON */
583
- #ifdef __MMX__
584
- if (SDL_HasMMX () == SDL_TRUE ) {
585
- blit_blend_premultiplied_mmx (& info );
586
- break ;
587
- }
588
- #endif /*__MMX__*/
589
- #endif /*__MMX__ || __SSE2__ || PG_ENABLE_ARM_NEON*/
579
+ if (src -> format -> BytesPerPixel == 4 &&
580
+ dst -> format -> BytesPerPixel == 4 &&
581
+ src -> format -> Rmask == dst -> format -> Rmask &&
582
+ src -> format -> Gmask == dst -> format -> Gmask &&
583
+ src -> format -> Bmask == dst -> format -> Bmask &&
584
+ src -> format -> Amask == 0xFF000000 &&
585
+ info .src_blend != SDL_BLENDMODE_NONE &&
586
+ pg_HasSSE_NEON () && (src != dst )) {
587
+ blit_blend_premultiplied_sse2 (& info );
588
+ break ;
590
589
}
590
+ #endif /* PG_ENABLE_SSE_NEON */
591
+ #endif /* SDL_BYTEORDER == SDL_LIL_ENDIAN */
592
+ #endif /* __EMSCRIPTEN__ */
591
593
592
594
blit_blend_premultiplied (& info );
593
595
break ;
@@ -1262,83 +1264,6 @@ blit_blend_rgba_max(SDL_BlitInfo *info)
1262
1264
}
1263
1265
}
1264
1266
1265
- #ifdef __MMX__
1266
- /* fast ARGB888->(A)RGB888 blending with pixel alpha */
1267
- static void
1268
- blit_blend_premultiplied_mmx (SDL_BlitInfo * info )
1269
- {
1270
- int n ;
1271
- int width = info -> width ;
1272
- int height = info -> height ;
1273
- Uint32 * srcp = (Uint32 * )info -> s_pixels ;
1274
- int srcskip = info -> s_skip >> 2 ;
1275
- Uint32 * dstp = (Uint32 * )info -> d_pixels ;
1276
- int dstskip = info -> d_skip >> 2 ;
1277
- SDL_PixelFormat * srcfmt = info -> src ;
1278
- Uint32 amask = srcfmt -> Amask ;
1279
- Uint32 ashift = srcfmt -> Ashift ;
1280
- Uint64 multmask2 ;
1281
-
1282
- __m64 src1 , dst1 , mm_alpha , mm_zero , mm_alpha2 ;
1283
-
1284
- mm_zero = _mm_setzero_si64 (); /* 0 -> mm_zero */
1285
- multmask2 = 0x00FF00FF00FF00FFULL ;
1286
-
1287
- while (height -- ) {
1288
- /* *INDENT-OFF* */
1289
- LOOP_UNROLLED4 (
1290
- {
1291
- Uint32 alpha = * srcp & amask ;
1292
- if (alpha == 0 ) {
1293
- /* do nothing */
1294
- }
1295
- else if (alpha == amask ) {
1296
- * dstp = * srcp ;
1297
- }
1298
- else {
1299
- src1 = _mm_cvtsi32_si64 (
1300
- * srcp ); /* src(ARGB) -> src1 (0000ARGB) */
1301
- src1 =
1302
- _mm_unpacklo_pi8 (src1 , mm_zero ); /* 0A0R0G0B -> src1 */
1303
-
1304
- dst1 = _mm_cvtsi32_si64 (
1305
- * dstp ); /* dst(ARGB) -> dst1 (0000ARGB) */
1306
- dst1 =
1307
- _mm_unpacklo_pi8 (dst1 , mm_zero ); /* 0A0R0G0B -> dst1 */
1308
-
1309
- mm_alpha = _mm_cvtsi32_si64 (
1310
- alpha ); /* alpha -> mm_alpha (0000000A) */
1311
- mm_alpha = _mm_srli_si64 (
1312
- mm_alpha ,
1313
- ashift ); /* mm_alpha >> ashift -> mm_alpha(0000000A) */
1314
- mm_alpha = _mm_unpacklo_pi16 (
1315
- mm_alpha , mm_alpha ); /* 00000A0A -> mm_alpha */
1316
- mm_alpha2 = _mm_unpacklo_pi32 (
1317
- mm_alpha , mm_alpha ); /* 0A0A0A0A -> mm_alpha2 */
1318
- mm_alpha2 = _mm_xor_si64 (
1319
- mm_alpha2 ,
1320
- * (__m64 * )& multmask2 ); /* 255 - mm_alpha -> mm_alpha */
1321
-
1322
- /* pre-multiplied alpha blend */
1323
- dst1 = _mm_mullo_pi16 (dst1 , mm_alpha2 );
1324
- dst1 = _mm_srli_pi16 (dst1 , 8 );
1325
- dst1 = _mm_add_pi16 (src1 , dst1 );
1326
- dst1 = _mm_packs_pu16 (dst1 , mm_zero );
1327
-
1328
- * dstp = _mm_cvtsi64_si32 (dst1 ); /* dst1 -> pixel */
1329
- }
1330
- ++ srcp ;
1331
- ++ dstp ;
1332
- },
1333
- n , width );
1334
- /* *INDENT-ON* */
1335
- srcp += srcskip ;
1336
- dstp += dstskip ;
1337
- }
1338
- _mm_empty ();
1339
- }
1340
- #endif /*__MMX__*/
1341
-
1342
1267
static void
1343
1268
blit_blend_premultiplied (SDL_BlitInfo * info )
1344
1269
{
0 commit comments