Perl
diff --git a/‎XSUB.h
Lines changed: 2 additions & 0 deletions b/‎XSUB.h
Lines changed: 2 additions & 0 deletions
diff --git a/‎embed.fnc
Lines changed: 7 additions & 1 deletion b/‎embed.fnc
Lines changed: 7 additions & 1 deletion
diff --git a/‎iperlsys.h
Lines changed: 19 additions & 0 deletions b/‎iperlsys.h
Lines changed: 19 additions & 0 deletions
diff --git a/‎mathoms.c
Lines changed: 51 additions & 0 deletions b/‎mathoms.c
Lines changed: 51 additions & 0 deletions
diff --git a/‎perl.h
Lines changed: 146 additions & 19 deletions b/‎perl.h
Lines changed: 146 additions & 19 deletions
diff --git a/‎proto.h
Lines changed: 25 additions & 0 deletions b/‎proto.h
Lines changed: 25 additions & 0 deletions
@@ -603,10 +603,12 @@ Rethrows a previously caught exception.  See L<perlguts/"Exception Handling">.
 #    define signal		PerlProc_signal
 #    define getpid		PerlProc_getpid
 #    define gettimeofday	PerlProc_gettimeofday
+#ifndef PERL_MY_HOST_NET_BYTE_SWAP
 #    define htonl		PerlSock_htonl
 #    define htons		PerlSock_htons
 #    define ntohl		PerlSock_ntohl
 #    define ntohs		PerlSock_ntohs
+#endif
 #    define accept		PerlSock_accept
 #    define bind		PerlSock_bind
 #    define connect		PerlSock_connect
 
@@ -6468,7 +6468,13 @@ p	|bool	|get_win32_message_utf8ness				\
 				|NULLOK const char *string
 Teor	|void	|win32_croak_not_implemented				\
 				|NN const char *fname
-#else
+# if !defined(PERL_MY_HOST_NET_BYTE_SWAP)
+DTbo	|u_long |win32_htonl	|u_long hostlong
+DTbo	|u_short|win32_htons	|u_short hostshort
+DTbo	|u_long |win32_ntohl	|u_long netlong
+DTbo	|u_short|win32_ntohs	|u_short netshort
+# endif
+#else /* if !defined(WIN32) */
 p	|bool	|do_exec3	|NN const char *incmd			\
 				|int fd 				\
 				|int do_report
 
@@ -1144,10 +1144,12 @@ struct IPerlProcInfo
 /* PerlSock             */
 struct IPerlSock;
 struct IPerlSockInfo;
+#ifndef PERL_MY_HOST_NET_BYTE_SWAP
 typedef u_long          (*LPHtonl)(const struct IPerlSock**, u_long);
 typedef u_short         (*LPHtons)(const struct IPerlSock**, u_short);
 typedef u_long          (*LPNtohl)(const struct IPerlSock**, u_long);
 typedef u_short         (*LPNtohs)(const struct IPerlSock**, u_short);
+#endif
 typedef SOCKET          (*LPAccept)(const struct IPerlSock**, SOCKET,
                             struct sockaddr*, int*);
 typedef int             (*LPBind)(const struct IPerlSock**, SOCKET,
@@ -1208,10 +1210,12 @@ typedef int             (*LPClosesocket)(const struct  IPerlSock**, SOCKET s);
 
 struct IPerlSock
 {
+#ifndef PERL_MY_HOST_NET_BYTE_SWAP
     LPHtonl             pHtonl;
     LPHtons             pHtons;
     LPNtohl             pNtohl;
     LPNtohs             pNtohs;
+#endif
     LPAccept            pAccept;
     LPBind              pBind;
     LPConnect           pConnect;
@@ -1262,6 +1266,19 @@ struct IPerlSockInfo
     struct IPerlSock    perlSockList;
 };
 
+#ifdef PERL_MY_HOST_NET_BYTE_SWAP
+   /* perl.h has provides a much more efficient inlined implementation of
+      htonl(), htons(), ntohl(), ntohs() compared to the "native" exported
+      extern linkage functions exported by ws2_32.dll. Both Mingw GCCs
+      and MSVCs headers, only offer exports from ws2_32.dll for those 4
+      tokens, without any alternative. Writing "r = htonl(n);" a C Windows
+      app is an improper anti-pattern. The official, correct, identifier is
+      RtlUlongByteSwap() on the Windows Platform. */
+#  define PerlSock_htonl(x)             htonl(x)
+#  define PerlSock_htons(x)             htons(x)
+#  define PerlSock_ntohl(x)             ntohl(x)
+#  define PerlSock_ntohs(x)             ntohs(x)
+#else
 #  define PerlSock_htonl(x)                                             \
         ((*(PL_Sock))->pHtonl)(PL_Sock, x)
 #  define PerlSock_htons(x)                                             \
@@ -1270,6 +1287,8 @@ struct IPerlSockInfo
         ((*(PL_Sock))->pNtohl)(PL_Sock, x)
 #  define PerlSock_ntohs(x)                                             \
         ((*(PL_Sock))->pNtohs)(PL_Sock, x)
+#endif
+
 #  define PerlSock_accept(s, a, l)                                      \
         ((*(PL_Sock))->pAccept)(PL_Sock, s, a, l)
 #  define PerlSock_bind(s, n, l)                                        \
 
@@ -883,6 +883,57 @@ Perl_utf8_to_uvchr(pTHX_ const U8 *s, STRLEN *retlen)
     return utf8_to_uvchr_buf(s, s + UTF8_CHK_SKIP(s), retlen);
 }
 
+/* These 4 exported functions are unused/deprecated/mathoms. WinPerl hooks
+   and replaces the htonl(), ntohl(), etc symbols provided by Mingw GCC and
+   MSVC headers, which are exported extern "C" functions from ws2_32.dll,
+   with 1 CPU instruction big, inline intrinsics. ws2_32.dll's implementation
+   is 11-15 instructions long. Clang and GCC for WinOS correctly convert
+   expression "& << |" to 1 CPU instruction, MSVC build numbers released prior
+   to Fall 2023 don't do that 1 CPU opcode optimization.
+   Because of frozen public API src code compatibility and object code linker
+   reasons, neither (Mingw or MS SDK) Clang, Mingw GCC or MSVC compilers can
+   correct the day 1 1993 mistake that tokens htonl(), ntohl(), etc, are
+   external linkage symbols from ws2_32.dll,
+
+   cpangrep shows exactly 1 module uses WinPerl's win32_*() prefixed sockets API
+   byte order swappers.
+
+   cpangrep: win32_htonl|win32_htons|win32_ntohl|win32_ntohs
+   https://metacpan.org/release/Prima/source/win32/files.c#L750
+
+   For now, these symbols are still exported, incase they are linked by a
+   XS .dll, that has a TU/.o/.c that doesn't #include "perl.h" and
+   declared function win32_htonl() themselves or they are using GetProcAddress(). */
+
+#undef win32_htonl
+#undef win32_htons
+#undef win32_ntohl
+#undef win32_ntohs
+
+u_long
+win32_htonl(u_long hostlong)
+{
+    return htonl(hostlong);
+}
+
+u_short
+win32_htons(u_short hostshort)
+{
+    return htons(hostshort);
+}
+
+u_long
+win32_ntohl(u_long netlong)
+{
+    return ntohl(netlong);
+}
+
+u_short
+win32_ntohs(u_short netshort)
+{
+    return ntohs(netshort);
+}
+
 GCC_DIAG_RESTORE
 
 #endif /* NO_MATHOMS */
 
@@ -4571,23 +4571,61 @@ struct ptr_tbl {
     struct ptr_tbl_ent		*tbl_arena_end;
 };
 
-#if defined(htonl) && !defined(HAS_HTONL)
-#define HAS_HTONL
-#endif
-#if defined(htons) && !defined(HAS_HTONS)
-#define HAS_HTONS
-#endif
-#if defined(ntohl) && !defined(HAS_NTOHL)
-#define HAS_NTOHL
-#endif
-#if defined(ntohs) && !defined(HAS_NTOHS)
-#define HAS_NTOHS
+/* Override the C compiler's built in byte order swapping functions and
+   implement our own BE/LE conversion functions. All modern CCs on all CPU archs
+   should be using an inline intrinsic, that maps to 1 CPU instructions, max 3.
+   Worst possible correct and incompetent implementation a C compiler can do:
+
+   libperl calls symbol htonl() through PLT/GOT to libc, libc then declares
+   stack 2 x unsigned char buf[4]; plus 2 calls through PLT/GOT to symbol
+   memcpy(); plus for( i=0; i<4; i++) {} loop.
+
+   Before defining PERL_MY_HOST_NET_BYTE_SWAP, remember to read the -O1 or -O2
+   optimized assembly code created by a production grade C compiler, to see
+   if there actually is a defect/flaw or not with the permutation of CC/CPU/OS
+   you are using. Don't accidentally turn 1 CPU op into 6 CPU ops, and document
+   that change as an "optimization".
+
+   The only known flawed CC is MSVC all versions and build numbers of
+   cl.exe/link.exe released to the pubic before Fall 2023. It has a 1 of 5
+   stars, if C token "htonl()" is used (perl uses Win32's PLT/GOT).
+   2 of 5 stars if "& << |" expression is used. Build numbers of MSVC released
+   during or after Fall 2023, produce correct and perfect machine code identical
+   to what GCC/Clang would emit. */
+
+#ifdef PERL_MY_HOST_NET_BYTE_SWAP
+#  undef htonl
+#  undef HAS_HTONL
+#  undef htons
+#  undef HAS_HTONS
+#  undef ntohl
+#  undef HAS_NTOHL
+#  undef ntohl
+#  undef HAS_NTOHL
+#  undef ntohs
+#  undef HAS_NTOHS
+#  undef htonll
+#  undef HAS_HTONLL
+#else
+#  if defined(htonl) && !defined(HAS_HTONL)
+#    define HAS_HTONL
+#  endif
+#  if defined(htons) && !defined(HAS_HTONS)
+#    define HAS_HTONS
+#  endif
+#  if defined(ntohl) && !defined(HAS_NTOHL)
+#    define HAS_NTOHL
+#  endif
+#  if defined(ntohs) && !defined(HAS_NTOHS)
+#    define HAS_NTOHS
+#  endif
 #endif
+
 #ifndef HAS_HTONL
-#define HAS_HTONS
-#define HAS_HTONL
-#define HAS_NTOHS
-#define HAS_NTOHL
+#  define HAS_HTONS
+#  define HAS_HTONL
+#  define HAS_NTOHS
+#  define HAS_NTOHL
 #  if (BYTEORDER & 0xffff) == 0x4321
 /* Big endian system, so ntohl, ntohs, htonl and htons do not need to
    re-order their values. However, to behave identically to the alternative
@@ -4604,21 +4642,65 @@ struct ptr_tbl {
    that *declare* the various functions are still seen. If we declare our own
    htonl etc they will clash with the declarations in the Win32 headers.  */
 
+#    ifdef _MSC_VER
+#      pragma intrinsic(_byteswap_ulong)
+#      pragma intrinsic(_byteswap_ushort)
+#    endif
+
+#    if !defined(_MSC_VER) || (defined(_MSC_VER) && defined(DEBUGGING))
 PERL_STATIC_INLINE U32
 my_swap32(const U32 x) {
+#      ifdef _MSC_VER
+    return _byteswap_ulong(x);
+#      else
     return ((x & 0xFF) << 24) | ((x >> 24) & 0xFF)
         | ((x & 0x0000FF00) << 8) | ((x & 0x00FF0000) >> 8);
+#      endif
 }
 
 PERL_STATIC_INLINE U16
 my_swap16(const U16 x) {
+
+#      ifdef _MSC_VER
+    return _byteswap_ushort(x);
+#      else
     return ((x & 0xFF) << 8) | ((x >> 8) & 0xFF);
+#      endif
 }
+#    endif
+
+/* all CCs except MSVC use the static inlines above, unoptimized MSVC Perl
+   built with -DDEBUGGING, also uses the statics above, to make single stepping
+   and breakpoints easier to use. MSVC's C/C++ front end parser does not
+   recogize the traditional "& << |" expression as a synonym for i386/x64/ARM's
+   byteswap CPU instruction. This MSVC bug was fixed in MSVC 2022 build number
+   19.37/17.7 released Aug 8 2023. All MSVC 2022 build numbers <= 19.36/17.6
+   have the bug. Explicitly tell MSVC to use the byte swap opcode solves the
+   problem. VC's _byteswap_ulong() is declared as an intrinsic function.
+   It will not cause multi-eval problems the way a macro would. So skip the
+   my_swap() wrappers.
+
+   TODO: add special casing for __bswap_32(), __builtin_bswap32(),
+   bswap_32(), cpu_to_be32(), swap32(), read_be32(), write_be32(), htobe32(),
+   OSSwapInt32(), and on April 1st, impliment RtlUlongByteSwap(). Remember to
+   the history and HW compatibility of each of these CC tokens before adding
+   them. If hello_world.c executes and the Desktop GUI works, the defaults
+   in Configure can't produce a binary that SIGILLs on 1 of 2 systems,
+   made 2 years apart, running the same OS version, after a copy paste or
+   cloud server deployment of a /usr/bin/perl file.
+*/
 
-#    define htonl(x)    my_swap32(x)
-#    define ntohl(x)    my_swap32(x)
-#    define ntohs(x)    my_swap16(x)
-#    define htons(x)    my_swap16(x)
+#    if !defined(_MSC_VER) || (defined(_MSC_VER) && defined(DEBUGGING))
+#      define htonl(x)    my_swap32(x)
+#      define ntohl(x)    my_swap32(x)
+#      define ntohs(x)    my_swap16(x)
+#      define htons(x)    my_swap16(x)
+#    else
+#      define htonl(x)    _byteswap_ulong(x)
+#      define ntohl(x)    _byteswap_ulong(x)
+#      define ntohs(x)    _byteswap_ushort(x)
+#      define htons(x)    _byteswap_ushort(x)
+#    endif
 #  else
 #    error "Unsupported byteorder"
 /* The C pre-processor doesn't let us return the value of BYTEORDER as part of
@@ -4635,6 +4717,51 @@ my_swap16(const U16 x) {
 #  endif
 #endif
 
+#if defined(htonll) && !defined(HAS_HTONLL)
+#  define HAS_HTONLL
+#endif
+
+#ifndef HAS_HTONLL
+#  define HAS_HTONLL
+#  if (BYTEORDER & 0xffff) == 0x4321
+#    define ntohll(x)    ((x)&0xFFFFFFFFFFFFFFFF)
+#    define htonll(x)    ntohll(x)
+#  elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678
+#    ifdef _MSC_VER
+#      pragma intrinsic(_byteswap_uint64)
+#    endif
+
+#    if !defined(_MSC_VER) || (defined(_MSC_VER) && defined(DEBUGGING))
+PERL_STATIC_INLINE U64
+my_swap64(const U64 x) {
+#      ifdef _MSC_VER
+    return _byteswap_uint64(x);
+#      else
+/*  return ( ((x & 0xff00000000000000) >> 56) | ((x & 0x00ff000000000000) >> 40)
+           | ((x & 0x0000ff0000000000) >> 24) | ((x & 0x000000ff00000000) >> 8)
+           | ((x & 0x00000000ff000000) << 8)  | ((x & 0x0000000000ff0000) << 24)
+           | ((x & 0x000000000000ff00) << 40) | ((x & 0x00000000000000ff) << 56));
+    return ((U64)htonl(x & 0xFFFFFFFF) << 32) | htonl(x >> 32); */
+    U64 r;
+    r = (x & 0x00000000FFFFFFFF) << 32 | (x & 0xFFFFFFFF00000000) >> 32;
+    r = (r & 0x0000FFFF0000FFFF) << 16 | (r & 0xFFFF0000FFFF0000) >> 16;
+    r = (r & 0x00FF00FF00FF00FF) << 8  | (r & 0xFF00FF00FF00FF00) >> 8;
+    return r;
+#      endif
+}
+#    endif
+#    if !defined(_MSC_VER) || (defined(_MSC_VER) && defined(DEBUGGING))
+#      define htonll(x)    my_swap64(x)
+#      define ntohll(x)    my_swap64(x)
+#    else
+#      define htonll(x)    _byteswap_uint64(x)
+#      define ntohll(x)    _byteswap_uint64(x)
+#    endif
+#  else
+#    error "Unsupported byteorder"
+#  endif
+#endif
+
 /*
  * Little-endian byte order functions - 'v' for 'VAX', or 'reVerse'.
  * -DWS