1
1
using System ;
2
+ #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1
3
+ using System . Buffers ;
4
+ #endif
2
5
using System . Collections ;
3
6
using System . Collections . Generic ;
4
7
using System . Globalization ;
@@ -711,14 +714,22 @@ bool IsUtf8()
711
714
}
712
715
}
713
716
717
+ private static readonly Encoding _iso88591ExceptionFallback = Encoding . GetEncoding ( 28591 , new EncoderExceptionFallback ( ) , new DecoderExceptionFallback ( ) ) ; // ISO-8859-1
714
718
/// <summary>
/// Checks if the given string can be accurately represented and retrieved in ISO-8859-1 encoding.
/// </summary>
/// <param name="input">The text to inspect.</param>
/// <returns>
/// <c>true</c> when every UTF-16 code unit of <paramref name="input"/> is U+00FF or below
/// (an empty string qualifies); otherwise <c>false</c>.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="input"/> is <c>null</c>.</exception>
private static bool IsValidISO(string input)
{
    if (input == null)
    {
        throw new ArgumentNullException(nameof(input));
    }

    // ISO-8859-1 maps the byte values 0x00-0xFF one-to-one onto U+0000-U+00FF, so a string
    // round-trips exactly when no code unit exceeds 0xFF. A plain range check answers that
    // without allocating and without throwing an exception for non-Latin-1 text.
    foreach (char c in input)
    {
        if (c > '\u00FF')
        {
            return false;
        }
    }

    return true;
}
723
734
724
735
/// <summary>
@@ -832,18 +843,13 @@ private static BitArray PlainTextToBinaryAlphanumeric(string plainText)
832
843
return codeText ;
833
844
}
834
845
835
- /// <summary>
836
- /// Returns a string that contains the original string, with characters that cannot be encoded by a
837
- /// specified encoding (default of ISO-8859-2) with a replacement character.
838
- /// </summary>
839
- private static string ConvertToIso8859 ( string value , string Iso = "ISO-8859-2" )
840
- {
841
- Encoding iso = Encoding . GetEncoding ( Iso ) ;
842
- Encoding utf8 = Encoding . UTF8 ;
843
- byte [ ] utfBytes = utf8 . GetBytes ( value ) ;
844
- byte [ ] isoBytes = Encoding . Convert ( utf8 , iso , utfBytes ) ;
845
- return iso . GetString ( isoBytes ) ;
846
- }
846
/// <summary>
/// Shared ISO-8859-1 (code page 28591) encoding instance.
/// </summary>
#if NET5_0_OR_GREATER
private static readonly Encoding _iso8859_1 = Encoding.Latin1;
#else
private static readonly Encoding _iso8859_1 = Encoding.GetEncoding(28591); // ISO-8859-1
#endif

/// <summary>
/// ISO-8859-2 encoding instance; not initialized here and remains <c>null</c> until assigned elsewhere.
/// </summary>
private static Encoding _iso8859_2;
847
853
848
854
/// <summary>
849
855
/// Converts plain text into a binary format using byte mode encoding, which supports various character encodings through ECI (Extended Channel Interpretations).
@@ -860,35 +866,81 @@ private static string ConvertToIso8859(string value, string Iso = "ISO-8859-2")
860
866
/// </remarks>
861
867
private static BitArray PlainTextToBinaryByte(string plainText, EciMode eciMode, bool utf8BOM, bool forceUtf8)
{
    // Overview: choose the target encoding, encode plainText into bytes, then turn those
    // bytes into a BitArray (optionally prefixed with the UTF-8 preamble).
    Encoding targetEncoding;

    // Check if the text is valid ISO-8859-1 and UTF-8 is not forced, then encode using ISO-8859-1.
    // forceUtf8 is tested first so the validity check is skipped when its result cannot matter.
    if (!forceUtf8 && IsValidISO(plainText))
    {
        targetEncoding = _iso8859_1;
        utf8BOM = false; // the preamble is only written for UTF-8 output
    }
    else
    {
        // Determine the encoding based on the specified ECI mode.
        switch (eciMode)
        {
            case EciMode.Iso8859_1:
                // Convert text to ISO-8859-1 and encode.
                targetEncoding = _iso8859_1;
                utf8BOM = false;
                break;
            case EciMode.Iso8859_2:
                // Note: ISO-8859-2 is not natively supported on .NET Core
                //
                // Users must install the System.Text.Encoding.CodePages package and call Encoding.RegisterProvider(CodePagesEncodingProvider.Instance)
                // before using this encoding mode.
                if (_iso8859_2 == null)
                {
                    _iso8859_2 = Encoding.GetEncoding(28592); // ISO-8859-2
                }
                // Convert text to ISO-8859-2 and encode.
                targetEncoding = _iso8859_2;
                utf8BOM = false;
                break;
            case EciMode.Default:
            case EciMode.Utf8:
            default:
                // Handle UTF-8 encoding; utf8BOM is left as passed so the preamble can be added below.
                targetEncoding = Encoding.UTF8;
                break;
        }
    }

#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1
    // We can use stackalloc for small arrays to prevent heap allocations
    const int MaxStackSizeInBytes = 512;

    int count = targetEncoding.GetByteCount(plainText);
    byte[] bufferFromPool = null;
    Span<byte> codeBytes = (count <= MaxStackSizeInBytes)
        ? (stackalloc byte[MaxStackSizeInBytes])
        : (bufferFromPool = ArrayPool<byte>.Shared.Rent(count));
    try
    {
        // Both the stack buffer and a rented array may be larger than needed; trim to the exact size.
        codeBytes = codeBytes.Slice(0, count);
        targetEncoding.GetBytes(plainText, codeBytes);

        // Convert the bytes into a BitArray.
        return EncodedBytesToBitArray(codeBytes, utf8BOM);
    }
    finally
    {
        // Hand the rented buffer back even when encoding or conversion throws.
        if (bufferFromPool != null)
        {
            ArrayPool<byte>.Shared.Return(bufferFromPool);
        }
    }
#else
    // Convert the array of bytes into a BitArray.
    return EncodedBytesToBitArray(targetEncoding.GetBytes(plainText), utf8BOM);
#endif
}

/// <summary>
/// Converts already-encoded bytes into a BitArray, optionally preceded by the UTF-8 preamble (EF BB BF).
/// </summary>
/// <param name="codeBytes">The encoded text bytes.</param>
/// <param name="utf8BOM">When <c>true</c>, the first 24 bits of the result hold the UTF-8 preamble.</param>
/// <returns>The BitArray produced by <c>ToBitArray</c>, with the preamble written into its first 24 bits when requested.</returns>
private static BitArray EncodedBytesToBitArray(
#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1
    ReadOnlySpan<byte> codeBytes,
#else
    byte[] codeBytes,
#endif
    bool utf8BOM)
{
    if (!utf8BOM)
    {
        return ToBitArray(codeBytes);
    }

    // convert to bit array, leaving 24 bits for the UTF-8 preamble
    BitArray bitArray = ToBitArray(codeBytes, 24);
    // write UTF8 preamble (EF BB BF) to the BitArray
    DecToBin(0xEF, 8, bitArray, 0);
    DecToBin(0xBB, 8, bitArray, 8);
    DecToBin(0xBF, 8, bitArray, 16);
    return bitArray;
}
893
945
894
946
/// <summary>
@@ -898,7 +950,13 @@ private static BitArray PlainTextToBinaryByte(string plainText, EciMode eciMode,
898
950
/// <param name="byteArray">The byte array to convert into a BitArray.</param>
899
951
/// <param name="prefixZeros">The number of leading zeros to prepend to the resulting BitArray.</param>
900
952
/// <returns>A BitArray representing the bits of the input byteArray, with optional leading zeros.</returns>
901
- private static BitArray ToBitArray ( byte [ ] byteArray , int prefixZeros = 0 )
953
+ private static BitArray ToBitArray (
954
+ #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1
955
+ ReadOnlySpan < byte > byteArray , // byte[] has an implicit cast to ReadOnlySpan<byte>
956
+ #else
957
+ byte [ ] byteArray ,
958
+ #endif
959
+ int prefixZeros = 0 )
902
960
{
903
961
// Calculate the total number of bits in the resulting BitArray including the prefix zeros.
904
962
var bitArray = new BitArray ( ( int ) ( ( uint ) byteArray . Length * 8 ) + prefixZeros ) ;
0 commit comments