Skip to content

Commit 26e3182

Browse files
committed
fix: BuiltinByteString with inscrutable content.
1 parent c35ee9d commit 26e3182

File tree

3 files changed

+93
-23
lines changed

3 files changed

+93
-23
lines changed

plutus-tx-plugin/src/PlutusTx/Compiler/Expr.hs

+67-16
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@ import Control.Monad.Reader (ask, asks, local)
7676
import Data.Array qualified as Array
7777
import Data.ByteString qualified as BS
7878
import Data.ByteString.Base16 qualified as Base16
79+
import Data.ByteString.Char8 qualified as BSC
7980
import Data.Generics.Uniplate.Data (transform, universeBi)
8081
import Data.List (elemIndex, isPrefixOf, isSuffixOf)
8182
import Data.Map qualified as Map
@@ -109,17 +110,34 @@ String literals are handled specially, see Note [String literals].
109110
-}
110111

111112
{- Note [unpackFoldrCString#]
112-
This function is introduced by rewrite rules, and usually eliminated by them in concert with `build`.
113+
This function is introduced by rewrite rules, and usually eliminated by them
114+
in concert with `build`.
113115
114-
However, since we often mark things as INLINABLE, we get pre-optimization Core where only the
115-
first transformation has fired. So we need to do something with the function.
116+
However, since we often mark things as INLINABLE, we get pre-optimization Core
117+
where only the first transformation has fired. So we need to do something with
118+
the function.
116119
117-
- We can't easily turn it into a normal fold expression, since we'd need to make a lambda and
118-
we're not in 'CoreM' so we can't make fresh names.
119-
- We can't easily translate it to a builtin, since we don't support higher-order functions.
120+
- We can't easily turn it into a normal fold expression, since we'd need to make
121+
a lambda and we're not in 'CoreM' so we can't make fresh names.
120122
121-
So we use a horrible hack and match on `build . unpackFoldrCString#` to "undo" the original rewrite
122-
rule.
123+
- We can't easily translate it to a builtin, since we don't support higher-order
124+
functions.
125+
126+
So we use a horrible hack and match on `build . unpackFoldrCString#` to "undo"
127+
the original rewrite rule.
128+
129+
Moreover, in some cases GHC generates such expressions where the first literal
130+
character is "un-consed" from its tail, for example:
131+
132+
GHC.Types.:
133+
@GHC.Types.Char
134+
(GHC.Types.C# 'f'#)
135+
(GHC.Base.build
136+
@GHC.Types.Char
137+
(\@b -> GHC.CString.unpackFoldrCString# @b "0d1"#)
138+
)
139+
140+
Then we re-do the cons after un-doing the original rewrite rule.
123141
-}
124142

125143
compileLiteral ::
@@ -149,14 +167,44 @@ tryStringLiteralAsBytes coreExpr = case coreExpr of
149167
GHC.Var isUnpackCStringUtf8 `GHC.App` GHC.Lit (GHC.LitString bytes)
150168
| GHC.getName isUnpackCStringUtf8 == GHC.unpackCStringUtf8Name ->
151169
Just bytes
152-
-- See Note [unpackFoldrCString#]
153-
GHC.Var build `GHC.App` _ `GHC.App` GHC.Lam _ (GHC.Var unpack `GHC.App` _ `GHC.App` expr)
154-
| GHC.getName build == GHC.buildName && GHC.getName unpack == GHC.unpackCStringFoldrName ->
155-
tryStringLiteralAsBytes expr
156-
-- GHC helpfully generates an empty list for the empty string literal instead of a 'LitString'
170+
171+
{- See Note [unpackFoldrCString#]
172+
173+
Example GHC Core expr this pattern matches:
174+
GHC.Base.build
175+
@GHC.Types.Char
176+
(\@b -> GHC.CString.unpackFoldrCString# @b "0d1"#)
177+
178+
-}
179+
GHC.Var build
180+
`GHC.App` GHC.Type (GHC.TyConApp charTyCon _kindOrType)
181+
`GHC.App` GHC.Lam _ (GHC.Var unpack `GHC.App` _ `GHC.App` expr)
182+
| GHC.getName build == GHC.buildName
183+
, GHC.getName unpack == GHC.unpackCStringFoldrName
184+
, GHC.getName charTyCon == GHC.charTyConName ->
185+
tryStringLiteralAsBytes expr
186+
{-
187+
Example GHC Core expr this pattern matches:
188+
GHC.Types.: @GHC.Types.Char (GHC.Types.C# 'f'#) expr
189+
-}
190+
( GHC.Var consId
191+
`GHC.App` (GHC.Type (GHC.TyConApp charTyCon _kindOrType))
192+
`GHC.App` (GHC.Var cSharp `GHC.App` GHC.Lit (GHC.LitChar c))
193+
) `GHC.App` expr
194+
| GHC.getName charTyCon == GHC.charTyConName
195+
, Just consDataCon <- GHC.isDataConId_maybe consId
196+
, GHC.consDataCon == consDataCon
197+
, Just charDataCon <- GHC.isDataConId_maybe cSharp
198+
, GHC.charDataCon == charDataCon ->
199+
BSC.cons c <$> tryStringLiteralAsBytes expr
200+
201+
-- GHC helpfully generates an empty list for the empty string literal instead
202+
-- of a 'LitString'
157203
GHC.Var nil `GHC.App` GHC.Type (GHC.tyConAppTyCon_maybe -> Just tc)
158-
| nil == GHC.dataConWorkId GHC.nilDataCon, GHC.getName tc == GHC.charTyConName ->
204+
| nil == GHC.dataConWorkId GHC.nilDataCon
205+
, GHC.getName tc == GHC.charTyConName ->
159206
Just mempty
207+
160208
-- Chase variable references! GHC likes to lift string constants to variables,
161209
-- that is not good for us!
162210
GHC.Var (GHC.maybeUnfoldingTemplate . GHC.realIdUnfolding -> Just unfolding) ->
@@ -201,10 +249,13 @@ If 'ByteString' contains a codepoint that is not in this range, the function wil
201249
-}
202250
utf8CodePointsAsBytes :: Compiling uni fun m ann => BS.ByteString -> m BS.ByteString
203251
utf8CodePointsAsBytes bs =
204-
case gracefullyDecodeUtf8Bytes (BS.unpack bs) of
205-
Just bytes -> pure $ BS.pack bytes
252+
case tryUtf8CodePointsAsBytes bs of
253+
Just bytes -> pure bytes
206254
Nothing -> throwPlain . CompilationError $
207255
"ByteString literal is expected to contain only codepoints in the range 0 - 255 (0x00 - 0xFF)"
256+
257+
tryUtf8CodePointsAsBytes :: BS.ByteString -> Maybe BS.ByteString
258+
tryUtf8CodePointsAsBytes = fmap BS.pack . gracefullyDecodeUtf8Bytes . BS.unpack
208259
where
209260
{-
210261
Why not use 'Data.Text.Encoding'?
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,30 @@
11
{-# LANGUAGE NoImplicitPrelude #-}
22
{-# LANGUAGE OverloadedStrings #-}
3+
{-# OPTIONS_GHC -fno-full-laziness #-}
4+
{-# OPTIONS_GHC -fno-ignore-interface-pragmas #-}
5+
{-# OPTIONS_GHC -fno-omit-interface-pragmas #-}
6+
{-# OPTIONS_GHC -fno-spec-constr #-}
7+
{-# OPTIONS_GHC -fno-specialise #-}
8+
{-# OPTIONS_GHC -fno-strictness #-}
9+
{-# OPTIONS_GHC -fno-unbox-small-strict-fields #-}
10+
{-# OPTIONS_GHC -fno-unbox-strict-fields #-}
311
{-# OPTIONS_GHC -fplugin PlutusTx.Plugin #-}
12+
{-# OPTIONS_GHC -fplugin-opt PlutusTx.Plugin:target-version=1.1.0 #-}
413

514
module ByteStringLiterals.Lib where
615

716
import PlutusTx.Builtins (BuiltinByteStringHex (..))
817

18+
{-
19+
20+
PlutusTx.Builtins.HasOpaque.stringToBuiltinByteStringHex
21+
(GHC.Types.:
22+
@GHC.Types.Char
23+
(GHC.Types.C# 'f'#)
24+
(GHC.Base.build
25+
@GHC.Types.Char
26+
(\ (@b) -> GHC.CString.unpackFoldrCString# @b "0d1"#)))
27+
-}
928
{-# INLINEABLE hex #-}
1029
hex :: BuiltinByteStringHex
11-
hex = "f0"
30+
hex = "f0d1"

plutus-tx-plugin/test/ByteStringLiterals/Spec.hs

+6-6
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ tests =
3636
, test_CompileBuiltinByteStringLiteral_utf8
3737
, test_CompileBuiltinByteStringLiteral_stringToBuiltinByteStringUtf8
3838
, test_CompileBuiltinByteStringLiteral_hex
39-
, test_CompileBuiltinByteStringLiteral2_hex
39+
, test_CompileBuiltinByteStringLiteral_hex_imported
4040
, test_CompileBuiltinByteStringLiteral_stringToBuiltinByteStringHex
4141
]
4242
]
@@ -149,7 +149,7 @@ test_CompileBuiltinByteStringLiteral_stringToBuiltinByteStringUtf8 =
149149

150150
test_CompileBuiltinByteStringLiteral_hex :: TestTree
151151
test_CompileBuiltinByteStringLiteral_hex =
152-
testCase "BuiltinByteStringHex" do
152+
testCase "BuiltinByteStringHex (local)" do
153153
term compiledLiteral @?= expectedUplc
154154
where
155155
compiledLiteral :: CompiledCode BuiltinByteStringHex =
@@ -190,11 +190,11 @@ test_CompileBuiltinByteStringLiteral_hex =
190190
||]
191191
)
192192

193-
test_CompileBuiltinByteStringLiteral2_hex :: TestTree
194-
test_CompileBuiltinByteStringLiteral2_hex =
195-
testCase "BuiltinByteStringHex" $
193+
test_CompileBuiltinByteStringLiteral_hex_imported :: TestTree
194+
test_CompileBuiltinByteStringLiteral_hex_imported =
195+
testCase "BuiltinByteStringHex (imported)" $
196196
term $$(compile [||Lib.hex||])
197-
@?= Constant () (Some (ValueOf DefaultUniByteString "\240"))
197+
@?= Constant () (Some (ValueOf DefaultUniByteString "\240\209"))
198198

199199
test_CompileBuiltinByteStringLiteral_stringToBuiltinByteStringHex :: TestTree
200200
test_CompileBuiltinByteStringLiteral_stringToBuiltinByteStringHex =

0 commit comments

Comments
 (0)