Skip to content

Fix: Use of fromString @BuiltinByteStringHex with inscrutable content #6963

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
May 2, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
### Fixed

- In some cases HEX-encoded string literals were not handled correctly,
causing the "Use of fromString @ PlutusTx.Builtins.HasOpaque.BuiltinByteStringHex with inscrutable content" error.
1 change: 1 addition & 0 deletions plutus-tx-plugin/plutus-tx-plugin.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ test-suite plutus-tx-plugin-tests
Budget.WithGHCOptimisations
Budget.WithoutGHCOptimisations
BuiltinList.Budget.Spec
ByteStringLiterals.Lib
ByteStringLiterals.Spec
DataList.Budget.Spec
Inline.Spec
Expand Down
82 changes: 66 additions & 16 deletions plutus-tx-plugin/src/PlutusTx/Compiler/Expr.hs
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ import Control.Monad.Reader (ask, asks, local)
import Data.Array qualified as Array
import Data.ByteString qualified as BS
import Data.ByteString.Base16 qualified as Base16
import Data.ByteString.Char8 qualified as BSC
import Data.Generics.Uniplate.Data (transform, universeBi)
import Data.List (elemIndex, isPrefixOf, isSuffixOf)
import Data.Map qualified as Map
Expand Down Expand Up @@ -109,17 +110,34 @@ String literals are handled specially, see Note [String literals].
-}

{- Note [unpackFoldrCString#]
This function is introduced by rewrite rules, and usually eliminated by them in concert with `build`.
This function is introduced by rewrite rules, and usually eliminated by them
in concert with `build`.

However, since we often mark things as INLINABLE, we get pre-optimization Core where only the
first transformation has fired. So we need to do something with the function.
However, since we often mark things as INLINABLE, we get pre-optimization Core
where only the first transformation has fired. So we need to do something with
the function.

- We can't easily turn it into a normal fold expression, since we'd need to make a lambda and
we're not in 'CoreM' so we can't make fresh names.
- We can't easily translate it to a builtin, since we don't support higher-order functions.
- We can't easily turn it into a normal fold expression, since we'd need to make
a lambda and we're not in 'CoreM' so we can't make fresh names.

So we use a horrible hack and match on `build . unpackFoldrCString#` to "undo" the original rewrite
rule.
- We can't easily translate it to a builtin, since we don't support higher-order
functions.

So we use a horrible hack and match on `build . unpackFoldrCString#` to "undo"
the original rewrite rule.

Moreover, in some cases GHC generates such expressions where the first literal
character is "un-consed" from its tail, for example:

GHC.Types.:
@GHC.Types.Char
(GHC.Types.C# 'f'#)
(GHC.Base.build
@GHC.Types.Char
(\@b -> GHC.CString.unpackFoldrCString# @b "0d1"#)
)

Then we re-do the cons after un-doing the original rewrite rule.
-}

compileLiteral ::
Expand Down Expand Up @@ -149,14 +167,43 @@ tryStringLiteralAsBytes coreExpr = case coreExpr of
GHC.Var isUnpackCStringUtf8 `GHC.App` GHC.Lit (GHC.LitString bytes)
| GHC.getName isUnpackCStringUtf8 == GHC.unpackCStringUtf8Name ->
Just bytes
-- See Note [unpackFoldrCString#]
GHC.Var build `GHC.App` _ `GHC.App` GHC.Lam _ (GHC.Var unpack `GHC.App` _ `GHC.App` expr)
| GHC.getName build == GHC.buildName && GHC.getName unpack == GHC.unpackCStringFoldrName ->
tryStringLiteralAsBytes expr
-- GHC helpfully generates an empty list for the empty string literal instead of a 'LitString'

{- See Note [unpackFoldrCString#]

Example GHC Core expr this pattern matches:
GHC.Base.build
@GHC.Types.Char
(\@b -> GHC.CString.unpackFoldrCString# @b "0d1"#)
-}
GHC.Var build
`GHC.App` GHC.Type (GHC.TyConApp charTyCon _kindOrType)
`GHC.App` GHC.Lam _ (GHC.Var unpack `GHC.App` _ `GHC.App` expr)
| GHC.getName build == GHC.buildName
, GHC.getName unpack == GHC.unpackCStringFoldrName
, GHC.getName charTyCon == GHC.charTyConName ->
tryStringLiteralAsBytes expr
{-
Example GHC Core expr this pattern matches:
GHC.Types.: @GHC.Types.Char (GHC.Types.C# 'f'#) expr
-}
GHC.Var consId
`GHC.App` GHC.Type (GHC.TyConApp charTyCon _kindOrType)
`GHC.App` (GHC.Var cSharp `GHC.App` GHC.Lit (GHC.LitChar c))
`GHC.App` expr
| GHC.getName charTyCon == GHC.charTyConName
, Just consDataCon <- GHC.isDataConId_maybe consId
, GHC.consDataCon == consDataCon
, Just charDataCon <- GHC.isDataConId_maybe cSharp
, GHC.charDataCon == charDataCon ->
BSC.cons c <$> tryStringLiteralAsBytes expr

-- GHC helpfully generates an empty list for the empty string literal instead
-- of a 'LitString'
GHC.Var nil `GHC.App` GHC.Type (GHC.tyConAppTyCon_maybe -> Just tc)
| nil == GHC.dataConWorkId GHC.nilDataCon, GHC.getName tc == GHC.charTyConName ->
| nil == GHC.dataConWorkId GHC.nilDataCon
, GHC.getName tc == GHC.charTyConName ->
Just mempty

-- Chase variable references! GHC likes to lift string constants to variables,
-- that is not good for us!
GHC.Var (GHC.maybeUnfoldingTemplate . GHC.realIdUnfolding -> Just unfolding) ->
Expand Down Expand Up @@ -201,10 +248,13 @@ If 'ByteString' contains a codepoint that is not in this range, the function wil
-}
utf8CodePointsAsBytes :: Compiling uni fun m ann => BS.ByteString -> m BS.ByteString
utf8CodePointsAsBytes bs =
case gracefullyDecodeUtf8Bytes (BS.unpack bs) of
Just bytes -> pure $ BS.pack bytes
case tryUtf8CodePointsAsBytes bs of
Just bytes -> pure bytes
Nothing -> throwPlain . CompilationError $
"ByteString literal is expected to contain only codepoints in the range 0 - 255 (0x00 - 0xFF)"

tryUtf8CodePointsAsBytes :: BS.ByteString -> Maybe BS.ByteString
tryUtf8CodePointsAsBytes = fmap BS.pack . gracefullyDecodeUtf8Bytes . BS.unpack
where
{-
Why not use 'Data.Text.Encoding'?
Expand Down
30 changes: 30 additions & 0 deletions plutus-tx-plugin/test/ByteStringLiterals/Lib.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# OPTIONS_GHC -fno-full-laziness #-}
{-# OPTIONS_GHC -fno-ignore-interface-pragmas #-}
{-# OPTIONS_GHC -fno-omit-interface-pragmas #-}
{-# OPTIONS_GHC -fno-spec-constr #-}
{-# OPTIONS_GHC -fno-specialise #-}
{-# OPTIONS_GHC -fno-strictness #-}
{-# OPTIONS_GHC -fno-unbox-small-strict-fields #-}
{-# OPTIONS_GHC -fno-unbox-strict-fields #-}
{-# OPTIONS_GHC -fplugin PlutusTx.Plugin #-}
{-# OPTIONS_GHC -fplugin-opt PlutusTx.Plugin:target-version=1.1.0 #-}

module ByteStringLiterals.Lib where

import PlutusTx.Builtins (BuiltinByteStringHex (..))

{-

PlutusTx.Builtins.HasOpaque.stringToBuiltinByteStringHex
(GHC.Types.:
@GHC.Types.Char
(GHC.Types.C# 'f'#)
(GHC.Base.build
@GHC.Types.Char
(\ (@b) -> GHC.CString.unpackFoldrCString# @b "0d1"#)))
-}
{-# INLINEABLE hex #-}
hex :: BuiltinByteStringHex
hex = "f0d1"
16 changes: 12 additions & 4 deletions plutus-tx-plugin/test/ByteStringLiterals/Spec.hs
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,13 @@

module ByteStringLiterals.Spec (tests) where

import ByteStringLiterals.Lib qualified as Lib
import Data.ByteString (ByteString)
import Data.Char (chr)
import Data.Foldable (for_)
import Data.String (fromString)
import Data.Text.Encoding qualified as TE
import PlutusCore (someValue)
import PlutusCore (DefaultUni (..), Some (..), ValueOf (..), someValue)
import PlutusTx (CompiledCode, getPlcNoAnn)
import PlutusTx.Builtins (BuiltinByteString, BuiltinByteStringHex, BuiltinByteStringUtf8,
fromBuiltin)
Expand All @@ -21,8 +22,7 @@ import PlutusTx.Builtins.HasOpaque (stringToBuiltinByteString, stringToBuiltinBy
import PlutusTx.TH (compile)
import Test.Tasty (TestTree, testGroup)
import Test.Tasty.HUnit (testCase, (@?=))
import UntypedPlutusCore (DefaultFun, DefaultUni, NamedDeBruijn, Program (_progTerm),
Term (Constant))
import UntypedPlutusCore (DefaultFun, NamedDeBruijn, Program (_progTerm), Term (Constant))

tests :: TestTree
tests =
Expand All @@ -36,6 +36,7 @@ tests =
, test_CompileBuiltinByteStringLiteral_utf8
, test_CompileBuiltinByteStringLiteral_stringToBuiltinByteStringUtf8
, test_CompileBuiltinByteStringLiteral_hex
, test_CompileBuiltinByteStringLiteral_hex_imported
, test_CompileBuiltinByteStringLiteral_stringToBuiltinByteStringHex
]
]
Expand Down Expand Up @@ -148,7 +149,7 @@ test_CompileBuiltinByteStringLiteral_stringToBuiltinByteStringUtf8 =

test_CompileBuiltinByteStringLiteral_hex :: TestTree
test_CompileBuiltinByteStringLiteral_hex =
testCase "BuiltinByteStringHex" do
testCase "BuiltinByteStringHex (local)" do
term compiledLiteral @?= expectedUplc
where
compiledLiteral :: CompiledCode BuiltinByteStringHex =
Expand Down Expand Up @@ -188,6 +189,13 @@ test_CompileBuiltinByteStringLiteral_hex =
\0f0e0d0c0b0a09080706050403020100"
||]
)

test_CompileBuiltinByteStringLiteral_hex_imported :: TestTree
test_CompileBuiltinByteStringLiteral_hex_imported =
testCase "BuiltinByteStringHex (imported)" $
term $$(compile [||Lib.hex||])
@?= Constant () (Some (ValueOf DefaultUniByteString "\240\209"))

test_CompileBuiltinByteStringLiteral_stringToBuiltinByteStringHex :: TestTree
test_CompileBuiltinByteStringLiteral_stringToBuiltinByteStringHex =
testCase "stringToBuiltinByteStringHex" do
Expand Down