From bf2d455e7ec7afc98ac81686c4d50ee50a68b60e Mon Sep 17 00:00:00 2001 From: Lars Kuhtz Date: Sat, 8 Apr 2023 00:40:59 -0700 Subject: [PATCH 1/7] fix delta parsing for attoparsec --- pact.cabal | 2 + src/Pact/Parse.hs | 17 +++--- src/Pact/Types/ExpParser.hs | 17 ++++-- src/Pact/Types/Info.hs | 2 +- src/Pact/Types/Orphans.hs | 19 ------ src/Pact/Types/Parser.hs | 106 +++++++++++++++++++++++++++++++- tests/PactTests.hs | 2 + tests/Test/Pact/Parse.hs | 119 ++++++++++++++++++++++++++++++++++++ 8 files changed, 248 insertions(+), 36 deletions(-) create mode 100644 tests/Test/Pact/Parse.hs diff --git a/pact.cabal b/pact.cabal index fc8b4dc47..442bee3d3 100644 --- a/pact.cabal +++ b/pact.cabal @@ -389,6 +389,7 @@ test-suite hspec RoundTripSpec PrincipalSpec SizeOfSpec + Test.Pact.Parse if !impl(ghcjs) other-modules: @@ -453,4 +454,5 @@ test-suite hspec , sbv , servant-client , temporary >= 1.3 + , trifecta , yaml diff --git a/src/Pact/Parse.hs b/src/Pact/Parse.hs index f32e53001..28408e782 100644 --- a/src/Pact/Parse.hs +++ b/src/Pact/Parse.hs @@ -35,7 +35,6 @@ import Control.DeepSeq (NFData) import Control.Lens (Wrapped(..)) import Control.Monad import qualified Data.Aeson as A -import qualified Data.Attoparsec.Text as AP import qualified Data.ByteString as BS import Data.Char (digitToInt) import Data.Decimal @@ -55,6 +54,8 @@ import Pact.Types.Pretty (Pretty(..),viaShow) import Pact.Types.Info import Pact.Types.Term (ToTerm) +-- -------------------------------------------------------------------------- -- +-- Expression Parser -- | Main parser for Pact expressions. @@ -63,7 +64,7 @@ expr = do delt <- position let inf = do end <- position - let len = bytes end - bytes delt + let len = column end - column delt return $! Parsed delt (fromIntegral len) separator t s = symbol t >> (ESeparator . SeparatorExp s <$> inf) msum @@ -141,7 +142,7 @@ newtype ParsedDecimal = ParsedDecimal Decimal instance A.FromJSON ParsedDecimal where parseJSON (A.String s) = - ParsedDecimal <$> case AP.parseOnly (unPactParser number) s of + ParsedDecimal <$> case pactAttoParseOnly (unPactParser number) s of Right (LDecimal d) -> return d Right (LInteger i) -> return (fromIntegral i) _ -> fail $ "Failure parsing decimal string: " ++ show s @@ -167,7 +168,7 @@ newtype ParsedInteger = ParsedInteger Integer instance A.FromJSON ParsedInteger where parseJSON (A.String s) = - ParsedInteger <$> case AP.parseOnly (unPactParser number) s of + ParsedInteger <$> case pactAttoParseOnly (unPactParser number) s of Right (LInteger i) -> return i _ -> fail $ "Failure parsing integer string: " ++ show s parseJSON (A.Number n) = return $ ParsedInteger (round n) @@ -181,16 +182,19 @@ instance A.ToJSON ParsedInteger where instance Wrapped ParsedInteger +-- -------------------------------------------------------------------------- -- +-- Top Level Parsers + -- | "Production" parser: atto, parse multiple exprs. parseExprs :: Text -> Either String [Exp Parsed] -parseExprs = AP.parseOnly (unPactParser (whiteSpace *> exprs <* TF.eof)) +parseExprs = pactAttoParseOnly (unPactParser (whiteSpace *> exprs <* TF.eof)) {-# INLINABLE parseExprs #-} -- | Legacy version of "production" parser: atto, parse multiple exprs. This -- parser does not force EOF and thus accepts trailing inputs that are not valid -- pact code. legacyParseExprs :: Text -> Either String [Exp Parsed] -legacyParseExprs = AP.parseOnly (unPactParser (whiteSpace *> exprs)) +legacyParseExprs = pactAttoParseOnly (unPactParser (whiteSpace *> exprs)) {-# INLINABLE legacyParseExprs #-} -- | ParsedCode version of 'parseExprs' @@ -210,7 +214,6 @@ _parseF p fp = do let s = unpack $ decodeUtf8 bs fmap (,s) <$> TF.parseFromFileEx p fp - _parseS :: String -> TF.Result [Exp Parsed] _parseS = TF.parseString exprsOnly mempty diff --git a/src/Pact/Types/ExpParser.hs b/src/Pact/Types/ExpParser.hs index 614e8f1e6..827562b01 100644 --- a/src/Pact/Types/ExpParser.hs +++ b/src/Pact/Types/ExpParser.hs @@ -61,7 +61,6 @@ import Control.Monad.State import Control.Monad.Reader import Control.Arrow (second) import Prelude hiding (exp) -import Data.String import Control.Lens hiding (prism) import Data.Default import Data.Text (Text,unpack) @@ -108,7 +107,7 @@ data ParseState a = ParseState makeLenses ''ParseState -- | Current env has flag for try-narrow fix. -data ParseEnv = ParseEnv +newtype ParseEnv = ParseEnv { _peNarrowTry :: Bool } instance Default ParseEnv where def = ParseEnv True @@ -120,13 +119,19 @@ mkEmptyInfo e = Info (Just (mempty,e)) {-# INLINE mkStringInfo #-} mkStringInfo :: String -> MkInfo -mkStringInfo s d = Info (Just (fromString $ take (_pLength d) $ - drop (fromIntegral $ TF.bytes d) s,d)) +mkStringInfo s d = Info $ Just (Code code, d) + where + code = T.take len $ T.drop offset $ T.pack s + offset = fromIntegral $ TF.column (_pDelta d) + len = _pLength d {-# INLINE mkTextInfo #-} mkTextInfo :: T.Text -> MkInfo -mkTextInfo s d = Info (Just (Code $ T.take (_pLength d) $ - T.drop (fromIntegral $ TF.bytes d) s,d)) +mkTextInfo s d = Info $ Just (Code code, d) + where + code = T.take len $ T.drop offset s + offset = fromIntegral $ TF.column (_pDelta d) + len = _pLength d type ExpParse s a = ReaderT ParseEnv (StateT (ParseState s) (Parsec Void Cursor)) a diff --git a/src/Pact/Types/Info.hs b/src/Pact/Types/Info.hs index 5a59ecdce..856e44820 100644 --- a/src/Pact/Types/Info.hs +++ b/src/Pact/Types/Info.hs @@ -103,7 +103,7 @@ instance Default Info where def = Info Nothing instance SizeOf Info where sizeOf _ _ = 0 --- make an Info that refers to the indicated text +-- | Make an Info that refers to the indicated text mkInfo :: Text -> Info mkInfo t = Info $ Just (Code t,Parsed delt len) where len = T.length t diff --git a/src/Pact/Types/Orphans.hs b/src/Pact/Types/Orphans.hs index 37e7c6f15..8adbc7609 100644 --- a/src/Pact/Types/Orphans.hs +++ b/src/Pact/Types/Orphans.hs @@ -1,10 +1,8 @@ {-# LANGUAGE DerivingStrategies #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} -{-# LANGUAGE LambdaCase #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE StandaloneDeriving #-} -{-# LANGUAGE TypeSynonymInstances #-} {-# OPTIONS_GHC -fno-warn-orphans #-} -- | -- Module : Pact.Types.Orphans @@ -19,10 +17,7 @@ module Pact.Types.Orphans where import Data.Serialize import Data.Decimal import qualified Data.Aeson as A -import Text.Trifecta.Combinators (DeltaParsing(..)) import Text.Trifecta.Delta -import qualified Data.Attoparsec.Text as AP -import qualified Data.Attoparsec.Internal.Types as APT import Data.Text (Text) import Data.Text.Encoding import Pact.Time.Internal (NominalDiffTime(..), UTCTime(..)) @@ -53,20 +48,6 @@ instance Serialize A.Value where instance NFData Delta - --- | Atto DeltaParsing instance provides 'position' only (with no support for --- hidden chars like Trifecta). -instance DeltaParsing AP.Parser where - line = return mempty - position = attoPos >>= \(APT.Pos p) -> let p' = fromIntegral p in return $ Columns p' p' -- p p - slicedWith f a = (`f` mempty) <$> a - rend = return mempty - restOfLine = return mempty - --- | retrieve pos from Attoparsec. -attoPos :: APT.Parser n APT.Pos -attoPos = APT.Parser $ \t pos more _lose win -> win t pos more pos - instance Default Text where def = "" instance Serialize Text where put = put . encodeUtf8 diff --git a/src/Pact/Types/Parser.hs b/src/Pact/Types/Parser.hs index 04d3b7e7f..382922c5d 100644 --- a/src/Pact/Types/Parser.hs +++ b/src/Pact/Types/Parser.hs @@ -1,3 +1,6 @@ +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE CPP #-} +{-# LANGUAGE DerivingStrategies #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE Rank2Types #-} -- | @@ -10,21 +13,37 @@ -- module Pact.Types.Parser - ( + ( -- * Abstract Pact Parser PactParser(..) , symbols , style + + -- * Pact Attoparsec Parser + , PactAttoparsec(..) + , pactAttoParseOnly ) where - import Control.Applicative import Control.Monad -import Text.Trifecta +import Control.Monad.State + +import qualified Data.Attoparsec.Internal.Types as APT +import qualified Data.Attoparsec.Text as AP import qualified Data.HashSet as HS +import qualified Data.Text as T + import Text.Parser.Token.Highlight import Text.Parser.Token.Style +import Text.Trifecta +import Text.Trifecta.Delta as TF +-- -------------------------------------------------------------------------- -- +-- | Abstract Pact Parser +-- +-- On-chain this is use for Attoparsec as parser backend. In the repl trifecta +-- is used. +-- newtype PactParser p a = PactParser { unPactParser :: p a } deriving (Functor, Applicative, Alternative, Monad, MonadPlus, Parsing, CharParsing, DeltaParsing) @@ -45,3 +64,84 @@ style = IdentifierStyle "atom" (HS.fromList ["true","false"]) Symbol ReservedIdentifier + +-- -------------------------------------------------------------------------- -- +-- Pact Attoparsec backend parser + +-- | A wrapper around Attoparsec that adds DeltaParsing +-- +newtype PactAttoparsec a = PactAttoparsec + { runPactAttoparsec :: StateT Int AP.Parser a } + deriving (Functor, Applicative, Alternative, Monad, MonadPlus, MonadState Int) + deriving (Parsing, TokenParsing) + +pactAttoParseOnly :: PactAttoparsec a -> T.Text -> Either String a +pactAttoParseOnly = AP.parseOnly . flip evalStateT 0 . runPactAttoparsec + + +-- | Atto DeltaParsing instance provides 'position' only (with no support for +-- hidden chars like Trifecta). +-- +instance DeltaParsing PactAttoparsec where + line = return mempty + + -- Returns @Column NUMBER_OF_CHARS NUMBER_OF_BYTES@ + -- + -- Notes about legacy behavior: + -- + -- For text <2 it used to return for both values the number of 16-bit code + -- units words. With the UTF-16 encoding used by text <2 this is the total + -- number of characters plus number of characters that are encoded using two + -- 16-bit code units. For instance the Ugaritic letter U+1038D would result + -- in a positional length of two (two 16-bit code units). The string + -- "a\0x263A\0x1038D" ("a☺𐎍") would have positional length 4 (3 characters + -- plus one 2 16-bit character). + -- + -- In practice the old behavior was close enough to the number of characters + -- that it went mostly unnoticed and didn't cause harm on chain. The code + -- just assumed that it represented the number text characters. Those + -- numbers appear on chain (up to some block height) within info objects and + -- later still in failure messages. It is also relevant for extracting the + -- module text from the pact transaction before storing it in the pact db. + -- The presence of unicode characters can result in modules containing + -- dangling data because there are less characters in the module than what + -- is assumed based on the position information. + -- + -- For text >=2 the attoparsic position tracks just bytes and the internal + -- representation of UTF-8. For instance the Ugaritic letter U+1038D results + -- in a byte length of 4. The string "a\0x263A\0x1038D" ("a☺𐎍") has 8 bytes + -- (1 code unit plus 3 code unit plus 4 code units). + -- + position = do +#if MIN_VERSION_text(2,0,0) + APT.Pos !bytePos <- parserPos +#else + APT.Pos !bytePos <- (* 2) <$> parserPos +#endif + !charPos <- gets fromIntegral + return $ TF.Columns charPos (fromIntegral bytePos) + {-# INLINE position #-} + + slicedWith f a = (`f` mempty) <$> a + rend = return mempty + restOfLine = return mempty + +-- | retrieve pos from Attoparsec. +-- +-- The first parameter to the parser is the remaining available input, which +-- isn't of any help here. +-- +parserPos :: PactAttoparsec APT.Pos +parserPos = PactAttoparsec $ StateT $ \x -> + APT.Parser $ \t !pos more _lose win -> win t pos more (pos, x) + +instance CharParsing PactAttoparsec where + satisfy p = PactAttoparsec (satisfy p) <* modify' (+ 1) + {-# INLINE satisfy #-} + + string s = PactAttoparsec (string s) <* modify' (+ length s) + {-# INLINE string #-} + + text s = PactAttoparsec (text s) <* modify' (+ T.length s) + {-# INLINE text #-} + diff --git a/tests/PactTests.hs b/tests/PactTests.hs index 4dd96565b..a1af34e74 100644 --- a/tests/PactTests.hs +++ b/tests/PactTests.hs @@ -7,6 +7,7 @@ import qualified KeysetSpec import qualified RoundTripSpec import qualified PrincipalSpec import qualified SizeOfSpec +import qualified Test.Pact.Parse #ifndef ghcjs_HOST_OS import qualified PactTestsSpec @@ -39,6 +40,7 @@ main = hspec $ parallel $ do describe "RoundTripSpec" RoundTripSpec.spec describe "PrincipalSpec" PrincipalSpec.spec describe "SizeOfSpec" SizeOfSpec.spec + describe "Test.Pact.Parse" Test.Pact.Parse.spec #ifndef ghcjs_HOST_OS diff --git a/tests/Test/Pact/Parse.hs b/tests/Test/Pact/Parse.hs new file mode 100644 index 000000000..1789b0447 --- /dev/null +++ b/tests/Test/Pact/Parse.hs @@ -0,0 +1,119 @@ +{-# LANGUAGE CPP #-} +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TypeApplications #-} +{-# LANGUAGE ViewPatterns #-} + +-- | +-- Module: Test.Pact.Parse +-- Copyright: Copyright © 2023 Kadena LLC. +-- License: MIT +-- Maintainer: Lars Kuhtz +-- Stability: experimental +-- +module Test.Pact.Parse +( spec +) where + +import qualified Data.ByteString as B +import Data.Char +import qualified Data.Text as T +import qualified Data.Text.Encoding as T + +import Test.Hspec +import Test.Hspec.QuickCheck +import Test.QuickCheck + +import qualified Text.Trifecta as D +import qualified Text.Trifecta.Delta as D + +-- internal modules + +import Pact.Parse +import Pact.Types.Parser +import Pact.Types.Exp +import Pact.Types.Info + +-- -------------------------------------------------------------------------- -- +-- Spec + +spec :: Spec +spec = do + spec_position + spec_parsePact + +-- -------------------------------------------------------------------------- -- +-- Tests + +-- | This tests the implementation of the `DeltaParsing` instance for +-- "Data.Attoparsec.Text" in "Pact.Types.Orphans". +-- +spec_position :: Spec +spec_position = describe "parsing deltas" $ do + describe "3-bytes utf-8" $ do + check "\"\x200f\"" + check "\"\x200f\x200f\"" + check "\"a\x200f\x200fz\"" + + describe "4-byte utf-8" $ do + check "\"\x1038D\"" + check "\"\x1038D\x10385\"" + + prop "parse delta" $ \x -> + let s = T.pack (show @T.Text x) + in getCols s `shouldBe` Right (cols s) + + prop "parse delta 2" $ \(Lit x) -> + getCols (quoted x) === Right (cols (quoted x)) + where + + check s = it (show s) $ getCols s `shouldBe` Right (cols s) + + getCols :: T.Text -> Either String D.Delta + getCols = pactAttoParseOnly (D.stringLiteral @_ @T.Text *> D.position <* D.eof) + +spec_parsePact :: Spec +spec_parsePact = describe "parsePact string literal" $ do + it "U-f002" $ check "\x200f" + it "U-f002U-f002" $ check "\x200f\x200f" + it "U-f002mU-f002" $ check "\x200fm\x200f" + it "aU-f002U-f002z" $ check "a\x200f\x200fz" + it "aU-f002mU-f002z" $ check "a\x200fm\x200fz" + prop "parse string literal" $ \(Lit s) -> check s + where + check s = + parsePact (quoted s) `shouldBe` Right (expected s (fromIntegral $ D.column $ cols (quoted s))) + expected s l = ParsedCode + { _pcCode = quoted s + , _pcExps = + [ ELiteral $ LiteralExp + { _litLiteral = LString {_lString = s} + , _litInfo = Parsed {_pDelta = D.Columns 0 0, _pLength = l} + } + ] + } + +-- -------------------------------------------------------------------------- -- +-- Utils + +newtype Lit = Lit { unLit :: T.Text } + deriving (Show, Eq, Ord) + +instance Arbitrary Lit where + arbitrary = Lit + . T.filter (/= '\\') + . T.filter (/= '"') + . T.filter (not . isControl) + <$> arbitrary + +quoted :: T.Text -> T.Text +quoted s = "\"" <> s <> "\"" + +cols :: T.Text -> D.Delta +#if MIN_VERSION_text(2,0,0) +cols s = D.Columns (fromIntegral $ T.length s) (fromIntegral $ B.length (T.encodeUtf8 s)) +#else +cols s = D.Columns (fromIntegral $ T.length s) (fromIntegral $ B.length (T.encodeUtf16LE s)) +#endif + From 7266396245abbb1ef2e4b72aff331c5372fa81ed Mon Sep 17 00:00:00 2001 From: Lars Kuhtz Date: Tue, 11 Apr 2023 17:26:44 -0700 Subject: [PATCH 2/7] some linting --- src/Pact/Compile.hs | 24 +++++++++++++----------- src/Pact/Types/ExpParser.hs | 6 +----- 2 files changed, 14 insertions(+), 16 deletions(-) diff --git a/src/Pact/Compile.hs b/src/Pact/Compile.hs index 77004bf2f..d76734f52 100644 --- a/src/Pact/Compile.hs +++ b/src/Pact/Compile.hs @@ -151,13 +151,15 @@ reservedAtom = bareAtom >>= \AtomExp{..} -> case HM.lookup _atomAtom reserveds o Just r -> commit >> return r compile :: ParseEnv -> MkInfo -> Exp Parsed -> Either PactError (Term Name) -compile pe mi e = let ei = mi <$> e in runCompile pe topLevel (initParseState ei) ei +compile pe mi e = runCompile pe topLevel (initParseState ei) ei + where + ei = mi <$> e compileExps :: Traversable t => ParseEnv -> MkInfo -> t (Exp Parsed) -> Either PactError (t (Term Name)) -compileExps pe mi exps = sequence $ compile pe mi <$> exps +compileExps pe mi exps = mapM (compile pe mi) exps moduleState :: Compile ModuleState -moduleState = use (psUser . csModule) >>= \m -> case m of +moduleState = use (psUser . csModule) >>= \case Just m' -> return m' Nothing -> context >>= tokenErr' "Must be declared within module" @@ -213,7 +215,7 @@ cToTV n | n < 26 = fromString [toC n] where toC i = toEnum (fromEnum 'a' + i) -sexp :: (Compile a) -> Compile a +sexp :: Compile a -> Compile a sexp body = withList' Parens (body <* eof) specialFormOrApp :: (Reserved -> Compile (Compile (Term Name))) -> Compile (Term Name) @@ -246,7 +248,7 @@ valueLevel = literals <|> varAtom <|> specialFormOrApp valueLevelForm where _ -> expected "value level form (let, let*, with-capability, cond)" moduleLevel :: Compile [Term Name] -moduleLevel = specialForm $ \r -> case r of +moduleLevel = specialForm $ \case RUse -> returnl useForm RDefconst -> returnl defconst RBless -> return (bless >> return []) @@ -411,7 +413,7 @@ meta modelAllowed = ModelAllowed -> a ModelNotAllowed -> unexpected' "@model not allowed in this declaration" atPairs = do - ps <- sort <$> (some (docPair <|> modelPair)) + ps <- sort <$> some (docPair <|> modelPair) case ps of [DocPair doc] -> return (Meta (Just doc) []) [ModelPair es] -> whenModelAllowed $ return (Meta Nothing es) @@ -447,7 +449,7 @@ defcapManaged dt = case dt of _ -> return Nothing where doDefcapMeta = symbol "@managed" *> - ((DMDefcap . DefcapManaged) <$> (doUserMgd <|> doAuto)) + (DMDefcap . DefcapManaged <$> (doUserMgd <|> doAuto)) doUserMgd = Just <$> ((,) <$> (_atomAtom <$> userAtom) <*> userVar) doAuto = pure Nothing doEvent = symbol "@event" *> pure (DMDefcap DefcapEvent) @@ -458,7 +460,7 @@ defpact = do (defname,returnTy) <- first _atomAtom <$> typedAtom args <- withList' Parens $ many arg m <- meta ModelAllowed - (body,bi) <- bodyForm' $ specialForm $ \r -> case r of + (body,bi) <- bodyForm' $ specialForm $ \case RStep -> return step RStepWithRollback -> return stepWithRollback _ -> expected "step or step-with-rollback" @@ -479,7 +481,7 @@ moduleForm = do modName' <- _atomAtom <$> userAtom gov <- Governance <$> ((Left <$> keysetNameStr) <|> (Right <$> userVar)) m <- meta ModelAllowed - use (psUser . csModule) >>= \cm -> case cm of + use (psUser . csModule) >>= \case Just {} -> syntaxError "Invalid nested module or interface" Nothing -> return () i <- contextInfo @@ -504,7 +506,7 @@ interface :: Compile (Term Name) interface = do iname' <- _atomAtom <$> bareAtom m <- meta ModelAllowed - use (psUser . csModule) >>= \ci -> case ci of + use (psUser . csModule) >>= \case Just {} -> syntaxError "invalid nested interface or module" Nothing -> return () info <- contextInfo @@ -514,7 +516,7 @@ interface = do iname = ModuleName iname' Nothing ihash = ModuleHash . pactHash . encodeUtf8 . _unCode $ code (bd,ModuleState{..}) <- withModuleState (initModuleState iname ihash) $ - bodyForm $ specialForm $ \r -> case r of + bodyForm $ specialForm $ \case RDefun -> return $ defSig Defun RDefconst -> return defconst RUse -> return useForm diff --git a/src/Pact/Types/ExpParser.hs b/src/Pact/Types/ExpParser.hs index 827562b01..37707254f 100644 --- a/src/Pact/Types/ExpParser.hs +++ b/src/Pact/Types/ExpParser.hs @@ -119,11 +119,7 @@ mkEmptyInfo e = Info (Just (mempty,e)) {-# INLINE mkStringInfo #-} mkStringInfo :: String -> MkInfo -mkStringInfo s d = Info $ Just (Code code, d) - where - code = T.take len $ T.drop offset $ T.pack s - offset = fromIntegral $ TF.column (_pDelta d) - len = _pLength d +mkStringInfo = mkTextInfo . T.pack {-# INLINE mkTextInfo #-} mkTextInfo :: T.Text -> MkInfo From 3e93eba01fc21793ca378d328d28d64593c6d734 Mon Sep 17 00:00:00 2001 From: Lars Kuhtz Date: Tue, 11 Apr 2023 17:41:06 -0700 Subject: [PATCH 3/7] one more test --- tests/Test/Pact/Parse.hs | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tests/Test/Pact/Parse.hs b/tests/Test/Pact/Parse.hs index 1789b0447..44ddb8f10 100644 --- a/tests/Test/Pact/Parse.hs +++ b/tests/Test/Pact/Parse.hs @@ -42,6 +42,7 @@ spec :: Spec spec = do spec_position spec_parsePact + spec_parseModule -- -------------------------------------------------------------------------- -- -- Tests @@ -94,6 +95,20 @@ spec_parsePact = describe "parsePact string literal" $ do ] } +spec_parseModule :: Spec +spec_parseModule = do + it "parses a module" $ do + pm `shouldBe` m + i `shouldBe` Parsed (D.Columns 0 0) (T.length m) + where + Right (ParsedCode pm [EList ListExp { _listInfo = i } ]) = parsePact m + m = T.unlines + [ "(module m G" + , " (defcap G () true)" + , " (defun f () true)" + , ")" + ] + -- -------------------------------------------------------------------------- -- -- Utils From 58ca2102fc18698a78700a47b21aaee24ea3da46 Mon Sep 17 00:00:00 2001 From: Lars Kuhtz Date: Tue, 11 Apr 2023 17:42:21 -0700 Subject: [PATCH 4/7] remove redundant HasBytes instance --- src/Pact/Types/Info.hs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Pact/Types/Info.hs b/src/Pact/Types/Info.hs index 856e44820..889457765 100644 --- a/src/Pact/Types/Info.hs +++ b/src/Pact/Types/Info.hs @@ -71,7 +71,6 @@ instance Arbitrary Parsed where , Directed <$> genFilename <*> genPositiveInt64 <*> genPositiveInt64 <*> genPositiveInt64 <*> genPositiveInt64 ] instance NFData Parsed instance Default Parsed where def = Parsed mempty 0 -instance HasBytes Parsed where bytes = bytes . _pDelta instance Pretty Parsed where pretty = pretty . _pDelta From 0d898ec660e094aa7f6ac157954b288ebf13a6db Mon Sep 17 00:00:00 2001 From: Lars Kuhtz Date: Mon, 17 Apr 2023 12:33:21 -0700 Subject: [PATCH 5/7] use legacy parsing --- src/Pact/Parse.hs | 3 ++- src/Pact/Types/ExpParser.hs | 3 ++- src/Pact/Types/Parser.hs | 7 ++++++- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/Pact/Parse.hs b/src/Pact/Parse.hs index 28408e782..6eb1254a2 100644 --- a/src/Pact/Parse.hs +++ b/src/Pact/Parse.hs @@ -64,7 +64,8 @@ expr = do delt <- position let inf = do end <- position - let len = column end - column delt + -- let len = column end - column delt + let len = bytes end - bytes delt return $! Parsed delt (fromIntegral len) separator t s = symbol t >> (ESeparator . SeparatorExp s <$> inf) msum diff --git a/src/Pact/Types/ExpParser.hs b/src/Pact/Types/ExpParser.hs index 37707254f..a3157430c 100644 --- a/src/Pact/Types/ExpParser.hs +++ b/src/Pact/Types/ExpParser.hs @@ -126,7 +126,8 @@ mkTextInfo :: T.Text -> MkInfo mkTextInfo s d = Info $ Just (Code code, d) where code = T.take len $ T.drop offset s - offset = fromIntegral $ TF.column (_pDelta d) + -- offset = fromIntegral $ TF.column (_pDelta d) + offset = fromIntegral $ TF.bytes (_pDelta d) len = _pLength d type ExpParse s a = ReaderT ParseEnv (StateT (ParseState s) (Parsec Void Cursor)) a diff --git a/src/Pact/Types/Parser.hs b/src/Pact/Types/Parser.hs index 382922c5d..2e4c9fe88 100644 --- a/src/Pact/Types/Parser.hs +++ b/src/Pact/Types/Parser.hs @@ -78,6 +78,7 @@ newtype PactAttoparsec a = PactAttoparsec pactAttoParseOnly :: PactAttoparsec a -> T.Text -> Either String a pactAttoParseOnly = AP.parseOnly . flip evalStateT 0 . runPactAttoparsec +#define LEGACY_PARSER 1 -- | Atto DeltaParsing instance provides 'position' only (with no support for -- hidden chars like Trifecta). @@ -115,10 +116,14 @@ instance DeltaParsing PactAttoparsec where position = do #if MIN_VERSION_text(2,0,0) APT.Pos !bytePos <- parserPos + !charPos <- gets fromIntegral +#elif LEGACY_PARSER == 1 + APT.Pos !bytePos <- parserPos + let !charPos = fromIntegral bytePos #else APT.Pos !bytePos <- (* 2) <$> parserPos -#endif !charPos <- gets fromIntegral +#endif return $ TF.Columns charPos (fromIntegral bytePos) {-# INLINE position #-} From 6bd2cea66d2fb534e71eba8564a00f0021781e30 Mon Sep 17 00:00:00 2001 From: Lars Kuhtz Date: Wed, 19 Apr 2023 00:04:17 -0700 Subject: [PATCH 6/7] use legacy parser --- pact.cabal | 2 +- src/Pact/Parse.hs | 16 ++++++++++------ src/Pact/Types/ExpParser.hs | 22 +++++++++++++--------- src/Pact/Types/Parser.hs | 2 -- 4 files changed, 24 insertions(+), 18 deletions(-) diff --git a/pact.cabal b/pact.cabal index 9ff68889c..dec541a08 100644 --- a/pact.cabal +++ b/pact.cabal @@ -47,7 +47,7 @@ flag tests-in-lib manual: True library - + cpp-options: -DLEGACY_PARSER=1 -- common to all configurations: hs-source-dirs: src default-language: Haskell2010 diff --git a/src/Pact/Parse.hs b/src/Pact/Parse.hs index 6eb1254a2..435495535 100644 --- a/src/Pact/Parse.hs +++ b/src/Pact/Parse.hs @@ -1,12 +1,13 @@ -{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE CPP #-} {-# LANGUAGE DeriveGeneric #-} -{-# LANGUAGE ScopedTypeVariables #-} -{-# LANGUAGE TupleSections #-} {-# LANGUAGE FlexibleContexts #-} -{-# LANGUAGE OverloadedStrings #-} -{-# LANGUAGE Rank2Types #-} +{-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE MultiParamTypeClasses #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE Rank2Types #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TupleSections #-} -- | -- Module : Pact.Compile @@ -64,8 +65,11 @@ expr = do delt <- position let inf = do end <- position - -- let len = column end - column delt +#if LEGACY_PARSER == 1 let len = bytes end - bytes delt +#else + let len = column end - column delt +#endif return $! Parsed delt (fromIntegral len) separator t s = symbol t >> (ESeparator . SeparatorExp s <$> inf) msum diff --git a/src/Pact/Types/ExpParser.hs b/src/Pact/Types/ExpParser.hs index a3157430c..2417821c5 100644 --- a/src/Pact/Types/ExpParser.hs +++ b/src/Pact/Types/ExpParser.hs @@ -1,16 +1,17 @@ +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE CPP #-} {-# LANGUAGE ConstraintKinds #-} -{-# LANGUAGE TypeFamilies #-} +{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleInstances #-} -{-# LANGUAGE RankNTypes #-} {-# LANGUAGE LambdaCase #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE PatternSynonyms #-} +{-# LANGUAGE RankNTypes #-} {-# LANGUAGE RecordWildCards #-} -{-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE TupleSections #-} -{-# LANGUAGE FlexibleContexts #-} -{-# LANGUAGE BangPatterns #-} -{-# LANGUAGE OverloadedStrings #-} -{-# LANGUAGE PatternSynonyms #-} +{-# LANGUAGE TypeFamilies #-} {-# LANGUAGE ViewPatterns #-} -- megaparsec <9.3 backard compatiblity @@ -126,9 +127,12 @@ mkTextInfo :: T.Text -> MkInfo mkTextInfo s d = Info $ Just (Code code, d) where code = T.take len $ T.drop offset s - -- offset = fromIntegral $ TF.column (_pDelta d) - offset = fromIntegral $ TF.bytes (_pDelta d) len = _pLength d +#if LEGACY_PARSER == 1 + offset = fromIntegral $ TF.bytes (_pDelta d) +#else + offset = fromIntegral $ TF.column (_pDelta d) +#endif type ExpParse s a = ReaderT ParseEnv (StateT (ParseState s) (Parsec Void Cursor)) a diff --git a/src/Pact/Types/Parser.hs b/src/Pact/Types/Parser.hs index 2e4c9fe88..5bdfa6d86 100644 --- a/src/Pact/Types/Parser.hs +++ b/src/Pact/Types/Parser.hs @@ -78,8 +78,6 @@ newtype PactAttoparsec a = PactAttoparsec pactAttoParseOnly :: PactAttoparsec a -> T.Text -> Either String a pactAttoParseOnly = AP.parseOnly . flip evalStateT 0 . runPactAttoparsec -#define LEGACY_PARSER 1 - -- | Atto DeltaParsing instance provides 'position' only (with no support for -- hidden chars like Trifecta). -- From a626f89514b94299596b6c00ece20509abd61bf3 Mon Sep 17 00:00:00 2001 From: Greg Hale Date: Thu, 20 Apr 2023 08:37:27 -0700 Subject: [PATCH 7/7] deltaparsing tweaks (#1203) * tweaks to deltaparser * Update tests/GasModelSpec.hs --------- Co-authored-by: Lars Kuhtz --- tests/Test/Pact/Parse.hs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/Test/Pact/Parse.hs b/tests/Test/Pact/Parse.hs index 44ddb8f10..891fce358 100644 --- a/tests/Test/Pact/Parse.hs +++ b/tests/Test/Pact/Parse.hs @@ -69,6 +69,11 @@ spec_position = describe "parsing deltas" $ do getCols (quoted x) === Right (cols (quoted x)) where + -- Check that `pactAttoParseOnly` parses a `Text` as a string literal + -- and produces the Delta that we would expect (it has the number of + -- characters and the number of bytes that `text` and `bytestring` + -- report). + check :: T.Text -> Spec check s = it (show s) $ getCols s `shouldBe` Right (cols s) getCols :: T.Text -> Either String D.Delta @@ -125,6 +130,8 @@ instance Arbitrary Lit where quoted :: T.Text -> T.Text quoted s = "\"" <> s <> "\"" +-- | Produce a trifecta Delta from a given `Text` string. +-- `Columns` arguments are the number of characters and the number of bytes. cols :: T.Text -> D.Delta #if MIN_VERSION_text(2,0,0) cols s = D.Columns (fromIntegral $ T.length s) (fromIntegral $ B.length (T.encodeUtf8 s))