From 9a805fa4128414fe95a879d1509191468484ee38 Mon Sep 17 00:00:00 2001 From: Marcin Benke Date: Wed, 3 Sep 2025 12:40:51 +0200 Subject: [PATCH 1/3] Core, Specialise, EmitCore, MatchCompiler: change syntax to __revert__ Note that the match compiler needs revert built into Core (i.e. __revert__), since it may revert with a message. Using a high-level revert function would introduce a dependency between the compiler and the std library. Co-authored-by: Alex Oltean --- src/Language/Hull.hs | 2 +- src/Language/Hull/Parser.hs | 2 +- src/Solcore/Backend/EmitHull.hs | 2 +- src/Solcore/Backend/Specialise.hs | 2 +- src/Solcore/Desugarer/MatchCompiler.hs | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Language/Hull.hs b/src/Language/Hull.hs index b399a6576..7c20f5a0f 100644 --- a/src/Language/Hull.hs +++ b/src/Language/Hull.hs @@ -130,7 +130,7 @@ instance Pretty Stmt where <+> parens (hsep (punctuate comma (map ppr args))) <+> text "->" <+> ppr ret <+> lbrace $$ nest 2 (vcat (map ppr stmts)) $$ rbrace - ppr (SRevert s) = text "revert" <+> text (show s) + ppr (SRevert s) = text "__revert__" <+> text (show s) instance Pretty Pat where ppr (PVar x) = text x diff --git a/src/Language/Hull/Parser.hs b/src/Language/Hull/Parser.hs index 0741d2093..df65564d1 100644 --- a/src/Language/Hull/Parser.hs +++ b/src/Language/Hull/Parser.hs @@ -133,7 +133,7 @@ hullStmt = choice , SFunction <$> (pKeyword "function" *> identifier) <*> (parens (commaSep hullArg)) <*> (symbol "->" *> hullType) <*> hullBody , SAssembly <$> (pKeyword "assembly" *> yulBlock) - , SRevert <$> (pKeyword "revert" *> stringLiteral) + , SRevert <$> (pKeyword "__revert__" *> stringLiteral) , try (SAssign <$> (hullExpr <* symbol ":=") <*> hullExpr) , SExpr <$> hullExpr ] diff --git a/src/Solcore/Backend/EmitHull.hs b/src/Solcore/Backend/EmitHull.hs index dee8b2fbc..a1f3b9e6a 100644 --- a/src/Solcore/Backend/EmitHull.hs +++ b/src/Solcore/Backend/EmitHull.hs @@ -291,7 +291,7 @@ emitExp (Var x) = do 
Just e -> pure (e, []) Nothing -> pure (Hull.EVar (unwrapId x), []) -- special handling of revert -emitExp (Call _ (Id "revert" _) [Lit(StrLit s)]) = pure(Hull.EUnit, [Hull.SRevert s]) +emitExp (Call _ (Id "__revert__" _) [Lit(StrLit s)]) = pure(Hull.EUnit, [Hull.SRevert s]) emitExp (Call Nothing f as) = do (hullArgs, codes) <- unzip <$> mapM emitExp as let call = Hull.ECall (unwrapId f) hullArgs diff --git a/src/Solcore/Backend/Specialise.hs b/src/Solcore/Backend/Specialise.hs index cf1d18c5d..558fe60d2 100644 --- a/src/Solcore/Backend/Specialise.hs +++ b/src/Solcore/Backend/Specialise.hs @@ -341,7 +341,7 @@ specConApp i@(Id n conTy) args ty = do -- | Specialise a function call -- given actual arguments and the expected result type specCall :: Id -> [TcExp] -> Ty -> SM (Id, [TcExp]) -specCall i@(Id (Name "revert") e) args ty = pure (i, args) -- FIXME +specCall i@(Id (Name "__revert__") e) args ty = pure (i, args) -- FIXME specCall i args ty = do i' <- atCurrentSubst i ty' <- atCurrentSubst ty diff --git a/src/Solcore/Desugarer/MatchCompiler.hs b/src/Solcore/Desugarer/MatchCompiler.hs index f1fa51fcc..d8b58f850 100644 --- a/src/Solcore/Desugarer/MatchCompiler.hs +++ b/src/Solcore/Desugarer/MatchCompiler.hs @@ -537,7 +537,7 @@ matchError :: CompilerM Id matchError = do v <- (TyVar . 
TVar) <$> freshName - pure (Id (Name "revert") v) + pure (Id (Name "__revert__") v) errorLit :: Exp Id errorLit = Lit $ StrLit "Incomplete matching" From 7733668c356b9267899407bedfdfbfcd27365796 Mon Sep 17 00:00:00 2001 From: Alex Oltean Date: Tue, 16 Dec 2025 11:48:47 +0200 Subject: [PATCH 2/3] Desugarer: String literals converted to memory pointers --- sol-core.cabal | 1 + src/Solcore/Desugarer/StringLiteral.hs | 118 ++++++++++++++++++++++++ src/Solcore/Pipeline/SolcorePipeline.hs | 9 +- 3 files changed, 127 insertions(+), 1 deletion(-) create mode 100644 src/Solcore/Desugarer/StringLiteral.hs diff --git a/sol-core.cabal b/sol-core.cabal index 4e929d423..275e32e92 100644 --- a/sol-core.cabal +++ b/sol-core.cabal @@ -67,6 +67,7 @@ library Solcore.Desugarer.ReplaceWildcard Solcore.Desugarer.ContractDispatch Solcore.Desugarer.ReplaceFunTypeArgs + Solcore.Desugarer.StringLiteral Solcore.Desugarer.UniqueTypeGen Solcore.Frontend.Lexer.SolcoreLexer Solcore.Frontend.Parser.SolcoreParser diff --git a/src/Solcore/Desugarer/StringLiteral.hs b/src/Solcore/Desugarer/StringLiteral.hs new file mode 100644 index 000000000..33fbd739d --- /dev/null +++ b/src/Solcore/Desugarer/StringLiteral.hs @@ -0,0 +1,118 @@ +{-| +Module : Solcore.Desugarer.StringLiteral +Description : Desugars string literals to memory pointers + +Every occurrence of a string literal expression is desugared to the application +of a function. This function stores each 32-byte chunk of the literal in memory +and returns a pointer to the total size in bytes, followed by the chunks. 
+-}
+module Solcore.Desugarer.StringLiteral where
+
+import Solcore.Frontend.Syntax
+import Data.Generics ( mkM, everywhereM )
+import Control.Monad.State (State, evalState, gets, modify)
+import qualified Data.ByteString as BS (ByteString, length)
+import Solcore.Primitives.Primitives (word)
+import Language.Yul
+import qualified Data.Text as T
+import qualified Data.Text.Encoding as T
+import Solcore.Desugarer.ContractDispatch (generateStoreBytes)
+
+
+-- | State threaded through the desugaring traversal.
+data Env = Env {
+    count :: Int                  -- ^ index given to the next generated function
+  , strLitFuns :: [FunDef Name]   -- ^ functions generated so far, newest first
+  }
+type StrLitM a = State Env a
+
+-- | Desugars all occurrences of string literals inside a compilation unit.
+--
+-- Every literal is replaced by a call to a freshly generated nullary
+-- function (see 'literalToFun'); the generated functions are prepended to
+-- the unit's top-level declarations.
+desugarStrLit :: CompUnit Name -> CompUnit Name
+desugarStrLit c = c { contracts = evalState (desugarDecls $ contracts c) (Env 0 []) } where
+  desugarDecls decls = do
+    -- 'everywhereM' already visits every 'Exp' node bottom-up, so the
+    -- per-node rewrite must not recurse on its own.
+    res <- everywhereM (mkM strLitToCall) decls
+    extras <- gets strLitFuns
+    return $ (TFunDef <$> extras) ++ res
+
+-- | Desugars all occurrences of string literals inside an expression.
+--
+-- Generic traversal of 'strLitToCall': literals nested under any
+-- constructor are rewritten, not only under a hand-picked subset.
+desugarInExp :: Exp Name -> StrLitM (Exp Name)
+desugarInExp e = everywhereM (mkM strLitToCall) e
+
+-- | Rewrites a single expression node. A string literal becomes a call to a
+-- newly registered function; any other node is returned unchanged.
+-- Sub-expressions are deliberately not inspected here, the enclosing
+-- generic traversal takes care of them.
+strLitToCall :: Exp Name -> StrLitM (Exp Name)
+strLitToCall (Lit (StrLit s)) = do
+  idx <- gets count
+  let strFun = literalToFun idx s
+  modify (\env -> env { count = idx + 1, strLitFuns = strFun : strLitFuns env })
+  return $ call (sigName $ funSignature strFun) []
+strLitToCall e = return e
+
+-- | Defines a new function named strLit*idx*, which stores string chunks and
+-- their total size in memory and returns the starting pointer.
+-- The literal is encoded as UTF-8 before being stored.
+--
+-- NOTE(review): nothing here checks that @strLit<idx>@ is not already used
+-- by a user-defined function - confirm such names are reserved.
+literalToFun :: Int -> String -> FunDef Name
+literalToFun idx s = FunDef {
+    funSignature = Signature [] [] (Name $ "strLit" ++ show idx) [] (Just memoryString)
+  , funDefBody = (byteStringToBody . T.encodeUtf8 . T.pack) s
+  }
+
+-- | Chunks the bytestring and generates the code to store it in memory.
+--
+-- The generated body declares @size@, @ptr@ and @headPtr@, writes @size@
+-- at @ptr@, points @headPtr@ 32 bytes past @ptr@, emits the statements
+-- produced by 'generateStoreBytes' and returns @memory(ptr)@.
+byteStringToBody :: BS.ByteString -> Body Name
+byteStringToBody bs = decl ++ asm ++ ret where
+  byteSize = toInteger $ BS.length bs
+  -- number of 32-byte words needed for the payload, rounded up
+  wordSize = (byteSize + 31) `div` 32
+  decl = [
+    declareAssignWord "size" (intLiteral byteSize),
+    -- one extra word in front of the payload holds the size
+    declareAssignWord "ptr" (call "allocate_memory" [intLiteral $ (wordSize + 1) * 32]),
+    declareWord "headPtr"
+    ]
+  asm = [Asm $ assign ++ store]
+  assign = [
+    YExp $ YCall "mstore" [YIdent "ptr", YIdent "size"],
+    YAssign ["headPtr"] (YCall "add" [YIdent "ptr", YLit $ YulNumber 32])
+    ]
+  -- presumably the stores of the payload chunks starting at headPtr;
+  -- TODO confirm against generateStoreBytes in ContractDispatch
+  store = generateStoreBytes bs "headPtr"
+  ret = [
+    Return $ Con "memory" [Var "ptr"]
+    ]
+
+--- Helpers ---
+
+-- | Generate the memory(string) type
+memoryString :: Ty
+memoryString = TyCon (Name "memory") [TyCon (Name "string") []]
+
+-- | Generate the literal expression corresponding to an integer
+intLiteral :: Integer -> Exp Name
+intLiteral i = Lit $ IntLit i
+
+-- | Declare new variable of type word, with given name and no binding
+declareWord :: Name -> Stmt Name
+declareWord nm = Let nm (Just word) Nothing
+
+-- | Declare new variable of type word, with given name and bound to expression
+declareAssignWord :: Name -> Exp Name -> Stmt Name
+declareAssignWord nm e = Let nm (Just word) (Just e)
+
+-- | Call top level function with given name and arguments
+call :: Name -> [Exp Name] -> Exp Name
+call = Call Nothing diff --git a/src/Solcore/Pipeline/SolcorePipeline.hs b/src/Solcore/Pipeline/SolcorePipeline.hs index 16a9ea167..a2255c5e9 100644 --- a/src/Solcore/Pipeline/SolcorePipeline.hs +++ b/src/Solcore/Pipeline/SolcorePipeline.hs @@ -18,6 +18,7 @@ import Solcore.Desugarer.IndirectCall (indirectCall) import Solcore.Desugarer.MatchCompiler (matchCompiler) import Solcore.Desugarer.ReplaceWildcard (replaceWildcard) import Solcore.Desugarer.ReplaceFunTypeArgs +import Solcore.Desugarer.StringLiteral import Solcore.Frontend.Parser.SolcoreParser import Solcore.Frontend.Pretty.SolcorePretty import Solcore.Frontend.Syntax.ElabTree @@ -91,9 +92,15 @@ compile opts = runExceptT $ do putStrLn "> Dispatch:" putStrLn $ pretty dispatched + -- Eliminate string literals + let noStrLit = desugarStrLit dispatched + liftIO $ when verbose $ do + putStrLn "> String literal elimination:" + putStrLn $ pretty noStrLit + -- SCC analysis connected <- ExceptT $ timeItNamed "SCC " $ - sccAnalysis dispatched + sccAnalysis noStrLit liftIO $ when verbose $ do putStrLn "> SCC Analysis:" From 933bb19d063c38475a9570404f2aaf5901cf71dd Mon Sep 17 00:00:00 2001 From: Alex Oltean Date: Tue, 16 Dec 2025 16:09:39 +0200 Subject: [PATCH 3/3] Cases: Add tests for string literal desugaring --- test/Cases.hs | 2 + test/examples/cases/assert-mem-eq.solc | 82 ++++++++++++++++++++++++++ test/examples/cases/revert-msg.solc | 14 +++++ 3 files changed, 98 insertions(+) create mode 100644 test/examples/cases/assert-mem-eq.solc create mode 100644 test/examples/cases/revert-msg.solc diff --git a/test/Cases.hs b/test/Cases.hs index 9755e3484..f7659e587 100644 --- a/test/Cases.hs +++ b/test/Cases.hs @@ -265,6 +265,8 @@ cases = , runTestExpectingFailure "instance-context-wrong-kind.solc" caseFolder , runTestForFile "instance-closure-error.solc" caseFolder , runTestExpectingFailure "instance-closure-error-invalid-member.solc" caseFolder + , runTestForFile "revert-msg.solc" caseFolder + , runTestForFile 
"assert-mem-eq.solc" caseFolder ] where caseFolder = "./test/examples/cases" diff --git a/test/examples/cases/assert-mem-eq.solc b/test/examples/cases/assert-mem-eq.solc new file mode 100644 index 000000000..34a0286b7 --- /dev/null +++ b/test/examples/cases/assert-mem-eq.solc @@ -0,0 +1,82 @@ +import std; + +function revert(reasonPtr: memory(string)) -> () { + let ptr = Typedef.rep(reasonPtr); + assembly { + revert(add(ptr, 32), mload(ptr)) + } +} + +function assert(condition: bool, elseMsg: memory(string)) -> () { + match condition { + | false => revert(elseMsg); + | _ => return (); + } +} + +/* + Compare dynamically sized memory regions based on their contents. + Assumes first word in each region holds content size in bytes. +*/ +forall t . instance memory(t) : Eq { + function eq(x: memory(t), y: memory(t)) -> bool { + let xptr = Typedef.rep(x); + let yptr = Typedef.rep(y); + let xsize: word; + let ysize: word; + assembly { + xsize := mload(xptr) + ysize := mload(yptr) + } + if (xsize != ysize) { + return false; + } + let isEq = 1; + assembly { + xptr := add(xptr, 32) + yptr := add(yptr, 32) + for { } gt(xsize, 31) { { xsize := sub(xsize, 32) } } { + if iszero(eq(mload(xptr), mload(yptr))) { + isEq := 0 + break + } + xptr := add(xptr, 32) + yptr := add(yptr, 32) + } + if and(isEq, gt(xsize, 0)) { + let shiftSize := mul(sub(32, xsize), 8) + if iszero(eq(shr(shiftSize, mload(xptr)), shr(shiftSize, mload(yptr)))) { + isEq := 0 + } + } + } + match isEq { + | 1 => return true; + | _ => return false; + } + } +} +contract EqTest { + function equalityRevert() -> () { + assert("" == "", "Should be equal: ''"); + + assert("some string" == "some string", "Should be equal: 'some string'"); + + assert("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" == "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + "Should be equal: 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'"); + + assert("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabcd" == "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabcd", + "Should be equal (2 word size): 
'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabcd'"); + + assert("some string" != "some string!", "Should not be equal (different sizes)"); + + assert("some string" != "some strin!", "Should not be equal (different content)"); + + assert("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabcda" != "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabeda", + "Should not be equal (2 word size)"); + + assert("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabcda" != + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabeda", + "Should not be equal (4 word size)"); + } +} diff --git a/test/examples/cases/revert-msg.solc b/test/examples/cases/revert-msg.solc new file mode 100644 index 000000000..246b3f4f9 --- /dev/null +++ b/test/examples/cases/revert-msg.solc @@ -0,0 +1,14 @@ +import std; + +function revert(reasonPtr : memory(string)) -> () { + let ptr = Typedef.rep(reasonPtr); + assembly { + revert(add(ptr, 32), mload(ptr)) + } +} + +contract Revert { + function revertFun() -> () { + revert("Loreɱ ipsum dolor sit 📜 amet, ✍️ consectetʊr adipiscing elit. Ưt dolor tellus, interdum id cursus ut, rhoncus sed velit. Quisque nec nisi nunc. Ut venenatis, erat sit amet vulputate ultricies, lacus ipsum pretium lorem, at ultrices lorem sem a tellus. Mauris sed pellentesque magna. Integer in libero sapien. Maecenas blandit aliquet nibh, vitae imperdiet nunc sodales eu. Nam vel tortor id felis vestibulum accumsan ac eget odio. Nam sem lorem, auctor non nisl accumsan, suscipit venenatis libero. Nam a egestas nisi. Sed malesuada ligula non ex aliquam porttitor sit amet eu lorem. Donec sed blandit justo. Integer bibendum feugiat sapien vitae auctor. Vivamus non enim vel mi malesuada bibendum non viverra lorem. Sed interdum vehicula mi, in blandit risus dapibus nec. Maecenas blandit tortor mi, non pulvinar odio molestie ut. Nullam a urna pulvinar, pellentesque leo in, iaculis massa. Sed eleifend in diam vel porttitor. 
Integer mollis suscipit ullamcorper. Vivamus vestibulum, eros at sagittis congue, enim mauris euismod ipsum, in gravida risus augue sed odio. Etiam tortor nunc, tempor quis orci ac, rutrum iaculis orci. Nunc efficitur velit nec tellus ultrices pellentesque. Proin et fringilla tortor, sit amet congue odio. Aliquam id justo lacus. Suspendisse non odio felis. Suspendisse volutpat velit dui, sit amet egestas nisl consequat et. Nulla facilisi. Integer nec nulla ac lectus imperdiet convallis sed ut tellus. Proin interdum efficitur velit, id sollicitudin lorem sodales fringilla. Vestibulum ornare, nisl et scelerisque scelerisque, risus elit faucibus massa, at commodo eros arcu sit amet eros. Praesent ultricies eu magna eu gravida. Nulla ultrices id augue sit amet congue. Orci varius natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Vivamus condimentum, nisi ut sollicitudin mollis, nunc ante imperdiet elit, sed eleifend purus enim vitae elit. Proin diam augue, tempor a vestibulum sit amet, euismod a ex. Integer auctor eget quam vitae pellentesque. Quisque luctus massa ut ante lobortis scelerisque. Interdum et malesuada fames ac ante ipsum primis in faucibus. Donec a imperdiet ante."); + } +}