From 9aeeb10bac0073c1bda152f2100c030b6c20ebdd Mon Sep 17 00:00:00 2001 From: imaqtkatt <135721694+imaqtkatt@users.noreply.github.com> Date: Mon, 18 May 2026 01:11:07 -0300 Subject: [PATCH 1/4] add initial ClassFile structures and read functions --- app/ClassReader.hs | 196 +++++++++++++++++++++++++++++++++++++++- app/Main.hs | 9 +- examples/SixSeven.class | Bin 0 -> 374 bytes examples/SixSeven.java | 11 +++ 4 files changed, 212 insertions(+), 4 deletions(-) create mode 100644 examples/SixSeven.class create mode 100644 examples/SixSeven.java diff --git a/app/ClassReader.hs b/app/ClassReader.hs index 8591829..9c00829 100644 --- a/app/ClassReader.hs +++ b/app/ClassReader.hs @@ -1,17 +1,83 @@ +{-# LANGUAGE ViewPatterns #-} + module ClassReader where -import Control.Monad.Except (ExceptT, runExceptT) +import Control.Monad (replicateM) +import Control.Monad.Except (ExceptT, MonadError (throwError), liftEither, runExceptT) import Control.Monad.Trans (lift) import Data.Binary (Get, Word16, Word32, Word8) import Data.Binary.Get (runGet) import Data.Binary.Get qualified as BG +import Data.ByteString (unpack) import Data.ByteString.Lazy qualified as BS +import Data.Text qualified as T +import Data.Text.Encoding qualified as TE type ReadResult = ExceptT ClassReadError Get -data ClassFile +data ClassFile = ClassFile + { classFileMinorVersion :: Word16, + classFileMajorVersion :: Word16, + classFileConstantPool :: ConstantPool, + -- TODO: properly interpret access flags + classFileAccessFlags :: Word16, + classFileThisClass :: String, + classFileSuperClass :: String, + classFileInterfaces :: [String], + classFileFields :: [FieldInfo], + classFileMethods :: [MethodInfo], + classFileAttributes :: [AttributeInfo] + } + deriving (Show) + +type ConstantPool = [PoolEntry] + +data PoolEntry + = InvalidEntry + | Utf8Info String + | IntegerInfo Word32 + | FloatInfo Word32 + | LongInfo Word32 Word32 + | DoubleInfo Word32 Word32 + | ClassInfo Word16 + | StringInfo Word16 + | FieldRefInfo Word16 Word16 + | MethodRefInfo Word16 Word16 + | InterfaceMethodInfo Word16 Word16 + | NameAndTypeInfo Word16 Word16 + deriving (Show) + +data FieldInfo = FieldInfo + { fieldInfoAccessFlags :: Word16, + fieldInfoName :: String, + fieldInfoDescriptor :: String, + fieldInfoAttributes :: [AttributeInfo] + } + deriving (Show) + +data MethodInfo = MethodInfo + { methodInfoAccessFlags :: Word16, + methodInfoName :: String, + methodInfoDescriptor :: String, + methodInfoAttributes :: [AttributeInfo] + } + deriving (Show) + +data AttributeInfo + = AttributeInfoConstantValue Word16 + | AttributeInfoCode Word16 Word16 [Word8] [(Word16, Word16, Word16, Word16)] [AttributeInfo] + | AttributeInfoExceptions [Word16] + | AttributeInfoLineNumberTable [(Word16, Word16)] + | AttributeInfoSourceFile String + deriving (Show) data ClassReadError + = InvalidMagic Word32 + | InvalidPoolEntryTag Word8 + | InvalidUtf8PoolIndex Word16 + | InvalidClassInfoPoolIdx Word16 + | UnsupportedAttribute String + deriving (Show) getU1 :: ReadResult Word8 getU1 = lift BG.getWord8 @@ -26,4 +92,128 @@ runClassReader :: BS.ByteString -> Either ClassReadError ClassFile runClassReader = (runGet . runExceptT) readClassFile readClassFile :: ReadResult ClassFile -readClassFile = undefined +readClassFile = do + (minorVersion, majorVersion, poolSize) <- readClassHeader =<< getU4 + constantPool <- readConstantPool (poolSize - 1) + accessFlags <- getU2 + thisClass <- getClassInfo constantPool =<< getU2 + superClass <- getClassInfo constantPool =<< getU2 + interfaces <- readInterfaces constantPool =<< getU2 + fields <- readFields constantPool =<< getU2 + methods <- readMethods constantPool =<< getU2 + attributes <- readAttributes constantPool =<< getU2 + pure $ + ClassFile + minorVersion + majorVersion + constantPool + accessFlags + thisClass + superClass + interfaces + fields + methods + attributes + +readClassHeader :: Word32 -> ReadResult (Word16, Word16, Word16) +readClassHeader 0xCAFEBABE = (,,) <$> getU2 <*> getU2 <*> getU2 +readClassHeader magic = throwError $ InvalidMagic magic + +-- TODO: must have a better way to do this +readConstantPool :: Word16 -> ReadResult [PoolEntry] +readConstantPool 0 = pure [] +readConstantPool n = do + entry <- readPoolEntry =<< getU1 + resolveEntry entry n + where + resolveEntry :: PoolEntry -> Word16 -> ReadResult [PoolEntry] + resolveEntry e@DoubleInfo {} n' = ((:) e) <$> ((:) InvalidEntry) <$> readConstantPool (n' - 1) + resolveEntry e@LongInfo {} n' = ((:) e) <$> ((:) InvalidEntry) <$> readConstantPool (n' - 1) + resolveEntry e n' = ((:) e) <$> readConstantPool (n' - 1) + + readPoolEntry :: Word8 -> ReadResult PoolEntry + readPoolEntry 1 = Utf8Info . T.unpack . TE.decodeUtf8 <$> (lift . BG.getByteString . fromIntegral =<< getU2) + readPoolEntry 3 = IntegerInfo <$> getU4 + readPoolEntry 4 = FloatInfo <$> getU4 + readPoolEntry 5 = LongInfo <$> getU4 <*> getU4 + readPoolEntry 6 = DoubleInfo <$> getU4 <*> getU4 + readPoolEntry 7 = ClassInfo <$> getU2 + readPoolEntry 8 = StringInfo <$> getU2 + readPoolEntry 9 = FieldRefInfo <$> getU2 <*> getU2 + readPoolEntry 10 = MethodRefInfo <$> getU2 <*> getU2 + readPoolEntry 11 = InterfaceMethodInfo <$> getU2 <*> getU2 + readPoolEntry 12 = NameAndTypeInfo <$> getU2 <*> getU2 + readPoolEntry tag = throwError $ InvalidPoolEntryTag tag + +readInterfaces :: ConstantPool -> Word16 -> ReadResult [String] +readInterfaces pool n = replicateM (fromIntegral n) (getClassInfo pool =<< getU2) + +readFields :: ConstantPool -> Word16 -> ReadResult [FieldInfo] +readFields pool n = replicateM (fromIntegral n) readFieldInfo + where + readFieldInfo :: ReadResult FieldInfo + readFieldInfo = do + accessFlags <- getU2 + name <- getUtf8Info pool =<< getU2 + descriptor <- getUtf8Info pool =<< getU2 + attributes <- readAttributes pool =<< getU2 + pure $ FieldInfo accessFlags name descriptor attributes + +readMethods :: ConstantPool -> Word16 -> ReadResult [MethodInfo] +readMethods pool n = replicateM (fromIntegral n) readMethodInfo + where + readMethodInfo :: ReadResult MethodInfo + readMethodInfo = do + accessFlags <- getU2 + methodName <- getUtf8Info pool =<< getU2 + methodDescriptor <- getUtf8Info pool =<< getU2 + methodAttributes <- readAttributes pool =<< getU2 + pure $ MethodInfo accessFlags methodName methodDescriptor methodAttributes + +resolveIdx :: Word16 -> Word16 +resolveIdx 0 = error "what" +resolveIdx n = n - 1 + +getUtf8Info :: ConstantPool -> Word16 -> ReadResult String +getUtf8Info pool (resolveIdx -> idx) = case pool !! fromIntegral idx of + Utf8Info s -> pure s + _ -> throwError $ InvalidUtf8PoolIndex idx + +getClassInfo :: ConstantPool -> Word16 -> ReadResult String +getClassInfo pool (resolveIdx -> idx) = case pool !! fromIntegral idx of + ClassInfo idx' -> getUtf8Info pool idx' + _ -> throwError $ InvalidClassInfoPoolIdx idx + +readAttributes :: ConstantPool -> Word16 -> ReadResult [AttributeInfo] +readAttributes pool n = replicateM (fromIntegral n) $ readAttributeInfo pool + +readAttributeInfo :: ConstantPool -> ReadResult AttributeInfo +readAttributeInfo pool = do + attributeName <- getUtf8Info pool =<< getU2 + attributeLen <- fromIntegral <$> getU4 + attributeInfo <- lift $ BG.isolate attributeLen $ runExceptT (matchAttribute attributeName) + liftEither attributeInfo + where + matchAttribute :: String -> ReadResult AttributeInfo + matchAttribute "Code" = do + maxStack <- getU2 + maxLocals <- getU2 + codeLen <- fromIntegral <$> getU4 + code <- unpack <$> (lift . BG.getByteString $ codeLen) + exnTable <- readExnTable =<< getU2 + attributes <- readAttributes pool =<< getU2 + pure $ AttributeInfoCode maxStack maxLocals code exnTable attributes + where + readExnTable :: Word16 -> ReadResult [(Word16, Word16, Word16, Word16)] + readExnTable n = replicateM (fromIntegral n) exn + exn :: ReadResult (Word16, Word16, Word16, Word16) + exn = (,,,) <$> getU2 <*> getU2 <*> getU2 <*> getU2 + matchAttribute "ConstantValue" = AttributeInfoConstantValue <$> getU2 + matchAttribute "Exceptions" = AttributeInfoExceptions <$> ((\n -> replicateM (fromIntegral n) getU2) =<< getU2) + matchAttribute "LineNumberTable" = AttributeInfoLineNumberTable <$> ((\n -> replicateM (fromIntegral n) readLineNumberTable) =<< getU2) + where + readLineNumberTable :: ReadResult (Word16, Word16) + readLineNumberTable = (,) <$> getU2 <*> getU2 + matchAttribute "SourceFile" = AttributeInfoSourceFile <$> (getUtf8Info pool =<< getU2) + -- TODO: returning UnsupportedAttribute here gives a weird behavior with BG.isolate lol + matchAttribute name = error name diff --git a/app/Main.hs b/app/Main.hs index f16f6f5..e48d922 100644 --- a/app/Main.hs +++ b/app/Main.hs @@ -1,4 +1,11 @@ module Main (main) where +import ClassReader +import Data.ByteString.Lazy qualified as BL + main :: IO () -main = putStrLn "Hello, Haskell!" +main = do + classFile <- ClassReader.runClassReader <$> BL.readFile "./examples/SixSeven.class" + case classFile of + Left err -> print err + Right classFile' -> print classFile' diff --git a/examples/SixSeven.class b/examples/SixSeven.class new file mode 100644 index 0000000000000000000000000000000000000000..0f9f55e28e959824153ec9c7814df9ba3f3aff0f GIT binary patch literal 374 zcmYLFO;5r=5PjPZN^60l#*5L!8}(p+0L2T&#DwUfhKm=sbwif4iz$Whx0Hj42Y-M+ z$~aq*9(HDS=Dm3{^ZoPr1>gq99t@ZcEFU&n48feQd6aWCiyqRs$SQ_bES0RT8O%XA zbrnJ6+Tre(->=yyh* zk+&~Dz}{+Lm#i*psEZzg9W>Oppsf22CqA{GM-9AN7wAr;LKmRd-z%Kgt*B)%-oj}( Ne-ai|52>kvqhD<4JO2Ox literal 0 HcmV?d00001 diff --git a/examples/SixSeven.java b/examples/SixSeven.java new file mode 100644 index 0000000..f1b1c6c --- /dev/null +++ b/examples/SixSeven.java @@ -0,0 +1,11 @@ +package examples; + +public class SixSeven { + static int sixSeven = 67; + + public static void main(String[] args) { + var unused = sixSeven; + var _ = unused + 2; + return; + } +} From 08713f2494e05aed3eb1500bd68600fb7075ca69 Mon Sep 17 00:00:00 2001 From: imaqtkatt <135721694+imaqtkatt@users.noreply.github.com> Date: Wed, 20 May 2026 16:01:36 -0300 Subject: [PATCH 2/4] add missing attributes --- app/ClassReader.hs | 77 +++++++++++++++++++++++++++++++++------------- 1 file changed, 55 insertions(+), 22 deletions(-) diff --git a/app/ClassReader.hs b/app/ClassReader.hs index 9c00829..d8011bf 100644 --- a/app/ClassReader.hs +++ b/app/ClassReader.hs @@ -69,6 +69,9 @@ data AttributeInfo | AttributeInfoExceptions [Word16] | AttributeInfoLineNumberTable [(Word16, Word16)] | AttributeInfoSourceFile String + | AttributeInfoLocalVariableTable [(Word16, Word16, Word16, Word16, Word16)] + | AttributeInfoLocalVariableTypeTable [(Word16, Word16, Word16, Word16, Word16)] + | AttributeInfoDeprecated deriving (Show) data ClassReadError @@ -119,18 +122,15 @@ readClassHeader :: Word32 -> ReadResult (Word16, Word16, Word16) readClassHeader 0xCAFEBABE = (,,) <$> getU2 <*> getU2 <*> getU2 readClassHeader magic = throwError $ InvalidMagic magic --- TODO: must have a better way to do this readConstantPool :: Word16 -> ReadResult [PoolEntry] readConstantPool 0 = pure [] -readConstantPool n = do - entry <- readPoolEntry =<< getU1 - resolveEntry entry n +readConstantPool n = + (++) <$> (resolveEntry <$> (readPoolEntry =<< getU1)) <*> readConstantPool (n - 1) where - resolveEntry :: PoolEntry -> Word16 -> ReadResult [PoolEntry] - resolveEntry e@DoubleInfo {} n' = ((:) e) <$> ((:) InvalidEntry) <$> readConstantPool (n' - 1) - resolveEntry e@LongInfo {} n' = ((:) e) <$> ((:) InvalidEntry) <$> readConstantPool (n' - 1) - resolveEntry e n' = ((:) e) <$> readConstantPool (n' - 1) - + resolveEntry :: PoolEntry -> [PoolEntry] + resolveEntry e@LongInfo {} = [e, InvalidEntry] + resolveEntry e@DoubleInfo {} = [e, InvalidEntry] + resolveEntry e = [e] readPoolEntry :: Word8 -> ReadResult PoolEntry readPoolEntry 1 = Utf8Info . T.unpack . TE.decodeUtf8 <$> (lift . BG.getByteString . fromIntegral =<< getU2) readPoolEntry 3 = IntegerInfo <$> getU4 @@ -195,25 +195,58 @@ readAttributeInfo pool = do liftEither attributeInfo where matchAttribute :: String -> ReadResult AttributeInfo - matchAttribute "Code" = do - maxStack <- getU2 - maxLocals <- getU2 - codeLen <- fromIntegral <$> getU4 - code <- unpack <$> (lift . BG.getByteString $ codeLen) - exnTable <- readExnTable =<< getU2 - attributes <- readAttributes pool =<< getU2 - pure $ AttributeInfoCode maxStack maxLocals code exnTable attributes + matchAttribute "ConstantValue" = AttributeInfoConstantValue <$> getU2 + matchAttribute "Code" = uncurry5 AttributeInfoCode <$> readCode where + readCode :: ReadResult (Word16, Word16, [Word8], [(Word16, Word16, Word16, Word16)], [AttributeInfo]) + readCode = do + maxStack <- getU2 + maxLocals <- getU2 + codeLen <- fromIntegral <$> getU4 + code <- unpack <$> (lift . BG.getByteString $ codeLen) + exnTable <- readExnTable =<< getU2 + attributes <- readAttributes pool =<< getU2 + pure (maxStack, maxLocals, code, exnTable, attributes) readExnTable :: Word16 -> ReadResult [(Word16, Word16, Word16, Word16)] readExnTable n = replicateM (fromIntegral n) exn exn :: ReadResult (Word16, Word16, Word16, Word16) exn = (,,,) <$> getU2 <*> getU2 <*> getU2 <*> getU2 - matchAttribute "ConstantValue" = AttributeInfoConstantValue <$> getU2 - matchAttribute "Exceptions" = AttributeInfoExceptions <$> ((\n -> replicateM (fromIntegral n) getU2) =<< getU2) - matchAttribute "LineNumberTable" = AttributeInfoLineNumberTable <$> ((\n -> replicateM (fromIntegral n) readLineNumberTable) =<< getU2) + uncurry5 f (a, b, c, d, e) = f a b c d e + matchAttribute "StackMapTable" = undefined + matchAttribute "Exceptions" = AttributeInfoExceptions <$> (readExceptions =<< getU2) where - readLineNumberTable :: ReadResult (Word16, Word16) - readLineNumberTable = (,) <$> getU2 <*> getU2 + readExceptions :: Word16 -> ReadResult [Word16] + readExceptions n = replicateM (fromIntegral n) getU2 + matchAttribute "InnerClasses" = undefined + matchAttribute "EnclosingMethod" = undefined + matchAttribute "Synthetic" = undefined + matchAttribute "Signature" = undefined matchAttribute "SourceFile" = AttributeInfoSourceFile <$> (getUtf8Info pool =<< getU2) + matchAttribute "SourceDebugExtension" = undefined + matchAttribute "LineNumberTable" = AttributeInfoLineNumberTable <$> (readLineNumberTable =<< getU2) + where + readLineNumberTable :: Word16 -> ReadResult [(Word16, Word16)] + readLineNumberTable n = replicateM (fromIntegral n) readLineNumber + readLineNumber :: ReadResult (Word16, Word16) + readLineNumber = (,) <$> getU2 <*> getU2 + matchAttribute "LocalVariableTable" = AttributeInfoLocalVariableTable <$> (readLocalVariableTable =<< getU2) + where + readLocalVariableTable :: Word16 -> ReadResult [(Word16, Word16, Word16, Word16, Word16)] + readLocalVariableTable n = replicateM (fromIntegral n) readLocalVariable + readLocalVariable :: ReadResult (Word16, Word16, Word16, Word16, Word16) + readLocalVariable = (,,,,) <$> getU2 <*> getU2 <*> getU2 <*> getU2 <*> getU2 + matchAttribute "LocalVariableTypeTable" = AttributeInfoLocalVariableTypeTable <$> (readLocalVariableTypeTable =<< getU2) + where + readLocalVariableTypeTable :: Word16 -> ReadResult [(Word16, Word16, Word16, Word16, Word16)] + readLocalVariableTypeTable n = replicateM (fromIntegral n) readLocalVariableType + readLocalVariableType :: ReadResult (Word16, Word16, Word16, Word16, Word16) + readLocalVariableType = (,,,,) <$> getU2 <*> getU2 <*> getU2 <*> getU2 <*> getU2 + matchAttribute "Deprecated" = pure AttributeInfoDeprecated + matchAttribute "RuntimeVisibleAnnotations" = undefined + matchAttribute "RuntimeInvisibleAnnotations" = undefined + matchAttribute "RuntimeVisibleParameterAnnotations" = undefined + matchAttribute "RuntimeInvisibleParameterAnnotations" = undefined + matchAttribute "AnnotationDefault" = undefined + matchAttribute "BootstrapMethods" = undefined -- TODO: returning UnsupportedAttribute here gives a weird behavior with BG.isolate lol matchAttribute name = error name From 12033bea9c5000fe84bac0ac038e1f8bc6cacfca Mon Sep 17 00:00:00 2001 From: imaqtkatt <135721694+imaqtkatt@users.noreply.github.com> Date: Thu, 21 May 2026 14:51:35 -0300 Subject: [PATCH 3/4] parse float and double, fix readConstantPool --- app/ClassReader.hs | 160 +++++++++++++++++++++------------------- examples/SixSeven.class | Bin 374 -> 427 bytes examples/SixSeven.java | 1 + 3 files changed, 84 insertions(+), 77 deletions(-) diff --git a/app/ClassReader.hs b/app/ClassReader.hs index d8011bf..00a169a 100644 --- a/app/ClassReader.hs +++ b/app/ClassReader.hs @@ -5,7 +5,7 @@ module ClassReader where import Control.Monad (replicateM) import Control.Monad.Except (ExceptT, MonadError (throwError), liftEither, runExceptT) import Control.Monad.Trans (lift) -import Data.Binary (Get, Word16, Word32, Word8) +import Data.Binary (Get, Word16, Word32, Word64, Word8) import Data.Binary.Get (runGet) import Data.Binary.Get qualified as BG import Data.ByteString (unpack) @@ -36,9 +36,9 @@ data PoolEntry = InvalidEntry | Utf8Info String | IntegerInfo Word32 - | FloatInfo Word32 - | LongInfo Word32 Word32 - | DoubleInfo Word32 Word32 + | FloatInfo Float + | LongInfo Word64 + | DoubleInfo Double | ClassInfo Word16 | StringInfo Word16 | FieldRefInfo Word16 Word16 @@ -91,6 +91,9 @@ getU2 = lift BG.getWord16be getU4 :: ReadResult Word32 getU4 = lift BG.getWord32be +getU8 :: ReadResult Word64 +getU8 = lift BG.getWord64be + runClassReader :: BS.ByteString -> Either ClassReadError ClassFile runClassReader = (runGet . runExceptT) readClassFile @@ -124,26 +127,28 @@ readClassHeader magic = throwError $ InvalidMagic magic readConstantPool :: Word16 -> ReadResult [PoolEntry] readConstantPool 0 = pure [] -readConstantPool n = - (++) <$> (resolveEntry <$> (readPoolEntry =<< getU1)) <*> readConstantPool (n - 1) +readConstantPool n = do + (padded, wide) <- resolveEntry <$> (readPoolEntry =<< getU1) + ((++) padded) <$> readConstantPool (if wide then n - 2 else n - 1) where - resolveEntry :: PoolEntry -> [PoolEntry] - resolveEntry e@LongInfo {} = [e, InvalidEntry] - resolveEntry e@DoubleInfo {} = [e, InvalidEntry] - resolveEntry e = [e] + resolveEntry :: PoolEntry -> ([PoolEntry], Bool) + resolveEntry e@LongInfo {} = ([e, InvalidEntry], True) + resolveEntry e@DoubleInfo {} = ([e, InvalidEntry], True) + resolveEntry e = ([e], False) readPoolEntry :: Word8 -> ReadResult PoolEntry - readPoolEntry 1 = Utf8Info . T.unpack . TE.decodeUtf8 <$> (lift . BG.getByteString . fromIntegral =<< getU2) - readPoolEntry 3 = IntegerInfo <$> getU4 - readPoolEntry 4 = FloatInfo <$> getU4 - readPoolEntry 5 = LongInfo <$> getU4 <*> getU4 - readPoolEntry 6 = DoubleInfo <$> getU4 <*> getU4 - readPoolEntry 7 = ClassInfo <$> getU2 - readPoolEntry 8 = StringInfo <$> getU2 - readPoolEntry 9 = FieldRefInfo <$> getU2 <*> getU2 - readPoolEntry 10 = MethodRefInfo <$> getU2 <*> getU2 - readPoolEntry 11 = InterfaceMethodInfo <$> getU2 <*> getU2 - readPoolEntry 12 = NameAndTypeInfo <$> getU2 <*> getU2 - readPoolEntry tag = throwError $ InvalidPoolEntryTag tag + readPoolEntry = \case + 1 -> Utf8Info . T.unpack . TE.decodeUtf8 <$> (lift . BG.getByteString . fromIntegral =<< getU2) + 3 -> IntegerInfo <$> getU4 + 4 -> FloatInfo . fromIntegral <$> getU4 + 5 -> LongInfo . fromIntegral <$> getU8 + 6 -> DoubleInfo . fromIntegral <$> getU8 + 7 -> ClassInfo <$> getU2 + 8 -> StringInfo <$> getU2 + 9 -> FieldRefInfo <$> getU2 <*> getU2 + 10 -> MethodRefInfo <$> getU2 <*> getU2 + 11 -> InterfaceMethodInfo <$> getU2 <*> getU2 + 12 -> NameAndTypeInfo <$> getU2 <*> getU2 + tag -> throwError $ InvalidPoolEntryTag tag readInterfaces :: ConstantPool -> Word16 -> ReadResult [String] readInterfaces pool n = replicateM (fromIntegral n) (getClassInfo pool =<< getU2) @@ -195,58 +200,59 @@ readAttributeInfo pool = do liftEither attributeInfo where matchAttribute :: String -> ReadResult AttributeInfo - matchAttribute "ConstantValue" = AttributeInfoConstantValue <$> getU2 - matchAttribute "Code" = uncurry5 AttributeInfoCode <$> readCode - where - readCode :: ReadResult (Word16, Word16, [Word8], [(Word16, Word16, Word16, Word16)], [AttributeInfo]) - readCode = do - maxStack <- getU2 - maxLocals <- getU2 - codeLen <- fromIntegral <$> getU4 - code <- unpack <$> (lift . BG.getByteString $ codeLen) - exnTable <- readExnTable =<< getU2 - attributes <- readAttributes pool =<< getU2 - pure (maxStack, maxLocals, code, exnTable, attributes) - readExnTable :: Word16 -> ReadResult [(Word16, Word16, Word16, Word16)] - readExnTable n = replicateM (fromIntegral n) exn - exn :: ReadResult (Word16, Word16, Word16, Word16) - exn = (,,,) <$> getU2 <*> getU2 <*> getU2 <*> getU2 - uncurry5 f (a, b, c, d, e) = f a b c d e - matchAttribute "StackMapTable" = undefined - matchAttribute "Exceptions" = AttributeInfoExceptions <$> (readExceptions =<< getU2) - where - readExceptions :: Word16 -> ReadResult [Word16] - readExceptions n = replicateM (fromIntegral n) getU2 - matchAttribute "InnerClasses" = undefined - matchAttribute "EnclosingMethod" = undefined - matchAttribute "Synthetic" = undefined - matchAttribute "Signature" = undefined - matchAttribute "SourceFile" = AttributeInfoSourceFile <$> (getUtf8Info pool =<< getU2) - matchAttribute "SourceDebugExtension" = undefined - matchAttribute "LineNumberTable" = AttributeInfoLineNumberTable <$> (readLineNumberTable =<< getU2) - where - readLineNumberTable :: Word16 -> ReadResult [(Word16, Word16)] - readLineNumberTable n = replicateM (fromIntegral n) readLineNumber - readLineNumber :: ReadResult (Word16, Word16) - readLineNumber = (,) <$> getU2 <*> getU2 - matchAttribute "LocalVariableTable" = AttributeInfoLocalVariableTable <$> (readLocalVariableTable =<< getU2) - where - readLocalVariableTable :: Word16 -> ReadResult [(Word16, Word16, Word16, Word16, Word16)] - readLocalVariableTable n = replicateM (fromIntegral n) readLocalVariable - readLocalVariable :: ReadResult (Word16, Word16, Word16, Word16, Word16) - readLocalVariable = (,,,,) <$> getU2 <*> getU2 <*> getU2 <*> getU2 <*> getU2 - matchAttribute "LocalVariableTypeTable" = AttributeInfoLocalVariableTypeTable <$> (readLocalVariableTypeTable =<< getU2) - where - readLocalVariableTypeTable :: Word16 -> ReadResult [(Word16, Word16, Word16, Word16, Word16)] - readLocalVariableTypeTable n = replicateM (fromIntegral n) readLocalVariableType - readLocalVariableType :: ReadResult (Word16, Word16, Word16, Word16, Word16) - readLocalVariableType = (,,,,) <$> getU2 <*> getU2 <*> getU2 <*> getU2 <*> getU2 - matchAttribute "Deprecated" = pure AttributeInfoDeprecated - matchAttribute "RuntimeVisibleAnnotations" = undefined - matchAttribute "RuntimeInvisibleAnnotations" = undefined - matchAttribute "RuntimeVisibleParameterAnnotations" = undefined - matchAttribute "RuntimeInvisibleParameterAnnotations" = undefined - matchAttribute "AnnotationDefault" = undefined - matchAttribute "BootstrapMethods" = undefined - -- TODO: returning UnsupportedAttribute here gives a weird behavior with BG.isolate lol - matchAttribute name = error name + matchAttribute = \case + "ConstantValue" -> AttributeInfoConstantValue <$> getU2 + "Code" -> uncurry5 AttributeInfoCode <$> readCode + where + readCode :: ReadResult (Word16, Word16, [Word8], [(Word16, Word16, Word16, Word16)], [AttributeInfo]) + readCode = do + maxStack <- getU2 + maxLocals <- getU2 + codeLen <- fromIntegral <$> getU4 + code <- unpack <$> (lift . BG.getByteString $ codeLen) + exnTable <- readExceptionTable =<< getU2 + attributes <- readAttributes pool =<< getU2 + pure (maxStack, maxLocals, code, exnTable, attributes) + readExceptionTable :: Word16 -> ReadResult [(Word16, Word16, Word16, Word16)] + readExceptionTable n = replicateM (fromIntegral n) readException + readException :: ReadResult (Word16, Word16, Word16, Word16) + readException = (,,,) <$> getU2 <*> getU2 <*> getU2 <*> getU2 + uncurry5 f (a, b, c, d, e) = f a b c d e + "StackMapTable" -> undefined + "Exceptions" -> AttributeInfoExceptions <$> (readExceptions =<< getU2) + where + readExceptions :: Word16 -> ReadResult [Word16] + readExceptions n = replicateM (fromIntegral n) getU2 + "InnerClasses" -> undefined + "EnclosingMethod" -> undefined + "Synthetic" -> undefined + "Signature" -> undefined + "SourceFile" -> AttributeInfoSourceFile <$> (getUtf8Info pool =<< getU2) + "SourceDebugExtension" -> undefined + "LineNumberTable" -> AttributeInfoLineNumberTable <$> (readLineNumberTable =<< getU2) + where + readLineNumberTable :: Word16 -> ReadResult [(Word16, Word16)] + readLineNumberTable n = replicateM (fromIntegral n) readLineNumber + readLineNumber :: ReadResult (Word16, Word16) + readLineNumber = (,) <$> getU2 <*> getU2 + "LocalVariableTable" -> AttributeInfoLocalVariableTable <$> (readLocalVariableTable =<< getU2) + where + readLocalVariableTable :: Word16 -> ReadResult [(Word16, Word16, Word16, Word16, Word16)] + readLocalVariableTable n = replicateM (fromIntegral n) readLocalVariable + readLocalVariable :: ReadResult (Word16, Word16, Word16, Word16, Word16) + readLocalVariable = (,,,,) <$> getU2 <*> getU2 <*> getU2 <*> getU2 <*> getU2 + "LocalVariableTypeTable" -> AttributeInfoLocalVariableTypeTable <$> (readLocalVariableTypeTable =<< getU2) + where + readLocalVariableTypeTable :: Word16 -> ReadResult [(Word16, Word16, Word16, Word16, Word16)] + readLocalVariableTypeTable n = replicateM (fromIntegral n) readLocalVariableType + readLocalVariableType :: ReadResult (Word16, Word16, Word16, Word16, Word16) + readLocalVariableType = (,,,,) <$> getU2 <*> getU2 <*> getU2 <*> getU2 <*> getU2 + "Deprecated" -> pure AttributeInfoDeprecated + "RuntimeVisibleAnnotations" -> undefined + "RuntimeInvisibleAnnotations" -> undefined + "RuntimeVisibleParameterAnnotations" -> undefined + "RuntimeInvisibleParameterAnnotations" -> undefined + "AnnotationDefault" -> undefined + "BootstrapMethods" -> undefined + -- TODO: returning UnsupportedAttribute here gives a weird behavior with BG.isolate lol + name -> error name diff --git a/examples/SixSeven.class b/examples/SixSeven.class index 0f9f55e28e959824153ec9c7814df9ba3f3aff0f..317582f9b97ab664bd015e0060a92b6f8694cf9c 100644 GIT binary patch delta 240 zcmeyyw3?ag)W2Q(7#J8_7^Eh0#VfNp1RMbJ7#KJiI2Z(Y7z7!F7#TQIf-@_EQ_E8G z7#SE{CT3>UGXZ6}8F+x|I6z8(G&2Ju11kd?kQ4^;WPvmzkY?4|&cL`4$Y5j;0g@nT zkWx+tQ3f#}8>Cc|fe9!Av~UvxyNxt!f-OQhA6SBefd%XuE}&iF5F5;Z>NSA+K!8WU fc{2mM2m|kC27ZKAF0d>M&`GR7dnADBm>47hjP(}# delta 164 zcmZ3@{Edn0)W2Q(7#J8_7(^y=#ZT Date: Mon, 25 May 2026 13:35:57 -0300 Subject: [PATCH 4/4] wip parse attributes --- .envrc | 1 + .gitignore | 2 ++ app/ClassReader.hs | 67 +++++++++++++++++++++++++++++++++++++++++----- 3 files changed, 64 insertions(+), 6 deletions(-) create mode 100644 .envrc diff --git a/.envrc b/.envrc new file mode 100644 index 0000000..c4b17d7 --- /dev/null +++ b/.envrc @@ -0,0 +1 @@ +use_flake diff --git a/.gitignore b/.gitignore index c33954f..915c4eb 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,3 @@ dist-newstyle/ + +.direnv diff --git a/app/ClassReader.hs b/app/ClassReader.hs index 00a169a..3bd956f 100644 --- a/app/ClassReader.hs +++ b/app/ClassReader.hs @@ -66,19 +66,46 @@ data MethodInfo = MethodInfo data AttributeInfo = AttributeInfoConstantValue Word16 | AttributeInfoCode Word16 Word16 [Word8] [(Word16, Word16, Word16, Word16)] [AttributeInfo] + | AttributeInfoStackMapTable [StackMapFrame] | AttributeInfoExceptions [Word16] + | AttributeInfoInnerClasses [(Word16, Word16, Word16, Word16)] + | AttributeInfoEnclosingMethod Word16 Word16 + | AttributeInfoSynthetic + | AttributeInfoSignature Word16 | AttributeInfoLineNumberTable [(Word16, Word16)] | AttributeInfoSourceFile String + | AttributeInfoSourceDebugExtension [Word8] | AttributeInfoLocalVariableTable [(Word16, Word16, Word16, Word16, Word16)] | AttributeInfoLocalVariableTypeTable [(Word16, Word16, Word16, Word16, Word16)] | AttributeInfoDeprecated deriving (Show) +data StackMapFrame + = SameFrame Word8 + | SameLocals1StackItemFrame Word8 VerificationTypeInfo + | SameLocals1StackItemFrameExtended Word16 VerificationTypeInfo + | ChopFrame Word16 Word8 + | SameFrameExtended Word16 + deriving (Show) + +data VerificationTypeInfo + = TopVariable + | IntegerVariable + | FloatVariable + | DoubleVariable + | LongVariable + | NullVariable + | UninitializedThisVariable + | ObjectVariable Word16 + | UninitializedVariable Word16 + deriving (Show) + data ClassReadError = InvalidMagic Word32 | InvalidPoolEntryTag Word8 | InvalidUtf8PoolIndex Word16 | InvalidClassInfoPoolIdx Word16 + | InvalidVerificationTypeTag Word8 | UnsupportedAttribute String deriving (Show) @@ -218,17 +245,45 @@ readAttributeInfo pool = do readException :: ReadResult (Word16, Word16, Word16, Word16) readException = (,,,) <$> getU2 <*> getU2 <*> getU2 <*> getU2 uncurry5 f (a, b, c, d, e) = f a b c d e - "StackMapTable" -> undefined + "StackMapTable" -> AttributeInfoStackMapTable <$> (readStackMapFrames =<< getU2) + where + readStackMapFrames :: Word16 -> ReadResult [StackMapFrame] + readStackMapFrames n = replicateM (fromIntegral n) (readStackMapFrame =<< getU1) + readStackMapFrame :: Word8 -> ReadResult StackMapFrame + readStackMapFrame frameType + | frameType <= 63 = pure $ SameFrame frameType + | frameType <= 127 = SameLocals1StackItemFrame frameType <$> (readVerificationTypeInfo =<< getU1) + | frameType == 247 = SameLocals1StackItemFrameExtended <$> getU2 <*> (readVerificationTypeInfo =<< getU1) + | frameType <= 250 = ChopFrame <$> getU2 <*> pure (251 - frameType) + | frameType == 251 = SameFrameExtended <$> getU2 + | otherwise = error "TODO" + readVerificationTypeInfo :: Word8 -> ReadResult VerificationTypeInfo + readVerificationTypeInfo = \case + 0 -> pure TopVariable + 1 -> pure IntegerVariable + 2 -> pure FloatVariable + 3 -> pure DoubleVariable + 4 -> pure LongVariable + 5 -> pure NullVariable + 6 -> pure UninitializedThisVariable + 7 -> ObjectVariable <$> getU2 + 8 -> UninitializedVariable <$> getU2 + tag -> throwError $ InvalidVerificationTypeTag tag "Exceptions" -> AttributeInfoExceptions <$> (readExceptions =<< getU2) where readExceptions :: Word16 -> ReadResult [Word16] readExceptions n = replicateM (fromIntegral n) getU2 - "InnerClasses" -> undefined - "EnclosingMethod" -> undefined - "Synthetic" -> undefined - "Signature" -> undefined + "InnerClasses" -> AttributeInfoInnerClasses <$> (readInnerClasses =<< getU2) + where + readInnerClasses :: Word16 -> ReadResult [(Word16, Word16, Word16, Word16)] + readInnerClasses n = replicateM (fromIntegral n) readInnerClass + readInnerClass :: ReadResult (Word16, Word16, Word16, Word16) + readInnerClass = (,,,) <$> getU2 <*> getU2 <*> getU2 <*> getU2 + "EnclosingMethod" -> AttributeInfoEnclosingMethod <$> getU2 <*> getU2 + "Synthetic" -> pure AttributeInfoSynthetic + "Signature" -> AttributeInfoSignature <$> getU2 "SourceFile" -> AttributeInfoSourceFile <$> (getUtf8Info pool =<< getU2) - "SourceDebugExtension" -> undefined + "SourceDebugExtension" -> AttributeInfoSourceDebugExtension <$> BS.unpack <$> lift BG.getRemainingLazyByteString "LineNumberTable" -> AttributeInfoLineNumberTable <$> (readLineNumberTable =<< getU2) where readLineNumberTable :: Word16 -> ReadResult [(Word16, Word16)]