Adds tests; more robust logic.

parent 8bc44c72
......@@ -15,44 +15,108 @@
module ATTParser where
import Control.Applicative ((<|>))
import Data.Word (Word)
import Data.Word (Word32, Word64)
import Data.Int (Int64)
import Data.Char (isDigit, isSpace)
import Data.Bits (shiftL, shiftR, (.|.))
import Data.Maybe (fromMaybe)
type ASM = [(String, [(String, String)])]
data Inst = Ident String
| Long Word32
| Quad Word64
| Ref String
| Ascii String
deriving Show
parse :: FilePath -> IO ASM
parse f = do
lns <- lines `fmap` readFile f
return $ foldl parseLine [] lns
type ASM = [(String, Inst)]
where parseLine :: ASM -> String -> ASM
parseLine [] ('\t':_) = []
parseLine ((ident,attr):xs) ('\t':line) = let (key, val) = span (`notElem` " \t") line
in (ident,(key,trim val):attr):xs
parseLine xs line = let ident = takeWhile (/= ':') line in (ident,[]):xs
isIdent :: Inst -> Bool
isIdent (Ident _) = True
isIdent _ = False
trim :: String -> String
trim = reverse . dropWhile (`elem` " \t") . reverse . dropWhile (`elem` " \t")
-- | generalized @words@.
words' :: (a -> Bool) -> [a] -> [[a]]
words' p s = case dropWhile p s of
[] -> []
s' -> w : words' p s''
where (w, s'') = break p s'
-- | lookup a constant numeric value. Drop any comments indicated by ';', '#' or '@'.
-- We assume the value is either in the `.long` or `.quad` attribute.
lookupConst :: String -> ASM -> Maybe String
lookupConst key asm = lookup key asm >>= \x -> ((trim . takeWhile (`notElem` ";#@")) `fmap` (lookup ".long" x <|> lookup ".quad" x))
-- the compiler may emit something like `.space 4` to indicate 0000.
<|> (const "0" `fmap` lookup ".space" x)
isNumber :: String -> Bool
isNumber ('-':x) = all isDigit x
isNumber ('+':x) = all isDigit x
isNumber x = all isDigit x
-- | extract a C String in the most basic sense we can.
-- the .asciz directive doesn't contain the \0 terminator.
lookupASCII :: String -> ASM -> Maybe String
lookupASCII key asm = lookup key asm >>= \x -> read `fmap` (lookup ".ascii" x) <|> (((++ "\0") . read) `fmap` (lookup ".asciz" x))
-- | process the assembly instructions, filtering out
-- identifiers and constant values.
preprocess :: String -> [Inst]
preprocess [] = []
preprocess ('\t':attr) = let (h, t) = break isSpace attr
in case h:words' (=='\t') t of
(".quad":x:_) | isNumber (w x) -> [Quad $ read (w x)]
| otherwise -> [Ref $ (w x)]
(".xword":x:_)| isNumber (w x) -> [Quad $ read (w x)]
| otherwise -> [Ref $ (w x)]
(".long":x:_) | isNumber (w x) -> [Long $ read (w x)]
| otherwise -> [Ref $ (w x)]
(".space":x:_)| (w x) == "4" -> [Long 0]
| (w x) == "8" -> [Quad 0]
(".ascii":x:_) -> [Ascii $ read x]
(".asciz":x:_) -> [Ascii $ read x ++ "\0"]
_ -> []
where w = head . words
preprocess ('.':'z':'e':'r':'o':'f':'i':'l':'l':' ':x) = case words' (==',') x of
(_seg:_sect:sym:size:_) | size == "4" -> [Ident sym, Long 0]
| size == "8" -> [Ident sym, Quad 0]
_ -> []
preprocess (c:cs) | not (isSpace c) = [Ident $ takeWhile (/= ':') (c:cs)]
| otherwise = []
lookupInt :: String -> ASM -> Maybe Int
lookupInt key = fmap read . lookupConst key
-- | turn the list of instructions into an associated list
parseInsts :: [Inst] -> [(String, Inst)]
parseInsts [] = []
parseInsts (Ident name:xs) = case break isIdent xs of
([], xs') -> parseInsts xs'
(is, xs') -> (name, combineInst is):parseInsts xs'
parseInsts _ = error "Invalid instructions"
lookupInteger :: String -> ASM -> Maybe Integer
lookupInteger key = fmap read . lookupConst key
-- | combine instructions (e.g. two long into a quad)
combineInst :: [Inst] -> Inst
combineInst [Quad i] = Quad i
combineInst [Long i] = Quad (fromIntegral i)
combineInst [Long h, Long l] = Quad $ (shiftL (fromIntegral h) 32) .|. fromIntegral l
combineInst [Ref s] = Ref s
combineInst [Ascii s] = Ascii s
combineInst is = error $ "Cannot combine instructions: " ++ show is
lookupUInteger :: String -> ASM -> Maybe Integer
lookupUInteger key = fmap (fromIntegral . (read :: String -> Word)) . lookupConst key
-- | inline references
inlineRef :: [(String, Inst)] -> [(String, Inst)]
inlineRef xs = map go xs
where go (k, Ref name) = (k, fromMaybe (error $ "failed to find reference " ++ show name) $ lookup name xs)
go x = x
lookupCString :: String -> ASM -> Maybe String
lookupCString key asm = lookupConst key asm >>= flip lookupASCII asm
fixWordOrder :: [(String, Inst)] -> [(String, Inst)]
fixWordOrder xs = case lookupInteger "___hsc2hs_BOM___" xs of
Just 1 -> map go xs
_ -> xs
where go (k, Quad w) = (k, Quad $ shiftL w 32 .|. shiftR w 32)
go x = x
parse :: FilePath -> IO [(String, Inst)]
parse f = (fixWordOrder . inlineRef . parseInsts . concatMap preprocess . lines) `fmap` readFile f
-- | lookup a symbol without or with underscore prefix
lookup_ :: String -> [(String,b)] -> Maybe b
lookup_ k l = lookup k l <|> lookup ("_" ++ k) l
lookupString :: String -> [(String, Inst)] -> Maybe String
lookupString k l = case (lookup_ k l) of
Just (Ascii s) -> Just s
_ -> Nothing
lookupInteger :: String -> [(String, Inst)] -> Maybe Integer
lookupInteger k l = case (lookup_ k l, lookup_ (k ++ "___hsc2hs_sign___") l) of
(Just (Quad i), Just (Quad 1)) -> Just (fromIntegral (fromIntegral i :: Int64))
(Just (Quad i), _) -> Just (fromIntegral i)
_ -> Nothing
......@@ -583,10 +583,12 @@ runCompileAsmIntegerTest (ZCursor s@(Special _ _ value) above below) = do
(concatMap outHeaderCProg' above) ++
outHeaderCProg' s ++
-- the test
"extern int " ++ key ++ "___signed___;\n" ++
"int " ++ key ++ "___signed___ = (" ++ value ++ ") < 0;\n" ++
"extern long long " ++ key ++ ";\n" ++
"long long " ++ key ++ " = (" ++ value ++ ");\n"++
"extern unsigned long long ___hsc2hs_BOM___;\n" ++
"unsigned long long ___hsc2hs_BOM___ = 0x100000000;\n" ++
"extern unsigned long long " ++ key ++ "___hsc2hs_sign___;\n" ++
"unsigned long long " ++ key ++ "___hsc2hs_sign___ = (" ++ value ++ ") < 0;\n" ++
"extern unsigned long long " ++ key ++ ";\n" ++
"unsigned long long " ++ key ++ " = (" ++ value ++ ");\n"++
(concatMap outHeaderCProg' below)
runCompileExtract key test
runCompileAsmIntegerTest _ = error "runCompileAsmIntegerTestargument isn't a Special"
......@@ -601,10 +603,7 @@ runCompileExtract k testStr = do
(["-S", "-c", cFile, "-o", sFile] ++ [f | CompFlag f <- flags])
(Just stdout)
asm <- liftTestIO $ ATT.parse sFile
case (== 1) `fmap` (ATT.lookupInteger (k ++ "___signed___") asm) of
Just False -> return $ fromMaybe (error "Failed to extract unsigned integer") (ATT.lookupUInteger k asm)
Just True -> return $ fromMaybe (error "Failed to extract integer") (ATT.lookupInteger k asm)
Nothing -> error "Failed to extract integer sign information"
return $ fromMaybe (error "Failed to extract integer") (ATT.lookupInteger k asm)
runCompileTest :: String -> TestMonad Bool
runCompileTest testStr = do
......
module Main where
import Test.Tasty.Hspec
import ATTParser
import Control.Monad (forM_)
main :: IO ()
main = hspec $ do
describe "asm parser" $ do
forM_ [("x86_64 linux", "test/asm/x86_64-linux.s")
,("x86_64 macos", "test/asm/x86_64-mac.s")
,("x86_64 mingw", "test/asm/x86_64-mingw32.s")
,("aarch64 ios", "test/asm/aarch64-ios.s")
,("aarch64 linux","test/asm/aarch64.s")]
$ \(d, f) ->do
context d $ do
x <- runIO $ parse f
it "x should be 1" $ do
lookupInteger "x" x `shouldBe` (Just 1)
it "z should be 0xffffffffffffffff" $ do
lookupInteger "y" x `shouldBe` (Just 0xffffffffffffffff)
it "z should be -1" $ do
lookupInteger "z" x `shouldBe` (Just (-1))
it "t should be \"Hello World\\\"\\n\\0\"" $ do
lookupString "t" x `shouldBe` (Just "Hello World\"\n\0")
forM_ [("arm ios", "test/asm/arm-ios.s")
,("arm linux", "test/asm/arm.s")
,("x86 linux", "test/asm/x86-linux.s")]
$ \(d, f) ->do
context d $ do
x <- runIO $ parse f
it "x should be 1" $ do
lookupInteger "x" x `shouldBe` (Just 1)
it "z should be 0xffffffff" $ do
lookupInteger "y" x `shouldBe` (Just 0xffffffff)
it "z should be -1" $ do
lookupInteger "z" x `shouldBe` (Just (-1))
it "t should be \"Hello World\\\"\\n\\0\"" $ do
lookupString "t" x `shouldBe` (Just "Hello World\"\n\0")
......@@ -59,3 +59,13 @@ Executable hsc2hs
if flag(in-ghc-tree)
cpp-options: -DIN_GHC_TREE
test-suite spec
main-is: Spec.hs
hs-source-dirs: .
other-modules: ATTParser
ghc-options: -Wall -threaded
type: exitcode-stdio-1.0
build-depends: base
, tasty
, tasty-hspec
default-language: Haskell2010
\ No newline at end of file
all:
clang -target arm-linux-gnueabihf -S -c tmp.c -o arm.s
clang -target aarch64-linux-gnueabihf -S -c tmp.c -o aarch64.s
clang -target arm64-apple-ios -S -c tmp.c -o aarch64-ios.s
clang -target armv7-apple-ios -S -c tmp.c -o arm-ios.s
x86_64-w64-mingw32-gcc -S -c tmp.c -o x86_64-mingw32.s
clang -target i386-unknown-linux -S -c tmp.c -o x86-linux.s
clang -target x86_64-apple-macos -S -c tmp.c -o x86_64-mac.s
clang -target i386-unknown-linux -S -c tmp.c -o x86-linux.s
clang -target x86_64-unknown-linux -S -c tmp.c -o x86_64-linux.s
.section __TEXT,__text,regular,pure_instructions
.ios_version_min 7, 0
.section __DATA,__data
.globl ____hsc2hs_BOM___ ; @___hsc2hs_BOM___
.p2align 3
____hsc2hs_BOM___:
.quad 4294967296 ; 0x100000000
.globl _x___hsc2hs_sign___ ; @x___hsc2hs_sign___
.zerofill __DATA,__common,_x___hsc2hs_sign___,8,3
.globl _x ; @x
.p2align 3
_x:
.quad 1 ; 0x1
.globl _y___hsc2hs_sign___ ; @y___hsc2hs_sign___
.zerofill __DATA,__common,_y___hsc2hs_sign___,8,3
.globl _y ; @y
.p2align 3
_y:
.quad -1 ; 0xffffffffffffffff
.globl _z___hsc2hs_sign___ ; @z___hsc2hs_sign___
.p2align 3
_z___hsc2hs_sign___:
.quad 1 ; 0x1
.globl _z ; @z
.p2align 3
_z:
.quad -1 ; 0xffffffffffffffff
.section __TEXT,__cstring,cstring_literals
l_.str: ; @.str
.asciz "Hello World\"\n"
.section __DATA,__data
.globl _t ; @t
.p2align 3
_t:
.quad l_.str
.subsections_via_symbols
.text
.file "tmp.c"
.type ___hsc2hs_BOM___,@object // @___hsc2hs_BOM___
.data
.globl ___hsc2hs_BOM___
.p2align 3
___hsc2hs_BOM___:
.xword 4294967296 // 0x100000000
.size ___hsc2hs_BOM___, 8
.type x___hsc2hs_sign___,@object // @x___hsc2hs_sign___
.bss
.globl x___hsc2hs_sign___
.p2align 3
x___hsc2hs_sign___:
.xword 0 // 0x0
.size x___hsc2hs_sign___, 8
.type x,@object // @x
.data
.globl x
.p2align 3
x:
.xword 1 // 0x1
.size x, 8
.type y___hsc2hs_sign___,@object // @y___hsc2hs_sign___
.bss
.globl y___hsc2hs_sign___
.p2align 3
y___hsc2hs_sign___:
.xword 0 // 0x0
.size y___hsc2hs_sign___, 8
.type y,@object // @y
.data
.globl y
.p2align 3
y:
.xword -1 // 0xffffffffffffffff
.size y, 8
.type z___hsc2hs_sign___,@object // @z___hsc2hs_sign___
.globl z___hsc2hs_sign___
.p2align 3
z___hsc2hs_sign___:
.xword 1 // 0x1
.size z___hsc2hs_sign___, 8
.type z,@object // @z
.globl z
.p2align 3
z:
.xword -1 // 0xffffffffffffffff
.size z, 8
.type .L.str,@object // @.str
.section .rodata.str1.1,"aMS",@progbits,1
.L.str:
.asciz "Hello World\"\n"
.size .L.str, 14
.type t,@object // @t
.data
.globl t
.p2align 3
t:
.xword .L.str
.size t, 8
.ident "clang version 5.0.1 (tags/RELEASE_501/final)"
.section ".note.GNU-stack","",@progbits
.section __TEXT,__text,regular,pure_instructions
.ios_version_min 5, 0
.syntax unified
.section __DATA,__data
.globl ____hsc2hs_BOM___ @ @___hsc2hs_BOM___
.p2align 3
____hsc2hs_BOM___:
.long 0 @ 0x100000000
.long 1
.globl _x___hsc2hs_sign___ @ @x___hsc2hs_sign___
.zerofill __DATA,__common,_x___hsc2hs_sign___,8,3
.globl _x @ @x
.p2align 3
_x:
.long 1 @ 0x1
.long 0
.globl _y___hsc2hs_sign___ @ @y___hsc2hs_sign___
.zerofill __DATA,__common,_y___hsc2hs_sign___,8,3
.globl _y @ @y
.p2align 3
_y:
.long 4294967295 @ 0xffffffff
.long 0
.globl _z___hsc2hs_sign___ @ @z___hsc2hs_sign___
.p2align 3
_z___hsc2hs_sign___:
.long 1 @ 0x1
.long 0
.globl _z @ @z
.p2align 3
_z:
.long 4294967295 @ 0xffffffffffffffff
.long 4294967295
.section __TEXT,__cstring,cstring_literals
L_.str: @ @.str
.asciz "Hello World\"\n"
.section __DATA,__data
.globl _t @ @t
.p2align 2
_t:
.long L_.str
.subsections_via_symbols
.text
.syntax unified
.eabi_attribute 67, "2.09" @ Tag_conformance
.cpu arm1176jzf-s
.eabi_attribute 6, 6 @ Tag_CPU_arch
.eabi_attribute 8, 1 @ Tag_ARM_ISA_use
.eabi_attribute 9, 1 @ Tag_THUMB_ISA_use
.fpu vfpv2
.eabi_attribute 34, 0 @ Tag_CPU_unaligned_access
.eabi_attribute 68, 1 @ Tag_Virtualization_use
.eabi_attribute 17, 1 @ Tag_ABI_PCS_GOT_use
.eabi_attribute 20, 2 @ Tag_ABI_FP_denormal
.eabi_attribute 21, 0 @ Tag_ABI_FP_exceptions
.eabi_attribute 23, 3 @ Tag_ABI_FP_number_model
.eabi_attribute 24, 1 @ Tag_ABI_align_needed
.eabi_attribute 25, 1 @ Tag_ABI_align_preserved
.eabi_attribute 28, 1 @ Tag_ABI_VFP_args
.eabi_attribute 38, 1 @ Tag_ABI_FP_16bit_format
.eabi_attribute 18, 4 @ Tag_ABI_PCS_wchar_t
.eabi_attribute 26, 2 @ Tag_ABI_enum_size
.eabi_attribute 14, 0 @ Tag_ABI_PCS_R9_use
.file "tmp.c"
.type ___hsc2hs_BOM___,%object @ @___hsc2hs_BOM___
.data
.globl ___hsc2hs_BOM___
.p2align 3
___hsc2hs_BOM___:
.long 0 @ 0x100000000
.long 1
.size ___hsc2hs_BOM___, 8
.type x___hsc2hs_sign___,%object @ @x___hsc2hs_sign___
.bss
.globl x___hsc2hs_sign___
.p2align 3
x___hsc2hs_sign___:
.long 0 @ 0x0
.long 0
.size x___hsc2hs_sign___, 8
.type x,%object @ @x
.data
.globl x
.p2align 3
x:
.long 1 @ 0x1
.long 0
.size x, 8
.type y___hsc2hs_sign___,%object @ @y___hsc2hs_sign___
.bss
.globl y___hsc2hs_sign___
.p2align 3
y___hsc2hs_sign___:
.long 0 @ 0x0
.long 0
.size y___hsc2hs_sign___, 8
.type y,%object @ @y
.data
.globl y
.p2align 3
y:
.long 4294967295 @ 0xffffffff
.long 0
.size y, 8
.type z___hsc2hs_sign___,%object @ @z___hsc2hs_sign___
.globl z___hsc2hs_sign___
.p2align 3
z___hsc2hs_sign___:
.long 1 @ 0x1
.long 0
.size z___hsc2hs_sign___, 8
.type z,%object @ @z
.globl z
.p2align 3
z:
.long 4294967295 @ 0xffffffffffffffff
.long 4294967295
.size z, 8
.type .L.str,%object @ @.str
.section .rodata.str1.1,"aMS",%progbits,1
.L.str:
.asciz "Hello World\"\n"
.size .L.str, 14
.type t,%object @ @t
.data
.globl t
.p2align 2
t:
.long .L.str
.size t, 4
.ident "clang version 5.0.1 (tags/RELEASE_501/final)"
.section ".note.GNU-stack","",%progbits
struct S { int unused; };
#define X 1
#define Y -1
// if BOM is 1, we end up with two 32bit integers
// where the upper 4 byte ended up in the lower 4.
extern unsigned long long ___hsc2hs_BOM___;
unsigned long long ___hsc2hs_BOM___ = 0x100000000;
extern unsigned long long x___hsc2hs_sign___;
extern unsigned long long x;
unsigned long long x___hsc2hs_sign___ = ((struct S *)X) < 0;
unsigned long long x = (unsigned long long)((struct S *)X);
extern unsigned long long y___hsc2hs_sign___;
extern unsigned long long y;
unsigned long long y___hsc2hs_sign___ = ((struct S *)Y) < 0;
unsigned long long y = (unsigned long long)((struct S *)Y);
extern unsigned long long z___hsc2hs_sign___;
extern unsigned long long z;
unsigned long long z___hsc2hs_sign___ = Y < 0;
unsigned long long z = (unsigned long long)Y;
extern char * t;
char * t = "Hello World\"\n";
.text
.file "tmp.c"
.type ___hsc2hs_BOM___,@object # @___hsc2hs_BOM___
.data
.globl ___hsc2hs_BOM___
.p2align 3
___hsc2hs_BOM___:
.quad 4294967296 # 0x100000000
.size ___hsc2hs_BOM___, 8
.type x___hsc2hs_sign___,@object # @x___hsc2hs_sign___
.bss
.globl x___hsc2hs_sign___
.p2align 3
x___hsc2hs_sign___:
.quad 0 # 0x0
.size x___hsc2hs_sign___, 8
.type x,@object # @x
.data
.globl x
.p2align 3
x:
.quad 1 # 0x1
.size x, 8
.type y___hsc2hs_sign___,@object # @y___hsc2hs_sign___
.bss
.globl y___hsc2hs_sign___
.p2align 3
y___hsc2hs_sign___:
.quad 0 # 0x0
.size y___hsc2hs_sign___, 8
.type y,@object # @y
.data
.globl y
.p2align 3
y:
.quad 4294967295 # 0xffffffff
.size y, 8
.type z___hsc2hs_sign___,@object # @z___hsc2hs_sign___
.globl z___hsc2hs_sign___
.p2align 3
z___hsc2hs_sign___:
.quad 1 # 0x1
.size z___hsc2hs_sign___, 8
.type z,@object # @z
.globl z
.p2align 3
z:
.quad -1 # 0xffffffffffffffff
.size z, 8
.type .L.str,@object # @.str
.section .rodata.str1.1,"aMS",@progbits,1
.L.str:
.asciz "Hello World\"\n"
.size .L.str, 14
.type t,@object # @t
.data
.globl t
.p2align 2
t:
.long .L.str
.size t, 4
.ident "clang version 5.0.1 (tags/RELEASE_501/final)"
.section ".note.GNU-stack","",@progbits
.text
.file "tmp.c"
.type ___hsc2hs_BOM___,@object # @___hsc2hs_BOM___
.data
.globl ___hsc2hs_BOM___
.p2align 3
___hsc2hs_BOM___:
.quad 4294967296 # 0x100000000
.size ___hsc2hs_BOM___, 8
.type x___hsc2hs_sign___,@object # @x___hsc2hs_sign___
.bss
.globl x___hsc2hs_sign___
.p2align 3
x___hsc2hs_sign___:
.quad 0 # 0x0
.size x___hsc2hs_sign___, 8
.type x,@object # @x
.data
.globl x