From 966b62dddf60487aec17b5dc1bac727c8d200c4f Mon Sep 17 00:00:00 2001 From: Josef Svenningsson Date: Mon, 27 Jan 2025 04:27:37 -0800 Subject: [PATCH] Fix element generators of bytes Summary: In a D68322262 I fixed how we write a byte to memory. But there was still a problem remaining for how we read a byte when we use the elements construct. We were still reading using the packed representation. This diff fixes that by reading using the fixed representation instead. I've had to add a new bytecode instruction to fix it. There's also a fix for the `all` construct when generating bytes. Reviewed By: simonmar Differential Revision: D68636014 fbshipit-source-id: 208ad0c697f9b5d15cb6e2474803f91c8c18bb78 --- .../Glean/Bytecode/Generate/Instruction.hs | 10 ++++++-- glean/bytecode/evaluate.h | 25 ++++++++++++++++++- glean/bytecode/instruction.h | 2 +- glean/db/Glean/Query/Codegen.hs | 19 ++++++++------ glean/rts/bytecode/subroutine.cpp | 6 +++++ glean/test/tests/Angle/ArrayTest.hs | 8 ++++++ glean/test/tests/Angle/SetTest.hs | 5 ++++ 7 files changed, 64 insertions(+), 11 deletions(-) diff --git a/glean/bytecode/def/Glean/Bytecode/Generate/Instruction.hs b/glean/bytecode/def/Glean/Bytecode/Generate/Instruction.hs index 02d034201..90ebaa460 100644 --- a/glean/bytecode/def/Glean/Bytecode/Generate/Instruction.hs +++ b/glean/bytecode/def/Glean/Bytecode/Generate/Instruction.hs @@ -85,7 +85,7 @@ data Usage -- -- BUMP THIS WHENEVER YOU CHANGE THE BYTECODE EVEN IF YOU JUST ADD INSTRUCTIONS version :: Int -version = 14 +version = 15 -- | Lowest bytecode version supported by the current engine. -- @@ -93,7 +93,7 @@ version = 14 -- TO THE END OF THE LIST (in which case the new engine can still execute -- old bytecode) lowestSupportedVersion :: Int -lowestSupportedVersion = 14 +lowestSupportedVersion = 15 -- | Definitions of all bytecode instructions instructions :: [Insn] @@ -105,6 +105,12 @@ instructions = , Arg "end" $ reg DataPtr Load , Arg "dst" $ reg Word Store ] + -- Decode a Byte from memory into a register + , Insn "InputByte" [] [] + [ Arg "begin" $ reg DataPtr Update + , Arg "end" $ reg DataPtr Load + , Arg "dst" $ reg Word Store ] + -- Advance begin by size bytes, and bounds-check against end , Insn "InputBytes" [] [] [ Arg "begin" $ reg DataPtr Update diff --git a/glean/bytecode/evaluate.h b/glean/bytecode/evaluate.h index b2a136a53..1a934c60d 100644 --- a/glean/bytecode/evaluate.h +++ b/glean/bytecode/evaluate.h @@ -7,6 +7,10 @@ eval_InputNat(); break; + case Op::InputByte: + eval_InputByte(); + break; + case Op::InputBytes: eval_InputBytes(); break; @@ -237,7 +241,6 @@ case Op::Ret: return eval_Ret(); - case Op::Unused59: case Op::Unused60: case Op::Unused61: case Op::Unused62: @@ -442,6 +445,7 @@ FOLLY_ALWAYS_INLINE const uint64_t * FOLLY_NULLABLE evalIndirect() { static const void * const labels[] = { &&label_InputNat, + &&label_InputByte, &&label_InputBytes, &&label_InputSkipUntrustedString, &&label_InputShiftLit, @@ -508,6 +512,10 @@ eval_InputNat(); goto *labels[*pc++]; + label_InputByte: + eval_InputByte(); + goto *labels[*pc++]; + label_InputBytes: eval_InputBytes(); goto *labels[*pc++]; @@ -754,6 +762,21 @@ return execute(args); } + struct InputByte { + Reg begin; + const unsigned char * end; + Reg dst; + }; + + FOLLY_ALWAYS_INLINE void eval_InputByte() { + InputByte args; + args.begin = Reg(&frame[*pc++]); + args.end = Reg(&frame[*pc++]).get(); + args.dst = Reg(&frame[*pc++]); + DVLOG(5) << "InputByte" << " " << "<>" << " " << "<>" << " " << args.dst; + return execute(args); + } + struct InputBytes { Reg begin; const unsigned char * end; diff --git a/glean/bytecode/instruction.h b/glean/bytecode/instruction.h index afdb8b614..b315334fb 100644 --- a/glean/bytecode/instruction.h +++ b/glean/bytecode/instruction.h @@ -10,6 +10,7 @@ namespace rts { enum class Op : uint8_t { InputNat, + InputByte, InputBytes, InputSkipUntrustedString, InputShiftLit, @@ -68,7 +69,6 @@ enum class Op : uint8_t { TraceReg, Suspend, Ret, - Unused59, Unused60, Unused61, Unused62, diff --git a/glean/db/Glean/Query/Codegen.hs b/glean/db/Glean/Query/Codegen.hs index caf27f9d8..d4f278977 100644 --- a/glean/db/Glean/Query/Codegen.hs +++ b/glean/db/Glean/Query/Codegen.hs @@ -525,7 +525,9 @@ compileStatements local $ \reg -> do compileTermGen expr vars (Just reg) $ insertWordSet set (castRegister reg) - wordSetToArray set (castRegister (vars!v)) + if isByteTy ty + then byteSetToByteArray set (castRegister (vars!v)) + else wordSetToArray set (castRegister (vars!v)) freeWordSet set compile rest compile (CgAllStatement (Var _ v _) expr stmts : rest) = do @@ -882,12 +884,15 @@ compileStatements local $ \start size -> do -- really want to just matchPat here move ptr start - if isWordTy eltTy then do - inputNat ptr end reg - else do - skipTrusted ptr end eltTy - resetOutput (castRegister reg) - outputBytes start ptr (castRegister reg) + if + | isByteTy eltTy -> + inputByte ptr end reg + | isWordTy eltTy -> + inputNat ptr end reg + | otherwise -> do + skipTrusted ptr end eltTy + resetOutput (castRegister reg) + outputBytes start ptr (castRegister reg) ptrDiff start ptr size add size off a <- inner diff --git a/glean/rts/bytecode/subroutine.cpp b/glean/rts/bytecode/subroutine.cpp index 20bae4aa2..7420a2e88 100644 --- a/glean/rts/bytecode/subroutine.cpp +++ b/glean/rts/bytecode/subroutine.cpp @@ -36,6 +36,12 @@ struct Eval { a.begin << input.data(); } + FOLLY_ALWAYS_INLINE void execute(InputByte a) { + binary::Input input{*a.begin, a.end}; + a.dst << input.fixed(); + a.begin << input.data(); + } + FOLLY_ALWAYS_INLINE void execute(InputBytes a) { binary::Input input{*a.begin, a.end}; input.bytes(a.size); diff --git a/glean/test/tests/Angle/ArrayTest.hs b/glean/test/tests/Angle/ArrayTest.hs index cde854abe..6f78d08e4 100644 --- a/glean/test/tests/Angle/ArrayTest.hs +++ b/glean/test/tests/Angle/ArrayTest.hs @@ -10,7 +10,9 @@ {-# LANGUAGE TypeApplications #-} module Angle.ArrayTest (main) where +import Data.List import Data.Text (Text) +import Data.Word import Test.HUnit import TestRunner @@ -75,6 +77,12 @@ angleArrayGenerator modify = dbTestCase $ \env repo -> do assertEqual "angle - array generator 4" [ ("a", Nat 1), ("b", Nat 2) ] results + results <- runQuery_ env repo $ modify $ + angleData @Byte + [s| [1 : byte, 255][..] |] + print results + assertEqual "angle - array generator 5" + (sort [Byte 1, Byte (fromIntegral (255 :: Word8))]) (sort results) angleArrayPrefix :: (forall a . Query a -> Query a) -> Test angleArrayPrefix modify = TestList diff --git a/glean/test/tests/Angle/SetTest.hs b/glean/test/tests/Angle/SetTest.hs index 189c6c595..7e87ec051 100644 --- a/glean/test/tests/Angle/SetTest.hs +++ b/glean/test/tests/Angle/SetTest.hs @@ -18,6 +18,7 @@ import Control.Monad.Trans.Except import Control.Monad.Except import Data.Default import Data.Text (Text, unpack) +import Data.Word import Glean.Angle.Parser import Glean.Angle.Types hiding (Nat, Type) @@ -130,6 +131,10 @@ setSemanticsTest = TestList , TestLabel "all of big number" $ dbTestCase $ \env repo -> do r <- runQuery_ env repo $ angleData @Nat [s| elements (all 65535) |] assertEqual "big nat" r [Nat 65535] + , TestLabel "all of bytes" $ dbTestCase $ \env repo -> do + r <- runQuery_ env repo $ angleData @Byte + [s| elements (all (1 : byte | 240)) |] + assertEqual "big byte" [Byte 1, Byte (fromIntegral (240 :: Word8))] r , TestLabel "size of a set" $ dbTestCase $ \env repo -> do r <- runQuery_ env repo $ angleData @Nat [s| prim.size (all (1|2|3))|] assertEqual "size" r [Nat 3]