From 47b6d84b5da59185a734c59bf915b8352db0ca19 Mon Sep 17 00:00:00 2001 From: Josef Svenningsson Date: Fri, 20 Dec 2024 06:01:13 -0800 Subject: [PATCH] Schema evolution for sets Summary: This diff enables two things: * Schema evolution for elements of sets. * Schema evolution from array to set. This is very handy for starting to use sets in place of arrays. Since sets are implemented using so called "syscalls" they are required when evolve and transform terms. The code that does the transformation didn't have access to the syscalls before, and it is too clever for its own good. So there's a lot of noise in this diff to pass along the syscalls. Reviewed By: simonmar Differential Revision: D67345466 fbshipit-source-id: 04aa59bced097572426354cad559ff2fb05a1ee2 --- glean.cabal.in | 1 + glean/db/Glean/Database/Schema/Types.hs | 7 +- glean/db/Glean/Query/Codegen.hs | 87 ++++++----- glean/db/Glean/Query/Transform.hs | 182 ++++++++++++++++++------ glean/db/Glean/Write/JSON.hs | 47 +++++- glean/hs/Glean/RTS/Set.hs | 98 +++++++++++++ glean/hs/Glean/Schema/Evolve.hs | 10 +- glean/hs/Glean/Typed/Binary.hs | 37 ++++- glean/rts/ffi.cpp | 60 ++++++++ glean/test/tests/Schema/Evolves.hs | 175 +++++++++++++++++++++++ 10 files changed, 606 insertions(+), 98 deletions(-) create mode 100644 glean/hs/Glean/RTS/Set.hs diff --git a/glean.cabal.in b/glean.cabal.in index 185897424..3cc9a7d02 100644 --- a/glean.cabal.in +++ b/glean.cabal.in @@ -404,6 +404,7 @@ library core Glean.RTS.Foreign.Subst Glean.RTS.Foreign.Thrift Glean.RTS.Foreign.Typecheck + Glean.RTS.Set Glean.RTS.Term Glean.RTS.Traverse Glean.RTS.Typecheck diff --git a/glean/db/Glean/Database/Schema/Types.hs b/glean/db/Glean/Database/Schema/Types.hs index 8d2c4134f..5690fc63b 100644 --- a/glean/db/Glean/Database/Schema/Types.hs +++ b/glean/db/Glean/Database/Schema/Types.hs @@ -57,6 +57,7 @@ import Glean.Angle.Hash import Glean.Angle.Types as Schema hiding (Type, FieldDef) import qualified Glean.Angle.Types as Schema import Glean.Bytecode.Types +import Glean.Query.Codegen.QueryRegs import Glean.Query.Codegen.Types (Match, Output, TransformAndBind) import Glean.Database.Schema.ComputeIds import Glean.Query.Typecheck.Types @@ -125,12 +126,14 @@ data PredicateTransformation = PredicateTransformation -- of the requested predicate. , transformKeyPattern :: forall a - . Maybe ( (Type -> Term (Match TransformAndBind Output) -> Code ()) + . Maybe ( QueryRegs + -> (Type -> Term (Match TransformAndBind Output) -> Code ()) -> Term (Match TransformAndBind Output) -> (Term (Match TransformAndBind Output) -> Code a) -> Code a) , transformValuePattern :: forall a - . Maybe ( (Type -> Term (Match TransformAndBind Output) -> Code ()) + . Maybe ( QueryRegs + -> (Type -> Term (Match TransformAndBind Output) -> Code ()) -> Term (Match TransformAndBind Output) -> (Term (Match TransformAndBind Output) -> Code a) -> Code a) diff --git a/glean/db/Glean/Query/Codegen.hs b/glean/db/Glean/Query/Codegen.hs index a6f8c0035..a99cc6027 100644 --- a/glean/db/Glean/Query/Codegen.hs +++ b/glean/db/Glean/Query/Codegen.hs @@ -238,7 +238,7 @@ compileQuery r qtrans bounds (QueryWithInfo query numVars lookup ty) = do output $ \resultKeyOutput resultValueOutput -> let code :: forall a. Code a -> Code a - code = compileStatements qtrans bounds regs stmts vars + code = compileStatements regs qtrans bounds regs stmts vars queryStmts :: forall a. Code a -> Code a queryStmts = case r of @@ -391,14 +391,15 @@ cmpWordPat vars pat = case pat of -- | Compare a value in an output register with a pattern. If the -- pattern matches, fall through, otherwise jump to the given label. cmpOutputPat - :: Register 'BinaryOutputPtr -- ^ register containing the value + :: QueryRegs + -> Register 'BinaryOutputPtr -- ^ register containing the value -> [QueryChunk Output] -- ^ pattern to match against -> Label -- ^ jump here on match failure -> Code () -cmpOutputPat reg pat fail = +cmpOutputPat syscalls reg pat fail = local $ \ptr begin -> do getOutput reg begin ptr - matchPat (Bytes begin ptr) fail pat + matchPat syscalls (Bytes begin ptr) fail pat compileTermGen :: Expr @@ -425,7 +426,8 @@ compileTermGen term vars maybeReg andThen = do compileStatements :: forall a - . QueryTransformations + . QueryRegs + -> QueryTransformations -> Boundaries -> QueryRegs -> [CgStatement] @@ -434,6 +436,7 @@ compileStatements -- the result is constructed. -> Code a compileStatements + syscalls qtrans bounds regs@QueryRegs{..} @@ -448,7 +451,7 @@ compileStatements local $ \failed innerRet -> mdo let compileBranch stmts = - compileStatements qtrans bounds regs stmts vars $ mdo + compileStatements syscalls qtrans bounds regs stmts vars $ mdo site <- callSite loadLabel ret innerRet jump doInner @@ -457,10 +460,11 @@ compileStatements -- if loadConst 1 failed - thenSite <- compileStatements qtrans bounds regs cond vars $ do + thenSite <- compileStatements syscalls qtrans bounds regs cond vars $ + do -- then - loadConst 0 failed - compileBranch then_ + loadConst 0 failed + compileBranch then_ -- else jumpIf0 failed done elseSite <- compileBranch else_ @@ -516,7 +520,7 @@ compileStatements local $ \setReg -> do let set = castRegister setReg newWordSet set - compileStatements qtrans bounds regs stmts vars $ + compileStatements syscalls qtrans bounds regs stmts vars $ local $ \reg -> do compileTermGen expr vars (Just reg) $ insertWordSet set (castRegister reg) @@ -527,7 +531,7 @@ compileStatements local $ \setReg -> do let set = castRegister setReg newSet set - compileStatements qtrans bounds regs stmts vars $ + compileStatements syscalls qtrans bounds regs stmts vars $ output $ \out -> do compileTermGen expr vars (Just out) $ insertOutputSet set out @@ -535,8 +539,8 @@ compileStatements freeSet set compile rest compile (CgNegation stmts : rest) = mdo - singleResult (compileStatements qtrans bounds regs stmts vars) $ - jump fail + singleResult (compileStatements syscalls qtrans bounds regs stmts vars) + (jump fail) a <- compile rest fail <- label return a @@ -565,7 +569,7 @@ compileStatements compile (CgDisjunction stmtss : rest) = local $ \innerRet -> mdo sites <- forM stmtss $ \stmts -> do - compileStatements qtrans bounds regs stmts vars $ mdo + compileStatements syscalls qtrans bounds regs stmts vars $ mdo site <- callSite loadLabel ret_ innerRet jump doInner @@ -609,7 +613,7 @@ compileStatements cont out (\_ -> return ()) | otherwise = output $ \reg -> - cont reg (\fail -> cmpOutputPat reg chunks fail) + cont reg (\fail -> cmpOutputPat syscalls reg chunks fail) mtrans :: Maybe PredicateTransformation mtrans = lookupTransformation pid qtrans @@ -617,11 +621,15 @@ compileStatements -- The pid we expect to retrieve from the database PidRef expected _ = maybe pref tAvailable mtrans noTrans _ v f = f v - transKeyPat = fromMaybe noTrans $ transformKeyPattern =<< mtrans - transValPat = fromMaybe noTrans $ transformValuePattern =<< mtrans + transKeyPat = fromMaybe noTrans $ + (transformKeyPattern =<< mtrans) <*> Just syscalls + transValPat = fromMaybe noTrans $ + (transformValuePattern =<< mtrans) <*> Just syscalls a <- - transKeyPat (matchDef fail) (inlineVars vars kpat) $ \kpat' -> do - transValPat (matchDef fail) (inlineVars vars vpat) $ \vpat' -> do + transKeyPat (matchDef syscalls fail) (inlineVars vars kpat) $ + \kpat' -> + transValPat (matchDef syscalls fail) (inlineVars vars vpat) $ + \vpat' -> do patOutput (preProcessPat kpat') $ \kout kcmp -> do patOutput (preProcessPat vpat') $ \vout vcmp -> do reg <- load fail @@ -652,7 +660,8 @@ compileStatements filterPat reg pat fail | Just cmp <- maybeWordFilter = cmp reg fail - | otherwise = cmpOutputPat (castRegister reg) chunks fail + | otherwise = + cmpOutputPat syscalls (castRegister reg) chunks fail where chunks = preProcessPat $ inlineVars vars pat in outReg $ \reg -> @@ -714,7 +723,7 @@ compileStatements cmp (castRegister q') ok Nothing -> withTerm vars q $ \q' -> - cmpOutputPat q' (preProcessPat $ inlineVars vars p) ok + cmpOutputPat syscalls q' (preProcessPat $ inlineVars vars p) ok jump fail ok <- label whenJust maybeReg (resetOutput . castRegister) @@ -947,21 +956,22 @@ compileStatements -- | Match term against the default value for its type matchDef - :: Label + :: QueryRegs + -> Label -> Type -> Term (Match TransformAndBind Output) -> Code () -matchDef fail ty pat = +matchDef syscalls fail ty pat = output $ \out -> do resetOutput out buildTerm out mempty (defaultValue ty) local $ \start end -> do getOutput out start end - matchPat (Bytes start end) fail (preProcessPat pat) + matchPat syscalls (Bytes start end) fail (preProcessPat pat) compileFactGenerator :: forall a - . Maybe PredicateTransformation + . Maybe PredicateTransformation -> Boundaries -> QueryRegs -> Vector (Register 'Word) -- ^ registers for variables @@ -972,10 +982,10 @@ compileFactGenerator -> Maybe (Register 'Word) -> Code a -> Code a -compileFactGenerator mtrans bounds QueryRegs{..} +compileFactGenerator mtrans bounds qregs@QueryRegs{..} vars pid kpat vpat section maybeReg inner = mdo let etrans = maybe (Left pid) Right mtrans - withPatterns etrans vars kpat vpat $ + withPatterns qregs etrans vars kpat vpat $ \availablePid isPointQuery prefix matchKey matchValue -> do typ <- constant $ fromIntegral $ fromPid availablePid @@ -1043,7 +1053,8 @@ compileFactGenerator mtrans bounds QueryRegs{..} -- If the transformation determines that the modified pattern will never match -- the callback code will be skipped at runtime. withPatterns - :: Either Pid PredicateTransformation + :: QueryRegs + -> Either Pid PredicateTransformation -> Vector (Register 'Word) -- ^ registers for variables -> Pat -- ^ key pattern -> Pat -- ^ value pattern @@ -1055,16 +1066,16 @@ withPatterns -> Code a ) -> Code a -withPatterns etrans vars kpat vpat act = mdo +withPatterns syscalls etrans vars kpat vpat act = mdo a <- - transKeyPat (matchDef fail) (inlineVars vars kpat) $ \kpat' -> do - transValPat (matchDef fail) (inlineVars vars vpat) $ \vpat' -> do + transKeyPat (matchDef syscalls fail) (inlineVars vars kpat) $ \kpat' -> do + transValPat (matchDef syscalls fail) (inlineVars vars vpat) $ \vpat' -> do let kchunks = preProcessPat kpat' vchunks = preProcessPat vpat' when (emptyPrefix kchunks) (fullScan pid) withPrefix kchunks $ \isPointQuery prefix remaining -> do - let matchKey bytes fail = matchPat bytes fail remaining - matchVal bytes fail = matchPat bytes fail vchunks + let matchKey bytes fail = matchPat syscalls bytes fail remaining + matchVal bytes fail = matchPat syscalls bytes fail vchunks needs_value = not (all isWild vchunks) act pid isPointQuery prefix matchKey (if needs_value then Just matchVal else Nothing) @@ -1075,8 +1086,8 @@ withPatterns etrans vars kpat vpat act = mdo Right PredicateTransformation{..} -> let PidRef pid _ = tAvailable in ( pid - , fromMaybe noTrans transformKeyPattern - , fromMaybe noTrans transformValuePattern + , fromMaybe noTrans (transformKeyPattern <*> Just syscalls) + , fromMaybe noTrans (transformValuePattern <*> Just syscalls) ) Left pid -> ( pid @@ -1446,8 +1457,8 @@ recursive QueryRegs{..} before after andThen = -- | check that a value matches a pattern, and bind variables as -- necessary. The pattern is assumed to cover the *whole* of the -- input. -matchPat :: Bytes -> Label -> [QueryChunk Output] -> Code () -matchPat (Bytes input inputend) fail chunks = match True chunks +matchPat :: QueryRegs -> Bytes -> Label -> [QueryChunk Output] -> Code () +matchPat syscalls (Bytes input inputend) fail chunks = match True chunks where match :: Bool -- ^ whether the query chunks match until the end of the input. @@ -1536,7 +1547,7 @@ matchPat (Bytes input inputend) fail chunks = match True chunks match tillEnd rest QueryTransformAndBind from (Typed to out) -> - case transformBytes from to of + case transformBytes syscalls from to of Nothing -> match tillEnd (QueryBind (Typed to out) : rest) Just transform -> do let bytes = Bytes input inputend diff --git a/glean/db/Glean/Query/Transform.hs b/glean/db/Glean/Query/Transform.hs index d838a6c77..c4c78ea5c 100644 --- a/glean/db/Glean/Query/Transform.hs +++ b/glean/db/Glean/Query/Transform.hs @@ -46,6 +46,7 @@ import Data.Word (Word64) import Glean.Angle.Types (Type_(..), FieldDef_(..), Name) import Glean.Bytecode.Types (Ty(..)) import Glean.Schema.Util (lowerEnum, lowerMaybe, showRef) +import Glean.Query.Codegen.QueryRegs import Glean.Query.Codegen.Types ( Match(..) , Var(..) @@ -219,22 +220,22 @@ buildTerm -> Vector (Register 'Word) -> Term (Match () Var) -> Code () -buildTerm output vars term = go term +buildTerm out vars term = go term where go term = case term of - Byte b -> outputByteImm (fromIntegral b) output - Nat n -> outputNatImm n output + Byte b -> outputByteImm (fromIntegral b) out + Nat n -> outputNatImm n out String s -> local $ \ptr end -> do -- NOTE: We assume that the string has been validated during parsing. loadLiteral (RTS.mangleString s) ptr end - outputBytes ptr end output + outputBytes ptr end out Array vs -> do - outputNatImm (fromIntegral (length vs)) output + outputNatImm (fromIntegral (length vs)) out mapM_ go vs Tuple fields -> mapM_ go fields - Alt n term -> do outputNatImm n output; go term - Ref (MatchFid f) -> outputNatImm (fromIntegral (fromFid f)) output + Alt n term -> do outputNatImm n out; go term + Ref (MatchFid f) -> outputNatImm (fromIntegral (fromFid f)) out Ref (MatchPrefix str rest) -> do local $ \ptr end -> do let @@ -242,14 +243,14 @@ buildTerm output vars term = go term withoutTerminator = ByteString.take (ByteString.length mangled - 2) mangled loadLiteral withoutTerminator ptr end - outputBytes ptr end output + outputBytes ptr end out go rest Ref (MatchVar (Var ty var _)) - | isWordTy ty -> outputNat (vars ! var) output + | isWordTy ty -> outputNat (vars ! var) out | otherwise -> local $ \ptr end -> do getOutput (castRegister (vars ! var)) ptr end - outputBytes ptr end output + outputBytes ptr end out Ref (MatchArrayPrefix _ _ all) -> go all other -> error $ "buildTerm: " <> show other @@ -317,9 +318,10 @@ transformExpression :: Type -> Type -> Maybe (Value -> Value) transformExpression from to = case transformTerm inner defaultValue from to of Nothing -> Nothing - Just f -> Just $ \ta -> f discard ta id + Just f -> Just $ \ta -> + f (error "QueryRegs are never used in transformExpression") discard ta id where - inner _ _ _ _ a f = f a + inner _ _ _ _ _ a f = f a discard _ _ = return () type Matcher = Match TransformAndBind Output @@ -331,21 +333,22 @@ transformPattern :: forall a. Type -> Type -> Maybe - ( (Type -> Term Matcher -> Code ()) + ( QueryRegs + -> (Type -> Term Matcher -> Code ()) -> Term Matcher -> (Term Matcher -> Code a) -> Code a ) transformPattern from to = do f <- transformTerm transformMatch defaultForType from to - return $ \discard term -> + return $ \qr discard term -> let discard' :: Type -> Term Matcher -> Cont (Code a) () discard' a b = cont $ \r -> do () <- discard a b r () in - runCont (f discard' term) + runCont (f qr discard' term) where defaultForType ty = Ref (MatchWild ty) @@ -355,13 +358,14 @@ transformPattern from to = do -- NB. Type compatibility is not checked. Assumes that a transformation is -- possible and required. transformMatch - :: (Type -> Matcher -> Cont (Code x) ()) -- ^ handle discarded record fields + :: QueryRegs + -> (Type -> Matcher -> Cont (Code x) ()) -- ^ handle discarded record fields -> Type -> Type -> (Term Matcher -> Cont (Code x) (Term Matcher)) -> Matcher -> Cont (Code x) Matcher -transformMatch discard from to overTerm match = case match of +transformMatch syscalls discard from to overTerm match = case match of MatchWild _ -> return $ MatchWild to MatchNever _ -> return $ MatchNever to MatchFid fid -> return $ MatchFid fid @@ -369,7 +373,7 @@ transformMatch discard from to overTerm match = case match of -- If we get to this case it means that this conversion is required, MatchBind out -> return $ MatchExt $ TransformAndBind to out MatchVar (Typed _ var) -> - case transformBytes' discard' from to of + case transformBytes' syscalls discard' from to of Nothing -> return $ MatchVar $ Typed to var Just transform -> cont $ \r -> @@ -403,7 +407,8 @@ transformMatch discard from to overTerm match = case match of run m = void $ runCont m (\() -> return undefined) type TransformTerm m a b - = (Type -> Term a -> m ()) -- ^ discard term + = QueryRegs + -> (Type -> Term a -> m ()) -- ^ discard term -> Term a -- ^ source term -> m (Term b) @@ -414,7 +419,8 @@ type TransformTerm m a b -- differently so we take those handling functions as input. transformTerm :: forall a b m. (Coercible a b, Show a, Show b, Monad m) - => ( (Type -> a -> m ()) -- discard inner value + => ( QueryRegs + -> (Type -> a -> m ()) -- discard inner value -> Type -- from type -> Type -- to type -> (Term a -> m (Term b)) -- handle inner terms @@ -440,12 +446,15 @@ transformTerm inner defaultForType src dst = go src dst transformationsFor from to = Map.intersectionWith trans fromFields toFields where + trans :: (Word64, Type) + -> (Word64, Type) + -> Maybe (Word64, TransformTerm m a b) trans (ixFrom, defFrom) (ixTo, defTo) = case go defFrom defTo of -- fields are identical Nothing | ixTo == ixFrom -> Nothing -- field order changed - Nothing -> Just (ixTo, const id') + Nothing -> Just (ixTo, \_ _ -> id') -- field content changed Just f -> Just (ixTo, f) @@ -470,18 +479,28 @@ transformTerm inner defaultForType src dst = go src dst go (lowerEnum from) (lowerEnum to) go (ArrayTy from) (ArrayTy to) = do f <- go from to - return $ fix $ \recurse discard term -> + return $ fix $ \recurse qr discard term -> case term of - Array vs -> Array <$> traverse (f discard) vs - Ref a -> Ref <$> inner + Array vs -> Array <$> traverse (f qr discard) vs + Ref a -> Ref <$> inner qr (\ty val -> discard ty (Ref val)) (ArrayTy from) (ArrayTy to) - (recurse discard) + (recurse qr discard) + a + _ -> error $ "expected Array, got " <> show term + go (SetTy from) (SetTy to) = do + f <- go from to + return $ fix $ \recurse qr discard term -> + case term of + Array vs -> Array <$> traverse (f qr discard) vs + Ref a -> Ref <$> inner qr + (\ty val -> discard ty (Ref val)) + (SetTy from) + (SetTy to) + (recurse qr discard) a _ -> error $ "expected Array, got " <> show term - go (SetTy from) (SetTy to) - | from == to = Nothing go (RecordTy from) (RecordTy to) = let transformations = transformationsFor from to sameFieldCount = length from == length to @@ -491,14 +510,15 @@ transformTerm inner defaultForType src dst = go src dst in if noChange then Nothing - else Just $ fix $ \recurse discard term -> case term of + else Just $ fix $ \recurse qr discard term -> case term of Tuple contents -> do contents' <- sequence [ case Map.lookup name transMap of -- 'to' field doesn't exist in 'from' Nothing -> return $ defaultForType ty Just (content, Nothing) -> id' content - Just (content, Just (_, transform)) -> transform discard content + Just (content, Just (_, transform)) -> + transform qr discard content | FieldDef name ty <- to ] @@ -512,11 +532,11 @@ transformTerm inner defaultForType src dst = go src dst where transMap = Map.intersectionWith (,) contentsByName transformations contentsByName = Map.fromList $ zip (names from) contents - Ref a -> Ref <$> inner - (\ty -> discard ty . Ref) + Ref a -> Ref <$> inner qr + (\ty val -> discard ty (Ref val)) (RecordTy from) (RecordTy to) - (recurse discard) + (recurse qr discard) a _ -> error $ "expected Tuple, got " <> show term @@ -534,21 +554,46 @@ transformTerm inner defaultForType src dst = go src dst in if noChange then Nothing - else Just $ fix $ \recurse discard term -> case term of + else Just $ fix $ \recurse qr discard term -> case term of Alt n content -> case Map.lookup n transformationsByIx of -- alternative in 'from' doesn't exist in 'to' Nothing -> return unknown Just Nothing -> id' term Just (Just (n', transform)) -> do - content' <- transform discard content + content' <- transform qr discard content return (Alt n' content') - Ref a -> Ref <$> inner + Ref a -> Ref <$> inner qr (\ty -> discard ty . Ref) (SumTy from) (SumTy to) - (recurse discard) + (recurse qr discard) a _ -> error $ "expected Alt, got " <> show term + go (ArrayTy from) (SetTy to) = do + f <- go from to + return $ fix $ \recurse qr discard term -> + case term of + Array vs -> Array <$> traverse (f qr discard) vs + Ref a -> Ref <$> inner qr + (\ty val -> discard ty (Ref val)) + (ArrayTy from) + (SetTy to) + (recurse qr discard) + a + _ -> error $ "expected Array, got " <> show term + go (SetTy from) (ArrayTy to) = do + f <- go from to + return $ fix $ \recurse qr discard term -> + case term of + Array vs -> Array <$> traverse (f qr discard) vs + Ref a -> Ref <$> inner qr + (\ty val -> discard ty (Ref val)) + (SetTy from) + (ArrayTy to) + (recurse qr discard) + a + _ -> error $ "expected Array, got " <> show term + go from to = error $ "invalid type conversion: " <> show from <> " to " <> show to @@ -572,19 +617,21 @@ defaultValue ty = case derefType ty of -- The transformation function will always leave the start pointer of `Bytes` at -- the end of the transformed input. transformBytes - :: Type + :: QueryRegs + -> Type -> Type -> Maybe (Bytes -> Register 'BinaryOutputPtr -> Code ()) -transformBytes = transformBytes' ignoreDiscarded +transformBytes syscalls = transformBytes' syscalls ignoreDiscarded where ignoreDiscarded _ _ = return () transformBytes' - :: (Type -> Bytes -> Code ()) -- handle discarded record fields + :: QueryRegs + -> (Type -> Bytes -> Code ()) -- handle discarded record fields -> Type -> Type -> Maybe (Bytes -> Register 'BinaryOutputPtr -> Code ()) -transformBytes' discard src dst = +transformBytes' QueryRegs{..} discard src dst = case go src dst of Left _ -> Nothing Right transform -> Just $ \bytes out -> transform out bytes @@ -618,6 +665,25 @@ transformBytes' discard src dst = decrAndJumpIfNot0 size loop finish <- label return () + go (SetTy from) (SetTy to) = + case go from to of + Left _ -> Left $ copy (SetTy to) + Right trans -> Right $ \out (Bytes start end) -> + local $ \size -> do + inputNat start end size + local $ \set -> mdo + jumpIf0 size finish + newSet set + loop <- label + output $ \tempOut -> do + resetOutput tempOut + trans tempOut (Bytes start end) + insertOutputSet set tempOut + decrAndJumpIfNot0 size loop + finish <- label + setToArray set out + freeSet set + return () go (SumTy from) (SumTy to) | sameOrder && sameTypes = Left $ copy (SumTy to) | otherwise = Right $ \out (Bytes start end) -> mdo @@ -714,6 +780,42 @@ transformBytes' discard src dst = let fieldBytes = Bytes fieldStart end saved' = Map.insert nameFrom (fieldBytes, tyFrom) saved step saved' restFrom to' + go (ArrayTy from) (SetTy to) = + let trans = + case go from to of + Left copy -> copy + Right trans -> trans + in Right $ \out (Bytes start end) -> + local $ \size -> do + inputNat start end size + local $ \set -> mdo + jumpIf0 size finish + newSet set + loop <- label + output $ \tempOut -> do + resetOutput tempOut + trans tempOut (Bytes start end) + insertOutputSet set tempOut + decrAndJumpIfNot0 size loop + finish <- label + setToArray set out + freeSet set + return () + go (SetTy from) (ArrayTy to) = + let trans = + case go from to of + Left copy -> copy + Right trans -> trans + in Right $ \out (Bytes start end) -> + local $ \size -> mdo + inputNat start end size + outputNat size out + jumpIf0 size finish + loop <- label + trans out (Bytes start end) + decrAndJumpIfNot0 size loop + finish <- label + return () go from to = error $ "invalid type conversion: " <> show from <> " to " <> show to diff --git a/glean/db/Glean/Write/JSON.hs b/glean/db/Glean/Write/JSON.hs index a39e5e88e..231cfd92b 100644 --- a/glean/db/Glean/Write/JSON.hs +++ b/glean/db/Glean/Write/JSON.hs @@ -6,6 +6,8 @@ LICENSE file in the root directory of this source tree. -} +{-# LANGUAGE MultiWayIf #-} + module Glean.Write.JSON ( buildJsonBatch , syncWriteJsonBatch @@ -44,6 +46,7 @@ import Glean.Display import qualified Glean.FFI as FFI import Glean.RTS as RTS import Glean.RTS.Builder +import Glean.RTS.Set import Glean.RTS.Constants import qualified Glean.RTS.Foreign.JSON as J import Glean.RTS.Foreign.Subst as Subst (empty) @@ -324,12 +327,38 @@ writeJsonFact when (n > 0) $ forM_ [0 .. n-1] $ \i -> do x <- lift $ J.index arr i jsonToTerm b ty x - (SetTy ty, J.Array set) -> do - let !n = J.size set - lift $ invoke $ glean_push_value_set b $ fromIntegral n - when (n > 0) $ forM_ [0 .. n-1] $ \i -> do - x <- lift $ J.index set i - jsonToTerm b ty x + (SetTy byteTy, J.String (J.ByteStringRef p n)) + | ByteTy == repType byteTy -> if + | sendJsonBatchOptions_no_base64_binary -> + withWordRtsSet $ \rtsset -> + lift $ do + insertBytesRtsSet rtsset p n + buildWordSetBytes rtsset b + | otherwise -> lift $ do + bytes <- decodeBase64 <$> + BS.unsafePackCStringLen (castPtr p, fromIntegral n) + FFI.unsafeWithBytes bytes $ \ptr len -> + withWordRtsSet $ \rtsset -> do + insertBytesRtsSet rtsset (castPtr ptr) len + buildWordSetBytes rtsset b + (SetTy natTy, J.Array set) + | NatTy == repType natTy -> + withWordRtsSet $ \rtsset -> do + let !n = J.size set + forM_ [0 .. n-1] $ \i -> do + J.Int n <- lift $ J.index set i + lift $ insertWordRtsSet rtsset n + lift $ buildWordSet rtsset b + (SetTy ty, J.Array set) -> + withBuilder $ \tb -> + withRtsSet $ \rtsset -> do + let !n = J.size set + forM_ [0 .. n-1] $ \i -> do + x <- lift $ J.index set i + jsonToTerm tb ty x + lift $ insertBuilder rtsset tb + lift $ resetBuilder tb + lift $ buildSet rtsset b (RecordTy fields, J.Object obj) -> do let doField !n (FieldDef name ty) = do @@ -386,6 +415,12 @@ writeJsonFact lift $ invoke $ glean_push_value_selector b 1 _otherwise -> termError typ v + -- Remove named types so that we can match on the underlying + -- representation of the type + repType :: Type -> Type + repType (NamedTy (ExpandedType _ ty)) = ty + repType ty = ty + -- Thrift might omit fields from the output if they have the -- default value, so we have to reconstruct the default value -- here. diff --git a/glean/hs/Glean/RTS/Set.hs b/glean/hs/Glean/RTS/Set.hs new file mode 100644 index 000000000..14aec1725 --- /dev/null +++ b/glean/hs/Glean/RTS/Set.hs @@ -0,0 +1,98 @@ +{- + Copyright (c) Meta Platforms, Inc. and affiliates. + All rights reserved. + + This source code is licensed under the BSD-style license found in the + LICENSE file in the root directory of this source tree. +-} + +module Glean.RTS.Set + ( RtsSet + , withRtsSet + , insertBuilder + , buildSet + , WordRtsSet + , withWordRtsSet + , insertWordRtsSet + , insertBytesRtsSet + , buildWordSet + , buildWordSetBytes + ) where + +import Control.Monad.Catch +import Control.Monad.IO.Class +import Foreign +import Foreign.C + +import qualified Util.FFI as FFI + +import Glean.RTS.Builder + +newtype RtsSet = RtsSet (Ptr RtsSet) + deriving(Storable) + +withRtsSet :: (MonadMask m, MonadIO m) => (RtsSet -> m a) -> m a +withRtsSet = bracket (liftIO newRtsSet) (liftIO . freeRtsSet) + +newRtsSet :: IO RtsSet +newRtsSet = FFI.invoke glean_rtsset_new + +insertBuilder :: RtsSet -> Builder -> IO () +insertBuilder set builder = FFI.invoke $ glean_rtsset_insert set builder + +buildSet :: RtsSet -> Builder -> IO () +buildSet set builder = FFI.invoke $ glean_rtsset_build set builder + +newtype WordRtsSet = WordRtsSet (Ptr WordRtsSet) + deriving (Storable) + +withWordRtsSet :: (MonadMask m, MonadIO m) => (WordRtsSet -> m a) -> m a +withWordRtsSet = bracket (liftIO newWordRtsSet) (liftIO . freeWordRtsSet) + +newWordRtsSet :: IO WordRtsSet +newWordRtsSet = FFI.invoke glean_wordrtsset_new + +insertWordRtsSet :: Integral n => WordRtsSet -> n -> IO () +insertWordRtsSet set elem = + FFI.invoke $ glean_wordrtsset_insert set (fromIntegral elem) + +insertBytesRtsSet :: Integral n => WordRtsSet -> Ptr Word8 -> n -> IO () +insertBytesRtsSet set bytes size = + FFI.invoke $ glean_wordrtsset_insert_bytes set bytes (fromIntegral size) + +buildWordSet :: WordRtsSet -> Builder -> IO () +buildWordSet set builder = FFI.invoke $ glean_wordrtsset_build set builder + +buildWordSetBytes :: WordRtsSet -> Builder -> IO () +buildWordSetBytes set builder = + FFI.invoke $ glean_wordrtsset_build_bytes set builder + +foreign import ccall unsafe glean_rtsset_new + :: Ptr RtsSet -> IO CString + +foreign import ccall unsafe glean_rtsset_insert + :: RtsSet -> Builder -> IO CString + +foreign import ccall unsafe glean_rtsset_build + :: RtsSet -> Builder -> IO CString + +foreign import ccall unsafe "glean_rtsset_free" freeRtsSet + :: RtsSet -> IO () + +foreign import ccall unsafe glean_wordrtsset_new + :: Ptr WordRtsSet -> IO CString + +foreign import ccall unsafe glean_wordrtsset_insert + :: WordRtsSet -> CULong -> IO CString + +foreign import ccall unsafe glean_wordrtsset_insert_bytes + :: WordRtsSet -> Ptr Word8 -> CSize -> IO CString + +foreign import ccall unsafe glean_wordrtsset_build + :: WordRtsSet -> Builder -> IO CString + +foreign import ccall unsafe glean_wordrtsset_build_bytes + :: WordRtsSet -> Builder -> IO CString + +foreign import ccall unsafe "glean_wordrtsset_free" freeWordRtsSet + :: WordRtsSet -> IO () diff --git a/glean/hs/Glean/Schema/Evolve.hs b/glean/hs/Glean/Schema/Evolve.hs index 84d4531c5..c284c3c5f 100644 --- a/glean/hs/Glean/Schema/Evolve.hs +++ b/glean/hs/Glean/Schema/Evolve.hs @@ -376,12 +376,7 @@ canEvolve types compatible new old = go new old go StringTy StringTy = Nothing go BooleanTy BooleanTy = Nothing go (ArrayTy new) (ArrayTy old) = go new old - go (SetTy new) (SetTy old) - | new == old = Nothing - | otherwise = Just $ Text.pack $ - "types inside sets cannot evolve. Type changed from " <> - show (displayDefault (SetTy old)) - <> " to " <> show (displayDefault new) + go (SetTy new) (SetTy old) = go new old go (PredicateTy new) (PredicateTy old) | not (compatible new old) = Just $ "type changed from " <> showRef old @@ -395,6 +390,8 @@ canEvolve types compatible new old = go new old unitOpt name = FieldDef name (RecordTy []) go (SumTy new) (SumTy old) = compareFieldList Option new old go (RecordTy new) (RecordTy old) = compareFieldList FieldOpt new old + go (SetTy new) (ArrayTy old) = go new old + go (ArrayTy new) (SetTy old) = go new old go new old = Just $ Text.pack $ "type changed from " <> show (displayDefault old) <> " to " <> @@ -455,7 +452,6 @@ canEvolve types compatible new old = go new old <> " require that all new fields are non-predicate types" ] - plural s [_] = showOpt s plural s _ = showOpt s <> "s" showOpt Option = "option" diff --git a/glean/hs/Glean/Typed/Binary.hs b/glean/hs/Glean/Typed/Binary.hs index c7dd54ab9..1509255fc 100644 --- a/glean/hs/Glean/Typed/Binary.hs +++ b/glean/hs/Glean/Typed/Binary.hs @@ -42,6 +42,7 @@ import qualified Glean.Schema.Util as Angle import qualified Glean.FFI as FFI import qualified Glean.RTS as RTS import qualified Glean.RTS.Builder as RTS +import Glean.RTS.Set import Glean.Typed.Build import Glean.Typed.Id import Glean.Types as Thrift @@ -190,11 +191,37 @@ instance Type a => Type (Maybe a) where [pure Nothing, Just <$> decodeRtsValue] sourceType _ = Angle.MaybeTy (sourceType (Proxy @a)) -instance (Type a, Ord a) => Type (Set a) where - buildRtsValue b xs = liftIO $ do - -- TODO, we need to sort things properly - FFI.call $ RTS.glean_push_value_set b $ fromIntegral $ length xs - mapM_ (buildRtsValue b) xs +instance {-# OVERLAPPING #-} Type (Set Byte) where + buildRtsValue b xs = + withWordRtsSet $ \set -> do + forM_ xs $ \x -> + insertWordRtsSet set (unByte x) + buildWordSetBytes set b + decodeRtsValue = Decoder $ \env@DecoderEnv{..} -> do + size <- FFI.ffiBuf buf $ RTS.glean_pop_value_set begin end + fromList <$> replicateM (fromIntegral size) (runDecoder decodeRtsValue env) + sourceType _ = Angle.SetTy Angle.ByteTy + +instance {-# OVERLAPPING #-} Type (Set Nat) where + buildRtsValue b xs = + withWordRtsSet $ \set -> do + forM_ xs $ \x -> do + insertWordRtsSet set (unNat x) + buildWordSet set b + decodeRtsValue = Decoder $ \env@DecoderEnv{..} -> do + size <- FFI.ffiBuf buf $ RTS.glean_pop_value_set begin end + fromList <$> replicateM (fromIntegral size) (runDecoder decodeRtsValue env) + sourceType _ = Angle.SetTy Angle.NatTy + +instance {-# OVERLAPPABLE #-} (Type a, Ord a) => Type (Set a) where + buildRtsValue b xs = liftIO $ + withRtsSet $ \set -> do + RTS.withBuilder $ \tb -> + forM_ xs $ \x -> do + buildRtsValue tb x + insertBuilder set tb + RTS.resetBuilder tb + buildSet set b decodeRtsValue = Decoder $ \env@DecoderEnv{..} -> do size <- FFI.ffiBuf buf $ RTS.glean_pop_value_set begin end fromList <$> replicateM (fromIntegral size) (runDecoder decodeRtsValue env) diff --git a/glean/rts/ffi.cpp b/glean/rts/ffi.cpp index 5eaaeffd5..d0953c515 100644 --- a/glean/rts/ffi.cpp +++ b/glean/rts/ffi.cpp @@ -572,6 +572,66 @@ const char* glean_factset_append(FactSet* target, FactSet* source) { return ffi::wrap([=] { target->append(std::move(*source)); }); } +const char* glean_rtsset_new(BytestringSet** set) { + return ffi::wrap([=] { *set = new BytestringSet(); }); +} + +const char* glean_rtsset_insert(BytestringSet* set, Output* out) { + return ffi::wrap([=] { set->insert(out->moveToFbString()); }); +} + +const char* glean_rtsset_build(BytestringSet* set, Output* out) { + return ffi::wrap([=] { + out->packed(set->size()); + for (const auto& v : *set) { + out->bytes(v.data(), v.size()); + } + }); +} + +void glean_rtsset_free(BytestringSet* set) { + return ffi::free_(set); +} + +const char* glean_wordrtsset_new(WordSet** set) { + return ffi::wrap([=] { *set = new WordSet(); }); +} + +const char* glean_wordrtsset_insert(WordSet* set, uint64_t value) { + return ffi::wrap([=] { set->insert(value); }); +} + +const char* +glean_wordrtsset_insert_bytes(WordSet* set, const uint8_t* data, size_t size) { + return ffi::wrap([=] { + for (auto i = 0; i < size; i++) { + set->insert(data[i]); + } + }); +} + +const char* glean_wordrtsset_build(WordSet* set, Output* out) { + return ffi::wrap([=] { + out->packed(set->size()); + for (const auto& v : *set) { + out->packed(v); + } + }); +} + +const char* glean_wordrtsset_build_bytes(WordSet* set, Output* out) { + return ffi::wrap([=] { + out->packed(set->size()); + for (const auto& v : *set) { + out->fixed(v); + } + }); +} + +void glean_wordrtsset_free(WordSet* set) { + return ffi::free_(set); +} + const char* glean_stacked_lookup_new(Lookup* base, Lookup* added, Lookup** stacked) { return ffi::wrap([=] { *stacked = new Stacked(base, added); }); diff --git a/glean/test/tests/Schema/Evolves.hs b/glean/test/tests/Schema/Evolves.hs index 74f793e70..50d98c221 100644 --- a/glean/test/tests/Schema/Evolves.hs +++ b/glean/test/tests/Schema/Evolves.hs @@ -1510,7 +1510,182 @@ schemaEvolvesTransformations = , RTS.Tuple [RTS.Nat 2, nothing] ] ] ] facts + , TestLabel "transform predicate within set" $ TestCase $ do + withSchemaAndFactsQ + [s| + schema x.1 { + type T = { a : nat } + predicate P: { x : set T } + } + schema x.2 { + type T = { a : nat, b : maybe nat } + predicate P: { x : set T } + } + schema x.2 evolves x.1 + schema all.1 : x.1, x.2 {} + |] + [ mkBatch (PredicateRef "x.P" 1) + [ [s|{ "key": { "x": [{ "a": 1 }, { "a": 2 }] } }|] ] + ] + [s| x.P.2 _ |] + $ \byRef response _ -> do + facts <- decodeResultsAs (SourceRef "x.P" (Just 2)) byRef response + assertEqual "result content" + [ RTS.Tuple + [ RTS.Array + [ RTS.Tuple [RTS.Nat 1, nothing] + , RTS.Tuple [RTS.Nat 2, nothing] + ] ] ] + facts + , TestLabel "evolve array to set of nat" $ TestCase $ do + withSchemaAndFactsQ + [s| + schema x.1 { + predicate P: { x : [nat] } + } + schema x.2 { + predicate P: { x : set nat } + } + schema x.2 evolves x.1 + schema all.1 : x.1, x.2 {} + |] + [ mkBatch (PredicateRef "x.P" 1) + [ [s|{ "key": { "x": [ 1 , 1, 2 ] } }|] ] + ] + [s| x.P.2 _ |] + $ \byRef response _ -> do + facts <- decodeResultsAs (SourceRef "x.P" (Just 2)) byRef response + assertEqual "result content" + [ RTS.Tuple + [ RTS.Array + [ RTS.Nat 1 + , RTS.Nat 2 + ] ] ] + facts + , TestLabel "evolve set to array of nat" $ TestCase $ do + withSchemaAndFactsQ + [s| + schema x.1 { + predicate P: { x : set nat } + } + schema x.2 { + predicate P: { x : [nat] } + } + schema x.2 evolves x.1 + schema all.1 : x.1, x.2 {} + |] + [ mkBatch (PredicateRef "x.P" 1) + [ [s|{ "key": { "x": [ 1 , 1, 2 ] } }|] ] + ] + [s| x.P.2 _ |] + $ \byRef response _ -> do + facts <- decodeResultsAs (SourceRef "x.P" (Just 2)) byRef response + assertEqual "result content" + [ RTS.Tuple + [ RTS.Array + [ RTS.Nat 1 + , RTS.Nat 2 + ] ] ] + facts + , TestLabel "evolve set to array of byte" $ TestCase $ do + withSchemaAndFactsQ + [s| + schema x.1 { + predicate P: { x : set byte } + } + schema x.2 { + predicate P: { x : [byte] } + } + schema x.2 evolves x.1 + schema all.1 : x.1, x.2 {} + |] + [ mkBatch (PredicateRef "x.P" 1) + [ [s|{ "key": { "x": [ 1 , 1, 2 ] } }|] ] + ] + [s| x.P.2 _ |] + $ \byRef response _ -> do + facts <- decodeResultsAs (SourceRef "x.P" (Just 2)) byRef response + assertEqual "result content" + [ RTS.Tuple + [ RTS.ByteArray "\SOH\STX" ] ] + facts + , TestLabel "evolve array to set of byte" $ TestCase $ do + withSchemaAndFactsQ + [s| + schema x.1 { + predicate P: { x : [byte] } + } + schema x.2 { + predicate P: { x : set byte } + } + schema x.2 evolves x.1 + schema all.1 : x.1, x.2 {} + |] + [ mkBatch (PredicateRef "x.P" 1) + [ [s|{ "key": { "x": [ 1 , 1, 2 ] } }|] ] + ] + [s| x.P.2 _ |] + $ \byRef response _ -> do + facts <- decodeResultsAs (SourceRef "x.P" (Just 2)) byRef response + assertEqual "result content" + [ RTS.Tuple + [ RTS.Array [RTS.Byte 1, RTS.Byte 2] ] ] + facts + , TestLabel "evolve array to set of pred" $ TestCase $ do + withSchemaAndFactsQ + [s| + schema x.1 { + type T = { a : nat } + predicate P: { x : [T] } + } + schema x.2 { + type T = { a : nat, b : maybe nat } + predicate P: { x : set T } + } + schema x.2 evolves x.1 + schema all.1 : x.1, x.2 {} + |] + [ mkBatch (PredicateRef "x.P" 1) + [ [s|{ "key": { "x": [{ "a": 1 }, { "a": 2 }] } }|] ] + ] + [s| x.P.2 _ |] + $ \byRef response _ -> do + facts <- decodeResultsAs (SourceRef "x.P" (Just 2)) byRef response + assertEqual "result content" + [ RTS.Tuple + [ RTS.Array + [ RTS.Tuple [RTS.Nat 1, nothing] + , RTS.Tuple [RTS.Nat 2, nothing] + ] ] ] + facts + , TestLabel "evolve set to array of pred" $ TestCase $ do + withSchemaAndFactsQ + [s| + schema x.1 { + type T = { a : nat } + predicate P: { x : set T } + } + schema x.2 { + type T = { a : nat, b : maybe nat } + predicate P: { x : [T] } + } + schema x.2 evolves x.1 + schema all.1 : x.1, x.2 {} + |] + [ mkBatch (PredicateRef "x.P" 1) + [ [s|{ "key": { "x": [{ "a": 1 }, { "a": 2 }] } }|] ] + ] + [s| x.P.2 _ |] + $ \byRef response _ -> do + facts <- decodeResultsAs (SourceRef "x.P" (Just 2)) byRef response + assertEqual "result content" + [ RTS.Tuple + [ RTS.Array + [ RTS.Tuple [RTS.Nat 1, nothing] + , RTS.Tuple [RTS.Nat 2, nothing] + ] ] ] + facts ] where -- run a userQuery using the given schemas and facts