Skip to content

Commit

Permalink
Release deduped batch after serializing
Browse files Browse the repository at this point in the history
Summary:
We were keeping the deduped batch around in memory
unnecessarily. Should help with memory usage on the write server under
heavy load.

Reviewed By: pepeiborra

Differential Revision: D51584191

fbshipit-source-id: 60cc48a5c33ce7033f24aab7499a21b21471f7fa
  • Loading branch information
Simon Marlow authored and facebook-github-bot committed Nov 27, 2023
1 parent c877683 commit 7c6f412
Showing 1 changed file with 22 additions and 17 deletions.
39 changes: 22 additions & 17 deletions glean/db/Glean/Database/Write/Batch.hs
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ deDupBatch
-> IO Subst
deDupBatch env repo odb lookup writing original_size batch maybeOwn = do
next_id <- readIORef $ wrNextId writing
bracket
(maybe_deduped_batch, dsubst) <- bracket
(
withLookupCache repo writing lookup $ \cache ->
-- We need a snapshot here because we don't want lookups to
Expand All @@ -240,26 +240,31 @@ deDupBatch env repo odb lookup writing original_size batch maybeOwn = do
(\(deduped_facts, dsubst) -> do
factCount <- FactSet.factCount deduped_facts
if factCount == 0 && isNothing maybeOwn then
return dsubst
return (Nothing, dsubst)
else do
deduped_batch <- FactSet.serialize deduped_facts
let !is = coerce Subst.substIntervals dsubst
<$> Thrift.batch_owned batch
!deps = substDependencies dsubst
<$> Thrift.batch_dependencies batch
forM_ maybeOwn $ \ownBatch ->
Ownership.substDefineOwnership ownBatch dsubst
-- And now write it do the DB, deduplicating again
wsubst <- withMutex (wrLock writing) $ const $
reallyWriteBatch env repo odb lookup writing original_size True
deduped_batch
{ Thrift.batch_owned = is
, Thrift.batch_dependencies = deps
}
maybeOwn
return $ dsubst <> wsubst
return (Just deduped_batch, dsubst)
)

case maybe_deduped_batch of
Nothing -> return dsubst
Just deduped_batch -> do
let !is = coerce Subst.substIntervals dsubst
<$> Thrift.batch_owned batch
!deps = substDependencies dsubst
<$> Thrift.batch_dependencies batch
forM_ maybeOwn $ \ownBatch ->
Ownership.substDefineOwnership ownBatch dsubst
-- And now write it do the DB, deduplicating again
wsubst <- withMutex (wrLock writing) $ const $
reallyWriteBatch env repo odb lookup writing original_size True
deduped_batch
{ Thrift.batch_owned = is
, Thrift.batch_dependencies = deps
}
maybeOwn
return $ dsubst <> wsubst

withLookupCache
:: Repo
-> Writing
Expand Down

0 comments on commit 7c6f412

Please sign in to comment.