-
Notifications
You must be signed in to change notification settings - Fork 32
/
Copy pathContentHashable.hs
603 lines (513 loc) · 22.2 KB
/
ContentHashable.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
{-# LANGUAGE DefaultSignatures #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE TypeSynonymInstances #-}
{-# LANGUAGE UnboxedTuples #-}
{-# OPTIONS_GHC -Wno-unrecognised-pragmas #-}
{-# HLINT ignore "Use camelCase" #-}
-- | 'ContentHashable' provides a hashing function suitable for use in the
-- Funflow content store.
--
-- This behaves as does a normal hashing function on Haskell types. However,
-- on path types, this instead calculates a hash based on the contents of the
-- file or directory referenced.
--
-- We also export the 'ExternallyAssuredFile' and 'ExternallyAssuredDirectory'
-- types. These instead use the path, file size and modification time to control
-- the hash.
module Data.CAS.ContentHashable
( ContentHash,
toBytes,
fromBytes,
ContentHashable (..),
contentHashUpdate_binaryFile,
contentHashUpdate_byteArray#,
contentHashUpdate_fingerprint,
contentHashUpdate_primitive,
contentHashUpdate_storable,
FileContent (..),
DirectoryContent (..),
ExternallyAssuredFile (..),
ExternallyAssuredDirectory (..),
encodeHash,
decodeHash,
hashToPath,
pathToHash,
SHA256,
Context,
Digest,
)
where
import Control.Exception.Safe (catchJust)
import Control.Monad (foldM, (>=>))
import Control.Monad.IO.Class (MonadIO, liftIO)
import Crypto.Hash
( Context,
Digest,
SHA256,
digestFromByteString,
hashFinalize,
hashInit,
hashUpdate,
)
import qualified Data.Aeson as Aeson
import qualified Data.Aeson.Types as Aeson
import qualified Data.Aeson.KeyMap as Aeson
import qualified Data.Aeson.Key as Aeson
import Data.ByteArray
( Bytes,
MemView (MemView),
allocAndFreeze,
convert,
)
import Data.ByteArray.Encoding
( Base (Base16),
convertFromBase,
convertToBase,
)
import qualified Data.ByteString as BS
import Data.ByteString.Builder.Extra (defaultChunkSize)
import qualified Data.ByteString.Char8 as C8
import qualified Data.ByteString.Lazy as BSL
import Data.Foldable (foldlM)
import qualified Data.HashMap.Lazy as HashMap
import qualified Data.HashSet as HashSet
import qualified Data.Hashable
import Data.Int
import Data.List (sort)
import Data.List.NonEmpty (NonEmpty)
import Data.Map (Map)
import qualified Data.Map as Map
import Data.Ratio
import Data.Scientific
import qualified Data.Text as T
import qualified Data.Text.Array as TA
import qualified Data.Text.Encoding as TE
import qualified Data.Text.Internal as T
import qualified Data.Text.Lazy as TL
import Data.Time.Clock (UTCTime)
import Data.Time.Clock.POSIX (utcTimeToPOSIXSeconds)
import Data.Typeable
import qualified Data.Vector as V
import qualified Data.Vector.Storable as VS
import qualified Data.Vector.Unboxed as VU
import Data.Word
import Foreign.Marshal.Utils (with)
import Foreign.Ptr (castPtr)
import Foreign.Storable (Storable, sizeOf)
import GHC.Fingerprint
import GHC.Generics
import GHC.Integer.GMP.Internals (BigNat (..), Integer (..))
import GHC.Natural (Natural (..))
import GHC.Prim
( ByteArray#,
copyByteArrayToAddr#,
sizeofByteArray#,
)
import GHC.Ptr (Ptr (Ptr))
import GHC.Types (IO (IO), Int (I#), Word (W#))
import qualified Path
import qualified Path.IO
import qualified Path.Internal
import System.IO
( IOMode (ReadMode),
withBinaryFile,
)
import System.IO.Error (isPermissionError)
import System.IO.Unsafe (unsafePerformIO)
import System.Posix.Files (fileSize, getFileStatus)
newtype ContentHash = ContentHash {unContentHash :: Digest SHA256}
deriving (Eq, Ord, Generic)
instance Aeson.FromJSON ContentHash where
parseJSON (Aeson.String s)
| Just h <- decodeHash (TE.encodeUtf8 s) = pure h
| otherwise = fail "Invalid hash encoding"
parseJSON invalid =
Aeson.typeMismatch "ContentHash" invalid
instance Aeson.ToJSON ContentHash where
toJSON = Aeson.String . TE.decodeUtf8 . encodeHash
instance Data.Hashable.Hashable ContentHash where
hashWithSalt s = Data.Hashable.hashWithSalt s . encodeHash
instance Show ContentHash where
showsPrec d h =
showParen (d > app_prec) $
showString "ContentHash \""
. (showString $ C8.unpack $ encodeHash h)
. showString "\""
where
app_prec = 10
toBytes :: ContentHash -> BS.ByteString
toBytes = convert . unContentHash
fromBytes :: BS.ByteString -> Maybe ContentHash
fromBytes bs = ContentHash <$> digestFromByteString bs
hashEncoding :: Base
hashEncoding = Base16
-- | File path appropriate encoding of a hash
encodeHash :: ContentHash -> BS.ByteString
encodeHash = convertToBase hashEncoding . toBytes
-- | Inverse of 'encodeHash' if given a valid input.
--
-- prop> decodeHash (encodeHash x) = Just x
decodeHash :: BS.ByteString -> Maybe ContentHash
decodeHash bs = case convertFromBase hashEncoding bs of
Left _ -> Nothing
Right x -> fromBytes x
-- | File path appropriate encoding of a hash
hashToPath :: ContentHash -> Path.Path Path.Rel Path.Dir
hashToPath h =
case Path.parseRelDir $ C8.unpack $ encodeHash h of
Nothing ->
error
"[ContentHashable.hashToPath] \
\Failed to convert hash to directory name"
Just dir -> dir
-- | Inverse of 'hashToPath' if given a valid input.
--
-- prop> pathToHash (hashToPath x) = Just x
pathToHash :: FilePath -> Maybe ContentHash
pathToHash = decodeHash . C8.pack
class Monad m => ContentHashable m a where
-- | Update a hash context based on the given value.
--
-- See 'Crypto.Hash.hashUpdate'.
--
-- XXX: Consider swapping the arguments.
contentHashUpdate :: Context SHA256 -> a -> m (Context SHA256)
default contentHashUpdate ::
(Generic a, GContentHashable m (Rep a)) =>
Context SHA256 ->
a ->
m (Context SHA256)
contentHashUpdate ctx a = gContentHashUpdate ctx (from a)
-- | Generate hash of the given value.
--
-- See 'Crypto.Hash.hash'.
contentHash :: a -> m ContentHash
contentHash x = ContentHash . hashFinalize <$> contentHashUpdate hashInit x
-- | Update hash context based on binary in memory representation due to 'Foreign.Storable.Storable'.
--
-- XXX: Do we need to worry about endianness?
contentHashUpdate_storable :: (Monad m, Storable a) => Context SHA256 -> a -> m (Context SHA256)
contentHashUpdate_storable ctx a =
return . unsafePerformIO $ with a (\p -> pure $! hashUpdate ctx (MemView (castPtr p) (sizeOf a)))
-- | Update hash context based on a type's 'GHC.Fingerprint.Type.Fingerprint'.
--
-- The fingerprint is constructed from the library-name, module-name, and name of the type itself.
contentHashUpdate_fingerprint :: (Monad m, Typeable a) => Context SHA256 -> a -> m (Context SHA256)
contentHashUpdate_fingerprint ctx = contentHashUpdate ctx . typeRepFingerprint . typeOf
-- | Update hash context by combining 'contentHashUpdate_fingerprint' and 'contentHashUpdate_storable'.
-- Intended for primitive types like 'Int'.
contentHashUpdate_primitive :: (Monad m, Typeable a, Storable a) => Context SHA256 -> a -> m (Context SHA256)
contentHashUpdate_primitive ctx a =
flip contentHashUpdate_fingerprint a >=> flip contentHashUpdate_storable a $ ctx
-- | Update hash context based on binary contents of the given file.
contentHashUpdate_binaryFile :: Context SHA256 -> FilePath -> IO (Context SHA256)
contentHashUpdate_binaryFile ctx0 fp = withBinaryFile fp ReadMode $ \h ->
let go ctx = do
chunk <- BS.hGetSome h defaultChunkSize
if BS.null chunk
then pure ctx
else go $! hashUpdate ctx chunk
in go ctx0
-- | Update hash context based on 'GHC.Prim.ByteArray#'
-- by copying into a newly allocated 'Data.ByteArray.Bytes'
-- and updating the hash context from there.
--
-- XXX: @'GHC.Prim.byteArrayContents#' :: 'GHC.Prim.ByteArray#' -> 'GHC.Prim.Addr#'@
-- could be used together with 'Data.ByteArray.MemView' instead.
-- However, 'GHC.Prim.byteArrayContents#' explicitly says, that it is only safe to use
-- on a pinned 'GHC.Prim.ByteArray#'.
contentHashUpdate_byteArray# :: ByteArray# -> Int -> Int -> Context SHA256 -> Context SHA256
contentHashUpdate_byteArray# ba (I# off) (I# len) ctx = hashUpdate ctx $
allocAndFreeze @Bytes (I# len) $ \(Ptr addr) -> IO $ \s ->
(# copyByteArrayToAddr# ba off addr len s, () #)
-- | Update hash context based on the contents of a strict 'Data.Text.Text'.
contentHashUpdate_text :: Context SHA256 -> T.Text -> Context SHA256
contentHashUpdate_text ctx (T.Text (TA.ByteArray arr) off len) =
contentHashUpdate_byteArray# arr off len ctx
instance Monad m => ContentHashable m Fingerprint where
contentHashUpdate ctx (Fingerprint a b) = flip contentHashUpdate_storable a >=> flip contentHashUpdate_storable b $ ctx
instance Monad m => ContentHashable m Bool where contentHashUpdate = contentHashUpdate_primitive
instance Monad m => ContentHashable m Char where contentHashUpdate = contentHashUpdate_primitive
instance Monad m => ContentHashable m Int where contentHashUpdate = contentHashUpdate_primitive
instance Monad m => ContentHashable m Int8 where contentHashUpdate = contentHashUpdate_primitive
instance Monad m => ContentHashable m Int16 where contentHashUpdate = contentHashUpdate_primitive
instance Monad m => ContentHashable m Int32 where contentHashUpdate = contentHashUpdate_primitive
instance Monad m => ContentHashable m Int64 where contentHashUpdate = contentHashUpdate_primitive
instance Monad m => ContentHashable m Word where contentHashUpdate = contentHashUpdate_primitive
instance Monad m => ContentHashable m Word8 where contentHashUpdate = contentHashUpdate_primitive
instance Monad m => ContentHashable m Word16 where contentHashUpdate = contentHashUpdate_primitive
instance Monad m => ContentHashable m Word32 where contentHashUpdate = contentHashUpdate_primitive
instance Monad m => ContentHashable m Word64 where contentHashUpdate = contentHashUpdate_primitive
instance Monad m => ContentHashable m Float where contentHashUpdate = contentHashUpdate_primitive
instance Monad m => ContentHashable m Double where contentHashUpdate = contentHashUpdate_primitive
instance (ContentHashable m n, Typeable n) => ContentHashable m (Ratio n) where
contentHashUpdate ctx x =
flip contentHashUpdate_fingerprint x
>=> flip contentHashUpdate (numerator x)
>=> flip contentHashUpdate (denominator x)
$ ctx
instance Monad m => ContentHashable m Scientific where
contentHashUpdate ctx x =
flip contentHashUpdate_fingerprint x
>=> flip contentHashUpdate (toRational x)
$ ctx
instance Monad m => ContentHashable m Integer where
contentHashUpdate ctx n =
($ ctx) $
flip contentHashUpdate_fingerprint n >=> case n of
S# i ->
pure . flip hashUpdate (C8.pack "S") -- tag constructur
>=> flip contentHashUpdate_storable (I# i) -- hash field
Jp# (BN# ba) ->
pure . flip hashUpdate (C8.pack "L") -- tag constructur
>=> pure . contentHashUpdate_byteArray# ba 0 (I# (sizeofByteArray# ba)) -- hash field
Jn# (BN# ba) ->
pure . flip hashUpdate (C8.pack "N") -- tag constructur
>=> pure . contentHashUpdate_byteArray# ba 0 (I# (sizeofByteArray# ba)) -- hash field
instance Monad m => ContentHashable m Natural where
contentHashUpdate ctx n =
($ ctx) $
flip contentHashUpdate_fingerprint n >=> case n of
NatS# w ->
pure . flip hashUpdate (C8.pack "S") -- tag constructur
>=> flip contentHashUpdate_storable (W# w) -- hash field
NatJ# (BN# ba) ->
pure . flip hashUpdate (C8.pack "L") -- tag constructur
>=> pure . contentHashUpdate_byteArray# ba 0 (I# (sizeofByteArray# ba)) -- hash field
instance Monad m => ContentHashable m BS.ByteString where
contentHashUpdate ctx s =
flip contentHashUpdate_fingerprint s
>=> pure . flip hashUpdate s
$ ctx
instance Monad m => ContentHashable m BSL.ByteString where
contentHashUpdate ctx s =
flip contentHashUpdate_fingerprint s
>=> pure . flip (BSL.foldlChunks hashUpdate) s
$ ctx
instance Monad m => ContentHashable m T.Text where
contentHashUpdate ctx s =
flip contentHashUpdate_fingerprint s
>=> pure . flip contentHashUpdate_text s
$ ctx
instance Monad m => ContentHashable m TL.Text where
contentHashUpdate ctx s =
flip contentHashUpdate_fingerprint s
>=> pure . flip (TL.foldlChunks contentHashUpdate_text) s
$ ctx
instance
(Typeable k, Typeable v, ContentHashable m k, ContentHashable m v) =>
ContentHashable m (Map k v)
where
contentHashUpdate ctx m =
flip contentHashUpdate_fingerprint m
>=> flip contentHashUpdate (Map.toList m)
$ ctx
instance
(Typeable k, Typeable v, ContentHashable m k, ContentHashable m v) =>
ContentHashable m (HashMap.HashMap k v)
where
contentHashUpdate ctx m =
flip contentHashUpdate_fingerprint m
-- XXX: The order of the list is unspecified.
>=> flip contentHashUpdate (HashMap.toList m)
$ ctx
instance
(Typeable v, ContentHashable m v) =>
ContentHashable m (HashSet.HashSet v)
where
contentHashUpdate ctx s =
flip contentHashUpdate_fingerprint s
-- XXX: The order of the list is unspecified.
>=> flip contentHashUpdate (HashSet.toList s)
$ ctx
instance
(Typeable a, ContentHashable m a) =>
ContentHashable m [a]
where
contentHashUpdate ctx l =
flip contentHashUpdate_fingerprint l
>=> flip (foldM contentHashUpdate) l
$ ctx
instance
(Typeable a, ContentHashable m a) =>
ContentHashable m (NonEmpty a)
where
contentHashUpdate ctx l =
flip contentHashUpdate_fingerprint l
>=> flip (foldlM contentHashUpdate) l
$ ctx
instance
(Typeable a, ContentHashable m a) =>
ContentHashable m (V.Vector a)
where
contentHashUpdate ctx v =
flip contentHashUpdate_fingerprint v
>=> flip (V.foldM' contentHashUpdate) v
$ ctx
-- TODO: Rewrite using MemView
instance
(Typeable a, Storable a, ContentHashable m a) =>
ContentHashable m (VS.Vector a)
where
contentHashUpdate ctx v =
flip contentHashUpdate_fingerprint v
>=> flip (VS.foldM' contentHashUpdate) v
$ ctx
instance
(Typeable a, VU.Unbox a, ContentHashable m a) =>
ContentHashable m (VU.Vector a)
where
contentHashUpdate ctx v =
flip contentHashUpdate_fingerprint v
>=> flip (VU.foldM' contentHashUpdate) v
$ ctx
instance Monad m => ContentHashable m ()
instance (ContentHashable m a, ContentHashable m b) => ContentHashable m (a, b)
instance (ContentHashable m a, ContentHashable m b, ContentHashable m c) => ContentHashable m (a, b, c)
instance (ContentHashable m a, ContentHashable m b, ContentHashable m c, ContentHashable m d) => ContentHashable m (a, b, c, d)
instance (ContentHashable m a, ContentHashable m b, ContentHashable m c, ContentHashable m d, ContentHashable m e) => ContentHashable m (a, b, c, d, e)
instance (Monad m, ContentHashable m a, ContentHashable m b, ContentHashable m c, ContentHashable m d, ContentHashable m e, ContentHashable m f) => ContentHashable m (a, b, c, d, e, f)
instance (Monad m, ContentHashable m a, ContentHashable m b, ContentHashable m c, ContentHashable m d, ContentHashable m e, ContentHashable m f, ContentHashable m g) => ContentHashable m (a, b, c, d, e, f, g)
instance ContentHashable m a => ContentHashable m (Maybe a)
instance (ContentHashable m a, ContentHashable m b) => ContentHashable m (Either a b)
instance Monad m => ContentHashable m Aeson.Value
instance Monad m => ContentHashable m (Aeson.KeyMap Aeson.Value) where
contentHashUpdate ctx m = contentHashUpdate ctx (Aeson.toList m)
instance Monad m => ContentHashable m Aeson.Key where
contentHashUpdate ctx k = contentHashUpdate ctx (Aeson.toText k)
class Monad m => GContentHashable m f where
gContentHashUpdate :: Context SHA256 -> f a -> m (Context SHA256)
instance Monad m => GContentHashable m V1 where
gContentHashUpdate ctx _ = pure ctx
instance Monad m => GContentHashable m U1 where
gContentHashUpdate ctx U1 = pure ctx
instance ContentHashable m c => GContentHashable m (K1 i c) where
gContentHashUpdate ctx x = contentHashUpdate ctx (unK1 x)
instance (Constructor c, GContentHashable m f) => GContentHashable m (C1 c f) where
gContentHashUpdate ctx0 x = nameCtx `seq` gContentHashUpdate nameCtx (unM1 x)
where
nameCtx = hashUpdate ctx0 $ C8.pack (conName x)
instance (Datatype d, GContentHashable m f) => GContentHashable m (D1 d f) where
gContentHashUpdate ctx0 x = packageCtx `seq` gContentHashUpdate packageCtx (unM1 x)
where
datatypeCtx = hashUpdate ctx0 $ C8.pack (datatypeName x)
moduleCtx = hashUpdate datatypeCtx $ C8.pack (datatypeName x)
packageCtx = hashUpdate moduleCtx $ C8.pack (datatypeName x)
instance GContentHashable m f => GContentHashable m (S1 s f) where
gContentHashUpdate ctx x = gContentHashUpdate ctx (unM1 x)
instance (GContentHashable m a, GContentHashable m b) => GContentHashable m (a :*: b) where
gContentHashUpdate ctx (x :*: y) = gContentHashUpdate ctx x >>= \ctx' -> ctx' `seq` gContentHashUpdate ctx' y
instance (GContentHashable m a, GContentHashable m b) => GContentHashable m (a :+: b) where
gContentHashUpdate ctx (L1 x) = gContentHashUpdate ctx x
gContentHashUpdate ctx (R1 x) = gContentHashUpdate ctx x
-- XXX: Do we need this?
-- instance GContentHashable (a :.: b) where
-- gContentHashUpdate ctx x = _ (unComp1 x)
instance (Monad m, Typeable b, Typeable t) => ContentHashable m (Path.Path b t) where
contentHashUpdate ctx p@(Path.Internal.Path fp) =
flip contentHashUpdate_fingerprint p
>=> flip contentHashUpdate fp
$ ctx
-- | Path to a regular file
--
-- Only the file's content and its executable permission is taken into account
-- when generating the content hash. The path itself is ignored.
newtype FileContent = FileContent (Path.Path Path.Abs Path.File)
instance ContentHashable IO FileContent where
contentHashUpdate ctx (FileContent fp) = do
exec <- Path.IO.executable <$> Path.IO.getPermissions fp
ctx' <- if exec then contentHashUpdate ctx () else pure ctx
contentHashUpdate_binaryFile ctx' (Path.fromAbsFile fp)
-- | Path to a directory
--
-- Only the contents of the directory and their path relative to the directory
-- are taken into account when generating the content hash.
-- The path to the directory is ignored.
newtype DirectoryContent = DirectoryContent (Path.Path Path.Abs Path.Dir)
instance MonadIO m => ContentHashable m DirectoryContent where
contentHashUpdate ctx0 (DirectoryContent dir0) = liftIO $ do
(dirs, files) <- Path.IO.listDir dir0
ctx' <- foldM hashFile ctx0 (sort files)
foldM hashDir ctx' (sort dirs)
where
hashFile ctx fp =
-- XXX: Do we need to treat symbolic links specially?
flip contentHashUpdate (Path.filename fp)
>=> flip contentHashUpdate (FileContent fp)
$ ctx
hashDir ctx dir =
flip contentHashUpdate (Path.dirname dir)
>=> flip contentHashUpdate (DirectoryContent dir)
$ ctx
instance Monad m => ContentHashable m UTCTime where
contentHashUpdate ctx utcTime =
let secondsSinceEpoch = fromEnum . utcTimeToPOSIXSeconds $ utcTime
in flip contentHashUpdate_fingerprint utcTime
>=> flip contentHashUpdate secondsSinceEpoch
$ ctx
-- | Path to a file to be treated as _externally assured_.
--
-- An externally assured file is handled in a somewhat 'cheating' way by
-- funflow. The 'ContentHashable' instance for such assumes that some external
-- agent guarantees the integrity of the file being referenced. Thus, rather
-- than hashing the file contents, we only consider its (absolute) path, size and
-- modification time, which can be rapidly looked up from filesystem metadata.
--
-- For a similar approach, see the instance for 'ObjectInBucket' in
-- Data.CAS.ContentHashable.S3 (in `cas-hashable-s3` package), where we
-- exploit the fact that S3 is already content hashed to avoid performing any
-- hashing.
newtype ExternallyAssuredFile = ExternallyAssuredFile (Path.Path Path.Abs Path.File)
deriving (Generic, Show)
instance Aeson.FromJSON ExternallyAssuredFile
instance Aeson.ToJSON ExternallyAssuredFile
instance ContentHashable IO ExternallyAssuredFile where
contentHashUpdate ctx (ExternallyAssuredFile fp) = do
modTime <- Path.IO.getModificationTime fp
fSize <- fileSize <$> getFileStatus (Path.toFilePath fp)
flip contentHashUpdate fp
>=> flip contentHashUpdate modTime
>=> flip contentHashUpdate_storable fSize
$ ctx
-- | Path to a directory to be treated as _externally assured_.
--
-- For an externally assured directory, we _do_ traverse its contents and verify
-- those as we would externally assured files, rather than just relying on the
-- directory path. Doing this traversal is pretty cheap, and it's quite likely
-- for directory contents to be modified without modifying the contents.
--
-- If an item in the directory cannot be read due to lacking permissions,
-- then it will be ignored and not included in the hash. If the flow does not
-- have permissions to access the contents of a subdirectory, then these
-- contents cannot influence the outcome of a task and it is okay to exclude
-- them from the hash. In that case we only hash the name, as that could
-- influence the outcome of a task.
newtype ExternallyAssuredDirectory = ExternallyAssuredDirectory (Path.Path Path.Abs Path.Dir)
deriving (Generic, Show)
instance Aeson.FromJSON ExternallyAssuredDirectory
instance Aeson.ToJSON ExternallyAssuredDirectory
instance ContentHashable IO ExternallyAssuredDirectory where
contentHashUpdate ctx0 (ExternallyAssuredDirectory dir0) = do
-- Note that we don't bother looking at the relative directory paths and
-- including these in the hash. This is because the absolute hash gets
-- included every time we hash a file.
(dirs, files) <- Path.IO.listDir dir0
ctx' <- foldM hashFile ctx0 (sort files)
foldM hashDir ctx' (sort dirs)
where
hashFile ctx fp =
contentHashUpdate ctx (ExternallyAssuredFile fp)
`catchPermissionError` \_ -> contentHashUpdate ctx fp
hashDir ctx dir =
contentHashUpdate ctx (ExternallyAssuredDirectory dir)
`catchPermissionError` \_ -> contentHashUpdate ctx dir
catchPermissionError = catchJust $ \e ->
if isPermissionError e then Just e else Nothing