2 {-# LANGUAGE DeriveGeneric, DeriveFunctor, GeneralizedNewtypeDeriving,
3 NamedFieldPuns, BangPatterns, ScopedTypeVariables #-}
4 {-# OPTIONS_GHC -fno-warn-orphans #-}
5 {-# LANGUAGE ScopedTypeVariables #-}
7 -- | An abstraction to help with re-running actions when files or other
8 -- input values they depend on have changed.
10 module Distribution
.Client
.FileMonitor
(
12 -- * Declaring files to monitor
19 monitorNonExistentFile
,
22 monitorNonExistentDirectory
,
23 monitorDirectoryExistence
,
24 monitorFileOrDirectory
,
26 monitorFileGlobExistence
,
27 monitorFileSearchPath
,
28 monitorFileHashedSearchPath
,
30 -- * Creating and checking sets of monitored files
34 MonitorChangedReason
(..),
35 checkFileMonitorChanged
,
38 beginUpdateFileMonitor
,
47 import Distribution
.Client
.Compat
.Prelude
48 import qualified Distribution
.Compat
.Binary
as Binary
50 import qualified Data
.Map
.Strict
as Map
51 import Data
.Binary
.Get
(runGetOrFail
)
52 import qualified Data
.ByteString
.Lazy
as BS
53 import qualified Data
.Hashable
as Hashable
56 import Control
.Monad
.Trans
(MonadIO
, liftIO
)
57 import Control
.Monad
.State
(StateT
, mapStateT
)
58 import qualified Control
.Monad
.State
as State
59 import Control
.Monad
.Except
(ExceptT
, runExceptT
, withExceptT
,
61 import Control
.Exception
63 import Distribution
.Compat
.Time
64 import Distribution
.Client
.Glob
65 import Distribution
.Simple
.Utils
(handleDoesNotExist
, writeFileAtomic
)
66 import Distribution
.Client
.Utils
(mergeBy
, MergeResult
(..))
67 import Distribution
.Utils
.Structured
(structuredEncode
, Tag
(..))
68 import System
.FilePath
69 import System
.Directory
72 ------------------------------------------------------------------------------
73 -- Types for specifying files to monitor
77 -- | A description of a file (or set of files) to monitor for changes.
79 -- Where file paths are relative they are relative to a common directory
80 -- (e.g. project root), not necessarily the process current directory.
82 data MonitorFilePath
=
84 monitorKindFile
:: !MonitorKindFile
,
85 monitorKindDir
:: !MonitorKindDir
,
86 monitorPath
:: !FilePath
89 monitorKindFile
:: !MonitorKindFile
,
90 monitorKindDir
:: !MonitorKindDir
,
91 monitorPathGlob
:: !FilePathGlob
93 deriving (Eq
, Show, Generic
)
95 data MonitorKindFile
= FileExists
99 deriving (Eq
, Show, Generic
)
101 data MonitorKindDir
= DirExists
104 deriving (Eq
, Show, Generic
)
106 instance Binary MonitorFilePath
107 instance Binary MonitorKindFile
108 instance Binary MonitorKindDir
110 instance Structured MonitorFilePath
111 instance Structured MonitorKindFile
112 instance Structured MonitorKindDir
114 -- | Monitor a single file for changes, based on its modification time.
115 -- The monitored file is considered to have changed if it no longer
116 -- exists or if its modification time has changed.
118 monitorFile
:: FilePath -> MonitorFilePath
119 monitorFile
= MonitorFile FileModTime DirNotExists
121 -- | Monitor a single file for changes, based on its modification time
122 -- and content hash. The monitored file is considered to have changed if
123 -- it no longer exists or if its modification time and content hash have
126 monitorFileHashed
:: FilePath -> MonitorFilePath
127 monitorFileHashed
= MonitorFile FileHashed DirNotExists
129 -- | Monitor a single non-existent file for changes. The monitored file
130 -- is considered to have changed if it exists.
132 monitorNonExistentFile
:: FilePath -> MonitorFilePath
133 monitorNonExistentFile
= MonitorFile FileNotExists DirNotExists
135 -- | Monitor a single file for existence only. The monitored file is
136 -- considered to have changed if it no longer exists.
138 monitorFileExistence
:: FilePath -> MonitorFilePath
139 monitorFileExistence
= MonitorFile FileExists DirNotExists
141 -- | Monitor a single directory for changes, based on its modification
142 -- time. The monitored directory is considered to have changed if it no
143 -- longer exists or if its modification time has changed.
145 monitorDirectory
:: FilePath -> MonitorFilePath
146 monitorDirectory
= MonitorFile FileNotExists DirModTime
148 -- | Monitor a single non-existent directory for changes. The monitored
149 -- directory is considered to have changed if it exists.
151 monitorNonExistentDirectory
:: FilePath -> MonitorFilePath
152 -- Just an alias for monitorNonExistentFile, since you can't
153 -- tell the difference between a non-existent directory and
154 -- a non-existent file :)
155 monitorNonExistentDirectory
= monitorNonExistentFile
157 -- | Monitor a single directory for existence. The monitored directory is
158 -- considered to have changed only if it no longer exists.
160 monitorDirectoryExistence
:: FilePath -> MonitorFilePath
161 monitorDirectoryExistence
= MonitorFile FileNotExists DirExists
163 -- | Monitor a single file or directory for changes, based on its modification
164 -- time. The monitored file is considered to have changed if it no longer
165 -- exists or if its modification time has changed.
167 monitorFileOrDirectory
:: FilePath -> MonitorFilePath
168 monitorFileOrDirectory
= MonitorFile FileModTime DirModTime
170 -- | Monitor a set of files (or directories) identified by a file glob.
171 -- The monitored glob is considered to have changed if the set of files
172 -- matching the glob changes (i.e. creations or deletions), or for files if the
173 -- modification time and content hash of any matching file has changed.
175 monitorFileGlob
:: FilePathGlob
-> MonitorFilePath
176 monitorFileGlob
= MonitorFileGlob FileHashed DirExists
178 -- | Monitor a set of files (or directories) identified by a file glob for
179 -- existence only. The monitored glob is considered to have changed if the set
180 -- of files matching the glob changes (i.e. creations or deletions).
182 monitorFileGlobExistence
:: FilePathGlob
-> MonitorFilePath
183 monitorFileGlobExistence
= MonitorFileGlob FileExists DirExists
185 -- | Creates a list of files to monitor when you search for a file which
186 -- unsuccessfully looked in @notFoundAtPaths@ before finding it at
188 monitorFileSearchPath
:: [FilePath] -> FilePath -> [MonitorFilePath
]
189 monitorFileSearchPath notFoundAtPaths foundAtPath
=
190 monitorFile foundAtPath
191 : map monitorNonExistentFile notFoundAtPaths
193 -- | Similar to 'monitorFileSearchPath', but also instructs us to
194 -- monitor the hash of the found file.
195 monitorFileHashedSearchPath
:: [FilePath] -> FilePath -> [MonitorFilePath
]
196 monitorFileHashedSearchPath notFoundAtPaths foundAtPath
=
197 monitorFileHashed foundAtPath
198 : map monitorNonExistentFile notFoundAtPaths
201 ------------------------------------------------------------------------------
202 -- Implementation types, files status
205 -- | The state necessary to determine whether a set of monitored
206 -- files has changed. It consists of two parts: a set of specific
207 -- files to be monitored (index by their path), and a list of
208 -- globs, which monitor may files at once.
209 data MonitorStateFileSet
210 = MonitorStateFileSet
![MonitorStateFile
]
212 -- Morally this is not actually a set but a bag (represented by lists).
213 -- There is no principled reason to use a bag here rather than a set, but
214 -- there is also no particular gain either. That said, we do preserve the
215 -- order of the lists just to reduce confusion (and have predictable I/O
217 deriving (Show, Generic
)
219 instance Binary MonitorStateFileSet
220 instance Structured MonitorStateFileSet
224 -- | The state necessary to determine whether a monitored file has changed.
226 -- This covers all the cases of 'MonitorFilePath' except for globs which is
227 -- covered separately by 'MonitorStateGlob'.
229 -- The @Maybe ModTime@ is to cover the case where we already consider the
230 -- file to have changed, either because it had already changed by the time we
231 -- did the snapshot (i.e. too new, changed since start of update process) or it
232 -- no longer exists at all.
234 data MonitorStateFile
= MonitorStateFile
!MonitorKindFile
!MonitorKindDir
235 !FilePath !MonitorStateFileStatus
236 deriving (Show, Generic
)
238 data MonitorStateFileStatus
239 = MonitorStateFileExists
240 | MonitorStateFileModTime
!ModTime
-- ^ cached file mtime
241 | MonitorStateFileHashed
!ModTime
!Hash
-- ^ cached mtime and content hash
242 | MonitorStateDirExists
243 | MonitorStateDirModTime
!ModTime
-- ^ cached dir mtime
244 | MonitorStateNonExistent
245 | MonitorStateAlreadyChanged
246 deriving (Show, Generic
)
248 instance Binary MonitorStateFile
249 instance Binary MonitorStateFileStatus
250 instance Structured MonitorStateFile
251 instance Structured MonitorStateFileStatus
253 -- | The state necessary to determine whether the files matched by a globbing
254 -- match have changed.
256 data MonitorStateGlob
= MonitorStateGlob
!MonitorKindFile
!MonitorKindDir
257 !FilePathRoot
!MonitorStateGlobRel
258 deriving (Show, Generic
)
260 data MonitorStateGlobRel
261 = MonitorStateGlobDirs
262 !Glob
!FilePathGlobRel
264 ![(FilePath, MonitorStateGlobRel
)] -- invariant: sorted
266 | MonitorStateGlobFiles
269 ![(FilePath, MonitorStateFileStatus
)] -- invariant: sorted
271 | MonitorStateGlobDirTrailing
272 deriving (Show, Generic
)
274 instance Binary MonitorStateGlob
275 instance Binary MonitorStateGlobRel
277 instance Structured MonitorStateGlob
278 instance Structured MonitorStateGlobRel
280 -- | We can build a 'MonitorStateFileSet' from a set of 'MonitorFilePath' by
281 -- inspecting the state of the file system, and we can go in the reverse
282 -- direction by just forgetting the extra info.
284 reconstructMonitorFilePaths
:: MonitorStateFileSet
-> [MonitorFilePath
]
285 reconstructMonitorFilePaths
(MonitorStateFileSet singlePaths globPaths
) =
286 map getSinglePath singlePaths
++ map getGlobPath globPaths
288 getSinglePath
:: MonitorStateFile
-> MonitorFilePath
289 getSinglePath
(MonitorStateFile kindfile kinddir filepath _
) =
290 MonitorFile kindfile kinddir filepath
292 getGlobPath
:: MonitorStateGlob
-> MonitorFilePath
293 getGlobPath
(MonitorStateGlob kindfile kinddir root gstate
) =
294 MonitorFileGlob kindfile kinddir
$ FilePathGlob root
$
296 MonitorStateGlobDirs glob globs _ _
-> GlobDir glob globs
297 MonitorStateGlobFiles glob _ _
-> GlobFile glob
298 MonitorStateGlobDirTrailing
-> GlobDirTrailing
300 ------------------------------------------------------------------------------
301 -- Checking the status of monitored files
304 -- | A monitor for detecting changes to a set of files. It can be used to
305 -- efficiently test if any of a set of files (specified individually or by
306 -- glob patterns) has changed since some snapshot. In addition, it also checks
307 -- for changes in a value (of type @a@), and when there are no changes in
308 -- either it returns a saved value (of type @b@).
310 -- The main use case looks like this: suppose we have some expensive action
311 -- that depends on certain pure inputs and reads some set of files, and
312 -- produces some pure result. We want to avoid re-running this action when it
313 -- would produce the same result. So we need to monitor the files the action
314 -- looked at, the other pure input values, and we need to cache the result.
315 -- Then at some later point, if the input value didn't change, and none of the
316 -- files changed, then we can re-use the cached result rather than re-running
319 -- This can be achieved using a 'FileMonitor'. Each 'FileMonitor' instance
320 -- saves state in a disk file, so the file for that has to be specified,
321 -- making sure it is unique. The pattern is to use 'checkFileMonitorChanged'
322 -- to see if there's been any change. If there is, re-run the action, keeping
323 -- track of the files, then use 'updateFileMonitor' to record the current
324 -- set of files to monitor, the current input value for the action, and the
325 -- result of the action.
327 -- The typical occurrence of this pattern is captured by 'rerunIfChanged'
328 -- and the 'Rebuild' monad. More complicated cases may need to use
329 -- 'checkFileMonitorChanged' and 'updateFileMonitor' directly.
334 -- | The file where this 'FileMonitor' should store its state.
336 fileMonitorCacheFile
:: FilePath,
338 -- | Compares a new cache key with old one to determine if a
339 -- corresponding cached value is still valid.
341 -- Typically this is just an equality test, but in some
342 -- circumstances it can make sense to do things like subset
345 -- The first arg is the new value, the second is the old cached value.
347 fileMonitorKeyValid
:: a
-> a
-> Bool,
349 -- | When this mode is enabled, if 'checkFileMonitorChanged' returns
350 -- 'MonitoredValueChanged' then we have the guarantee that no files
351 -- changed, that the value change was the only change. In the default
352 -- mode no such guarantee is provided which is slightly faster.
354 fileMonitorCheckIfOnlyValueChanged
:: Bool
357 -- | Define a new file monitor.
359 -- It's best practice to define file monitor values once, and then use the
360 -- same value for 'checkFileMonitorChanged' and 'updateFileMonitor' as this
361 -- ensures you get the same types @a@ and @b@ for reading and writing.
363 -- The path of the file monitor itself must be unique because it keeps state
364 -- on disk and these would clash.
366 newFileMonitor
:: Eq a
=> FilePath -- ^ The file to cache the state of the
367 -- file monitor. Must be unique.
369 newFileMonitor path
= FileMonitor path
(==) False
371 -- | The result of 'checkFileMonitorChanged': either the monitored files or
372 -- value changed (and it tells us which it was) or nothing changed and we get
373 -- the cached result.
375 data MonitorChanged a b
=
376 -- | The monitored files and value did not change. The cached result is
379 -- The set of monitored files is also returned. This is useful
380 -- for composing or nesting 'FileMonitor's.
381 MonitorUnchanged b
[MonitorFilePath
]
383 -- | The monitor found that something changed. The reason is given.
385 | MonitorChanged
(MonitorChangedReason a
)
388 -- | What kind of change 'checkFileMonitorChanged' detected.
390 data MonitorChangedReason a
=
392 -- | One of the files changed (existence, file type, mtime or file
393 -- content, depending on the 'MonitorFilePath' in question)
394 MonitoredFileChanged
FilePath
396 -- | The pure input value changed.
398 -- The previous cached key value is also returned. This is sometimes
399 -- useful when using a 'fileMonitorKeyValid' function that is not simply
400 -- '(==)', when invalidation can be partial. In such cases it can make
401 -- sense to 'updateFileMonitor' with a key value that's a combination of
402 -- the new and old (e.g. set union).
403 | MonitoredValueChanged a
405 -- | There was no saved monitor state, cached value etc. Ie the file
406 -- for the 'FileMonitor' does not exist.
409 -- | There was existing state, but we could not read it. This typically
410 -- happens when the code has changed compared to an existing 'FileMonitor'
411 -- cache file and type of the input value or cached value has changed such
412 -- that we cannot decode the values. This is completely benign as we can
413 -- treat is just as if there were no cache file and re-run.
414 | MonitorCorruptCache
415 deriving (Eq
, Show, Functor
)
417 -- | Test if the input value or files monitored by the 'FileMonitor' have
418 -- changed. If not, return the cached value.
420 -- See 'FileMonitor' for a full explanation.
422 checkFileMonitorChanged
423 :: forall a b
. (Binary a
, Structured a
, Binary b
, Structured b
)
424 => FileMonitor a b
-- ^ cache file path
425 -> FilePath -- ^ root directory
426 -> a
-- ^ guard or key value
427 -> IO (MonitorChanged a b
) -- ^ did the key or any paths change?
428 checkFileMonitorChanged
429 monitor
@FileMonitor
{ fileMonitorKeyValid
,
430 fileMonitorCheckIfOnlyValueChanged
}
433 -- Consider it a change if the cache file does not exist,
434 -- or we cannot decode it. Sadly ErrorCall can still happen, despite
435 -- using decodeFileOrFail, e.g. Data.Char.chr errors
437 handleDoesNotExist
(MonitorChanged MonitorFirstRun
) $
438 handleErrorCall
(MonitorChanged MonitorCorruptCache
) $
439 withCacheFile monitor
$
440 either (\_
-> return (MonitorChanged MonitorCorruptCache
))
444 checkStatusCache
:: (MonitorStateFileSet
, a
, Either String b
) -> IO (MonitorChanged a b
)
445 checkStatusCache
(cachedFileStatus
, cachedKey
, cachedResult
) = do
446 change
<- checkForChanges
448 Just reason
-> return (MonitorChanged reason
)
449 Nothing
-> case cachedResult
of
450 Left _
-> pure
(MonitorChanged MonitorCorruptCache
)
451 Right cr
-> return (MonitorUnchanged cr monitorFiles
)
452 where monitorFiles
= reconstructMonitorFilePaths cachedFileStatus
454 -- In fileMonitorCheckIfOnlyValueChanged mode we want to guarantee that
455 -- if we return MonitoredValueChanged that only the value changed.
456 -- We do that by checking for file changes first. Otherwise it makes
457 -- more sense to do the cheaper test first.
458 checkForChanges
:: IO (Maybe (MonitorChangedReason a
))
460 | fileMonitorCheckIfOnlyValueChanged
461 = checkFileChange cachedFileStatus cachedKey cachedResult
463 checkValueChange cachedKey
466 = checkValueChange cachedKey
468 checkFileChange cachedFileStatus cachedKey cachedResult
470 mplusMaybeT
:: Monad m
=> m
(Maybe a1
) -> m
(Maybe a1
) -> m
(Maybe a1
)
471 mplusMaybeT ma mb
= do
475 Just x
-> return (Just x
)
477 -- Check if the guard value has changed
478 checkValueChange
:: a
-> IO (Maybe (MonitorChangedReason a
))
479 checkValueChange cachedKey
480 |
not (fileMonitorKeyValid currentKey cachedKey
)
481 = return (Just
(MonitoredValueChanged cachedKey
))
485 -- Check if any file has changed
486 checkFileChange
:: MonitorStateFileSet
-> a
-> Either String b
-> IO (Maybe (MonitorChangedReason a
))
487 checkFileChange cachedFileStatus cachedKey cachedResult
= do
488 res
<- probeFileSystem root cachedFileStatus
490 -- Some monitored file has changed
492 return (Just
(MonitoredFileChanged
(normalise changedPath
)))
494 -- No monitored file has changed
495 Right
(cachedFileStatus
', cacheStatus
) -> do
497 -- But we might still want to update the cache
498 whenCacheChanged cacheStatus
$
501 Right cr
-> rewriteCacheFile monitor cachedFileStatus
' cachedKey cr
505 -- | Lazily decode a triple, parsing the first two fields strictly and
506 -- returning a lazy value containing either the last one or an error.
507 -- This is helpful for cabal cache files where the first two components
508 -- contain header data that lets one test if the cache is still valid,
509 -- and the last (potentially large) component is the cached value itself.
510 -- This way we can test for cache validity without needing to pay the
511 -- cost of the decode of stale cache data. This lives here rather than
512 -- Distribution.Utils.Structured because it depends on a newer version of
513 -- binary than supported in the Cabal library proper.
514 structuredDecodeTriple
515 :: forall a b c
. (Structured a
, Structured b
, Structured c
, Binary
.Binary a
, Binary
.Binary b
, Binary
.Binary c
)
516 => BS
.ByteString
-> Either String (a
, b
, Either String c
)
517 structuredDecodeTriple lbs
=
519 (`runGetOrFail` lbs
) $ do
520 (_
:: Tag
(a
,b
,c
)) <- Binary
.get
521 (a
:: a
) <- Binary
.get
522 (b
:: b
) <- Binary
.get
524 cleanEither
(Left
(_
, pos
, msg
)) = Left
("Data.Binary.Get.runGet at position " ++ show pos
++ ": " ++ msg
)
525 cleanEither
(Right
(_
,_
,v
)) = Right v
527 in case partialDecode
of
528 Left
(_
, pos
, msg
) -> Left
("Data.Binary.Get.runGet at position " ++ show pos
++ ": " ++ msg
)
529 Right
(lbs
', _
, (x
,y
)) -> Right
(x
, y
, cleanEither
$ runGetOrFail
(Binary
.get
:: Binary
.Get c
) lbs
')
531 -- | Helper for reading the cache file.
533 -- This determines the type and format of the binary cache file.
535 withCacheFile
:: (Binary a
, Structured a
, Binary b
, Structured b
)
537 -> (Either String (MonitorStateFileSet
, a
, Either String b
) -> IO r
)
539 withCacheFile
(FileMonitor
{fileMonitorCacheFile
}) k
=
540 withBinaryFile fileMonitorCacheFile ReadMode
$ \hnd
-> do
541 contents
<- structuredDecodeTriple
<$> BS
.hGetContents hnd
544 -- | Helper for writing the cache file.
546 -- This determines the type and format of the binary cache file.
548 rewriteCacheFile
:: (Binary a
, Structured a
, Binary b
, Structured b
)
550 -> MonitorStateFileSet
-> a
-> b
-> IO ()
551 rewriteCacheFile FileMonitor
{fileMonitorCacheFile
} fileset key result
=
552 writeFileAtomic fileMonitorCacheFile
$
553 structuredEncode
(fileset
, key
, result
)
555 -- | Probe the file system to see if any of the monitored files have changed.
557 -- It returns Nothing if any file changed, or returns a possibly updated
558 -- file 'MonitorStateFileSet' plus an indicator of whether it actually changed.
560 -- We may need to update the cache since there may be changes in the filesystem
561 -- state which don't change any of our affected files.
563 -- Consider the glob @{proj1,proj2}\/\*.cabal@. Say we first run and find a
564 -- @proj1@ directory containing @proj1.cabal@ yet no @proj2@. If we later run
565 -- and find @proj2@ was created, yet contains no files matching @*.cabal@ then
566 -- we want to update the cache despite no changes in our relevant file set.
567 -- Specifically, we should add an mtime for this directory so we can avoid
568 -- re-traversing the directory in future runs.
570 probeFileSystem
:: FilePath -> MonitorStateFileSet
571 -> IO (Either FilePath (MonitorStateFileSet
, CacheChanged
))
572 probeFileSystem root
(MonitorStateFileSet singlePaths globPaths
) =
575 [ probeMonitorStateFileStatus root file status
576 | MonitorStateFile _ _ file status
<- singlePaths
]
577 -- The glob monitors can require state changes
580 [ probeMonitorStateGlob root globPath
581 | globPath
<- globPaths
]
582 return (MonitorStateFileSet singlePaths globPaths
')
585 -----------------------------------------------
586 -- Monad for checking for file system changes
588 -- We need to be able to bail out if we detect a change (using ExceptT),
589 -- but if there's no change we need to be able to rebuild the monitor
590 -- state. And we want to optimise that rebuilding by keeping track if
591 -- anything actually changed (using StateT), so that in the typical case
592 -- we can avoid rewriting the state file.
594 newtype ChangedM a
= ChangedM
(StateT CacheChanged
(ExceptT
FilePath IO) a
)
595 deriving (Functor
, Applicative
, Monad
, MonadIO
)
597 runChangedM
:: ChangedM a
-> IO (Either FilePath (a
, CacheChanged
))
598 runChangedM
(ChangedM action
) =
599 runExceptT
$ State
.runStateT action CacheUnchanged
601 somethingChanged
:: FilePath -> ChangedM a
602 somethingChanged path
= ChangedM
$ throwError path
604 cacheChanged
:: ChangedM
()
605 cacheChanged
= ChangedM
$ State
.put CacheChanged
607 mapChangedFile
:: (FilePath -> FilePath) -> ChangedM a
-> ChangedM a
608 mapChangedFile adjust
(ChangedM a
) =
609 ChangedM
(mapStateT
(withExceptT adjust
) a
)
611 data CacheChanged
= CacheChanged | CacheUnchanged
613 whenCacheChanged
:: Monad m
=> CacheChanged
-> m
() -> m
()
614 whenCacheChanged CacheChanged action
= action
615 whenCacheChanged CacheUnchanged _
= return ()
617 ----------------------
619 -- | Probe the file system to see if a single monitored file has changed.
621 probeMonitorStateFileStatus
:: FilePath -> FilePath
622 -> MonitorStateFileStatus
624 probeMonitorStateFileStatus root file status
=
626 MonitorStateFileExists
->
627 probeFileExistence root file
629 MonitorStateFileModTime mtime
->
630 probeFileModificationTime root file mtime
632 MonitorStateFileHashed mtime hash
->
633 probeFileModificationTimeAndHash root file mtime hash
635 MonitorStateDirExists
->
636 probeDirExistence root file
638 MonitorStateDirModTime mtime
->
639 probeFileModificationTime root file mtime
641 MonitorStateNonExistent
->
642 probeFileNonExistence root file
644 MonitorStateAlreadyChanged
->
645 somethingChanged file
648 -- | Probe the file system to see if a monitored file glob has changed.
650 probeMonitorStateGlob
:: FilePath -- ^ root path
652 -> ChangedM MonitorStateGlob
653 probeMonitorStateGlob relroot
654 (MonitorStateGlob kindfile kinddir globroot glob
) = do
655 root
<- liftIO
$ getFilePathRootDirectory globroot relroot
658 MonitorStateGlob kindfile kinddir globroot
<$>
659 probeMonitorStateGlobRel kindfile kinddir root
"." glob
661 -- for absolute cases, make the changed file we report absolute too
663 mapChangedFile
(root
</>) $
664 MonitorStateGlob kindfile kinddir globroot
<$>
665 probeMonitorStateGlobRel kindfile kinddir root
"" glob
667 probeMonitorStateGlobRel
:: MonitorKindFile
-> MonitorKindDir
668 -> FilePath -- ^ root path
669 -> FilePath -- ^ path of the directory we are
670 -- looking in relative to @root@
671 -> MonitorStateGlobRel
672 -> ChangedM MonitorStateGlobRel
673 probeMonitorStateGlobRel kindfile kinddir root dirName
674 (MonitorStateGlobDirs glob globPath mtime children
) = do
675 change
<- liftIO
$ checkDirectoryModificationTime
(root
</> dirName
) mtime
678 children
' <- sequence
679 [ do fstate
' <- probeMonitorStateGlobRel
680 kindfile kinddir root
681 (dirName
</> fname
) fstate
682 return (fname
, fstate
')
683 |
(fname
, fstate
) <- children
]
684 return $! MonitorStateGlobDirs glob globPath mtime children
'
687 -- directory modification time changed:
688 -- a matching subdir may have been added or deleted
689 matches
<- filterM (\entry
-> let subdir
= root
</> dirName
</> entry
690 in liftIO
$ doesDirectoryExist subdir
)
691 . filter (matchGlob glob
)
692 =<< liftIO
(getDirectoryContents (root
</> dirName
))
694 children
' <- traverse probeMergeResult
$
695 mergeBy
(\(path1
,_
) path2
-> compare path1 path2
)
698 return $! MonitorStateGlobDirs glob globPath mtime
' children
'
699 -- Note that just because the directory has changed, we don't force
700 -- a cache rewrite with 'cacheChanged' since that has some cost, and
701 -- all we're saving is scanning the directory. But we do rebuild the
702 -- cache with the new mtime', so that if the cache is rewritten for
703 -- some other reason, we'll take advantage of that.
706 probeMergeResult
:: MergeResult
(FilePath, MonitorStateGlobRel
) FilePath
707 -> ChangedM
(FilePath, MonitorStateGlobRel
)
709 -- Only in cached (directory deleted)
710 probeMergeResult
(OnlyInLeft
(path
, fstate
)) = do
711 case allMatchingFiles
(dirName
</> path
) fstate
of
712 [] -> return (path
, fstate
)
713 -- Strictly speaking we should be returning 'CacheChanged' above
714 -- as we should prune the now-missing 'MonitorStateGlobRel'. However
715 -- we currently just leave these now-redundant entries in the
716 -- cache as they cost no IO and keeping them allows us to avoid
717 -- rewriting the cache.
718 (file
:_
) -> somethingChanged file
720 -- Only in current filesystem state (directory added)
721 probeMergeResult
(OnlyInRight path
) = do
722 fstate
<- liftIO
$ buildMonitorStateGlobRel Nothing Map
.empty
723 kindfile kinddir root
(dirName
</> path
) globPath
724 case allMatchingFiles
(dirName
</> path
) fstate
of
725 (file
:_
) -> somethingChanged file
726 -- This is the only case where we use 'cacheChanged' because we can
727 -- have a whole new dir subtree (of unbounded size and cost), so we
728 -- need to save the state of that new subtree in the cache.
729 [] -> cacheChanged
>> return (path
, fstate
)
732 probeMergeResult
(InBoth
(path
, fstate
) _
) = do
733 fstate
' <- probeMonitorStateGlobRel kindfile kinddir
734 root
(dirName
</> path
) fstate
735 return (path
, fstate
')
737 -- | Does a 'MonitorStateGlob' have any relevant files within it?
738 allMatchingFiles
:: FilePath -> MonitorStateGlobRel
-> [FilePath]
739 allMatchingFiles dir
(MonitorStateGlobFiles _ _ entries
) =
740 [ dir
</> fname |
(fname
, _
) <- entries
]
741 allMatchingFiles dir
(MonitorStateGlobDirs _ _ _ entries
) =
743 |
(subdir
, fstate
) <- entries
744 , res
<- allMatchingFiles
(dir
</> subdir
) fstate
]
745 allMatchingFiles dir MonitorStateGlobDirTrailing
=
748 probeMonitorStateGlobRel _ _ root dirName
749 (MonitorStateGlobFiles glob mtime children
) = do
750 change
<- liftIO
$ checkDirectoryModificationTime
(root
</> dirName
) mtime
751 mtime
' <- case change
of
752 Nothing
-> return mtime
754 -- directory modification time changed:
755 -- a matching file may have been added or deleted
756 matches
<- return . filter (matchGlob glob
)
757 =<< liftIO
(getDirectoryContents (root
</> dirName
))
759 traverse_ probeMergeResult
$
760 mergeBy
(\(path1
,_
) path2
-> compare path1 path2
)
765 -- Check that none of the children have changed
766 for_ children
$ \(file
, status
) ->
767 probeMonitorStateFileStatus root
(dirName
</> file
) status
770 return (MonitorStateGlobFiles glob mtime
' children
)
771 -- Again, we don't force a cache rewrite with 'cacheChanged', but we do use
772 -- the new mtime' if any.
774 probeMergeResult
:: MergeResult
(FilePath, MonitorStateFileStatus
) FilePath
776 probeMergeResult mr
= case mr
of
777 InBoth _ _
-> return ()
778 -- this is just to be able to accurately report which file changed:
779 OnlyInLeft
(path
, _
) -> somethingChanged
(dirName
</> path
)
780 OnlyInRight path
-> somethingChanged
(dirName
</> path
)
782 probeMonitorStateGlobRel _ _ _ _ MonitorStateGlobDirTrailing
=
783 return MonitorStateGlobDirTrailing
785 ------------------------------------------------------------------------------
787 -- | Update the input value and the set of files monitored by the
788 -- 'FileMonitor', plus the cached value that may be returned in future.
790 -- This takes a snapshot of the state of the monitored files right now, so
791 -- 'checkFileMonitorChanged' will look for file system changes relative to
794 -- This is typically done once the action has been completed successfully and
795 -- we have the action's result and we know what files it looked at. See
796 -- 'FileMonitor' for a full explanation.
798 -- If we do take the snapshot after the action has completed then we have a
799 -- problem. The problem is that files might have changed /while/ the action was
800 -- running but /after/ the action read them. If we take the snapshot after the
801 -- action completes then we will miss these changes. The solution is to record
802 -- a timestamp before beginning execution of the action and then we make the
803 -- conservative assumption that any file that has changed since then has
804 -- already changed, ie the file monitor state for these files will be such that
805 -- 'checkFileMonitorChanged' will report that they have changed.
807 -- So if you do use 'updateFileMonitor' after the action (so you can discover
808 -- the files used rather than predicting them in advance) then use
809 -- 'beginUpdateFileMonitor' to get a timestamp and pass that. Alternatively,
810 -- if you take the snapshot in advance of the action, or you're not monitoring
811 -- any files then you can use @Nothing@ for the timestamp parameter.
814 :: (Binary a
, Structured a
, Binary b
, Structured b
)
815 => FileMonitor a b
-- ^ cache file path
816 -> FilePath -- ^ root directory
817 -> Maybe MonitorTimestamp
-- ^ timestamp when the update action started
818 -> [MonitorFilePath
] -- ^ files of interest relative to root
819 -> a
-- ^ the current key value
820 -> b
-- ^ the current result value
822 updateFileMonitor monitor root startTime monitorFiles
823 cachedKey cachedResult
= do
824 hashcache
<- readCacheFileHashes monitor
825 msfs
<- buildMonitorStateFileSet startTime hashcache root monitorFiles
826 rewriteCacheFile monitor msfs cachedKey cachedResult
828 -- | A timestamp to help with the problem of file changes during actions.
829 -- See 'updateFileMonitor' for details.
831 newtype MonitorTimestamp
= MonitorTimestamp ModTime
833 -- | Record a timestamp at the beginning of an action, and when the action
834 -- completes call 'updateFileMonitor' passing it the timestamp.
835 -- See 'updateFileMonitor' for details.
837 beginUpdateFileMonitor
:: IO MonitorTimestamp
838 beginUpdateFileMonitor
= MonitorTimestamp
<$> getCurTime
840 -- | Take the snapshot of the monitored files. That is, given the
841 -- specification of the set of files we need to monitor, inspect the state
842 -- of the file system now and collect the information we'll need later to
843 -- determine if anything has changed.
845 buildMonitorStateFileSet
:: Maybe MonitorTimestamp
-- ^ optional: timestamp
846 -- of the start of the action
847 -> FileHashCache
-- ^ existing file hashes
848 -> FilePath -- ^ root directory
849 -> [MonitorFilePath
] -- ^ patterns of interest
851 -> IO MonitorStateFileSet
852 buildMonitorStateFileSet mstartTime hashcache root
=
855 go
:: [MonitorStateFile
] -> [MonitorStateGlob
]
856 -> [MonitorFilePath
] -> IO MonitorStateFileSet
857 go
!singlePaths
!globPaths
[] =
858 return (MonitorStateFileSet
(reverse singlePaths
) (reverse globPaths
))
860 go
!singlePaths
!globPaths
861 (MonitorFile kindfile kinddir path
: monitors
) = do
862 monitorState
<- MonitorStateFile kindfile kinddir path
863 <$> buildMonitorStateFile mstartTime hashcache
864 kindfile kinddir root path
865 go
(monitorState
: singlePaths
) globPaths monitors
867 go
!singlePaths
!globPaths
868 (MonitorFileGlob kindfile kinddir globPath
: monitors
) = do
869 monitorState
<- buildMonitorStateGlob mstartTime hashcache
870 kindfile kinddir root globPath
871 go singlePaths
(monitorState
: globPaths
) monitors
874 buildMonitorStateFile
:: Maybe MonitorTimestamp
-- ^ start time of update
875 -> FileHashCache
-- ^ existing file hashes
876 -> MonitorKindFile
-> MonitorKindDir
877 -> FilePath -- ^ the root directory
879 -> IO MonitorStateFileStatus
880 buildMonitorStateFile mstartTime hashcache kindfile kinddir root path
= do
881 let abspath
= root
</> path
882 isFile
<- doesFileExist abspath
883 isDir
<- doesDirectoryExist abspath
884 case (isFile
, kindfile
, isDir
, kinddir
) of
885 (_
, FileNotExists
, _
, DirNotExists
) ->
886 -- we don't need to care if it exists now, since we check at probe time
887 return MonitorStateNonExistent
889 (False, _
, False, _
) ->
890 return MonitorStateAlreadyChanged
892 (True, FileExists
, _
, _
) ->
893 return MonitorStateFileExists
895 (True, FileModTime
, _
, _
) ->
896 handleIOException MonitorStateAlreadyChanged
$ do
897 mtime
<- getModTime abspath
898 if changedDuringUpdate mstartTime mtime
899 then return MonitorStateAlreadyChanged
900 else return (MonitorStateFileModTime mtime
)
902 (True, FileHashed
, _
, _
) ->
903 handleIOException MonitorStateAlreadyChanged
$ do
904 mtime
<- getModTime abspath
905 if changedDuringUpdate mstartTime mtime
906 then return MonitorStateAlreadyChanged
907 else do hash
<- getFileHash hashcache abspath abspath mtime
908 return (MonitorStateFileHashed mtime hash
)
910 (_
, _
, True, DirExists
) ->
911 return MonitorStateDirExists
913 (_
, _
, True, DirModTime
) ->
914 handleIOException MonitorStateAlreadyChanged
$ do
915 mtime
<- getModTime abspath
916 if changedDuringUpdate mstartTime mtime
917 then return MonitorStateAlreadyChanged
918 else return (MonitorStateDirModTime mtime
)
920 (False, _
, True, DirNotExists
) -> return MonitorStateAlreadyChanged
921 (True, FileNotExists
, False, _
) -> return MonitorStateAlreadyChanged
923 -- | If we have a timestamp for the beginning of the update, then any file
924 -- mtime later than this means that it changed during the update and we ought
925 -- to consider the file as already changed.
927 changedDuringUpdate
:: Maybe MonitorTimestamp
-> ModTime
-> Bool
928 changedDuringUpdate
(Just
(MonitorTimestamp startTime
)) mtime
930 changedDuringUpdate _ _
= False
932 -- | Much like 'buildMonitorStateFileSet' but for the somewhat complicated case
935 -- This gets used both by 'buildMonitorStateFileSet' when we're taking the
936 -- file system snapshot, but also by 'probeGlobStatus' as part of checking
937 -- the monitored (globed) files for changes when we find a whole new subtree.
939 buildMonitorStateGlob
:: Maybe MonitorTimestamp
-- ^ start time of update
940 -> FileHashCache
-- ^ existing file hashes
941 -> MonitorKindFile
-> MonitorKindDir
942 -> FilePath -- ^ the root directory
943 -> FilePathGlob
-- ^ the matching glob
944 -> IO MonitorStateGlob
945 buildMonitorStateGlob mstartTime hashcache kindfile kinddir relroot
946 (FilePathGlob globroot globPath
) = do
947 root
<- liftIO
$ getFilePathRootDirectory globroot relroot
948 MonitorStateGlob kindfile kinddir globroot
<$>
949 buildMonitorStateGlobRel
950 mstartTime hashcache kindfile kinddir root
"." globPath
952 buildMonitorStateGlobRel
:: Maybe MonitorTimestamp
-- ^ start time of update
953 -> FileHashCache
-- ^ existing file hashes
954 -> MonitorKindFile
-> MonitorKindDir
955 -> FilePath -- ^ the root directory
956 -> FilePath -- ^ directory we are examining
957 -- relative to the root
958 -> FilePathGlobRel
-- ^ the matching glob
959 -> IO MonitorStateGlobRel
960 buildMonitorStateGlobRel mstartTime hashcache kindfile kinddir root
962 let absdir
= root
</> dir
963 dirEntries
<- getDirectoryContents absdir
964 dirMTime
<- getModTime absdir
966 GlobDir glob globPath
' -> do
967 subdirs
<- filterM (\subdir
-> doesDirectoryExist (absdir
</> subdir
))
968 $ filter (matchGlob glob
) dirEntries
970 for
(sort subdirs
) $ \subdir
-> do
971 fstate
<- buildMonitorStateGlobRel
972 mstartTime hashcache kindfile kinddir root
973 (dir
</> subdir
) globPath
'
974 return (subdir
, fstate
)
975 return $! MonitorStateGlobDirs glob globPath
' dirMTime subdirStates
978 let files
= filter (matchGlob glob
) dirEntries
980 for
(sort files
) $ \file
-> do
981 fstate
<- buildMonitorStateFile
982 mstartTime hashcache kindfile kinddir root
984 return (file
, fstate
)
985 return $! MonitorStateGlobFiles glob dirMTime filesStates
988 return MonitorStateGlobDirTrailing
991 -- | We really want to avoid re-hashing files all the time. We already make
992 -- the assumption that if a file mtime has not changed then we don't need to
993 -- bother checking if the content hash has changed. We can apply the same
994 -- assumption when updating the file monitor state. In the typical case of
995 -- updating a file monitor the set of files is the same or largely the same so
996 -- we can grab the previously known content hashes with their corresponding
999 type FileHashCache
= Map
FilePath (ModTime
, Hash
)
1001 -- | We declare it a cache hit if the mtime of a file is the same as before.
1003 lookupFileHashCache
:: FileHashCache
-> FilePath -> ModTime
-> Maybe Hash
1004 lookupFileHashCache hashcache file mtime
= do
1005 (mtime
', hash
) <- Map
.lookup file hashcache
1006 guard (mtime
' == mtime
)
1009 -- | Either get it from the cache or go read the file
1010 getFileHash
:: FileHashCache
-> FilePath -> FilePath -> ModTime
-> IO Hash
1011 getFileHash hashcache relfile absfile mtime
=
1012 case lookupFileHashCache hashcache relfile mtime
of
1013 Just hash
-> return hash
1014 Nothing
-> readFileHash absfile
1016 -- | Build a 'FileHashCache' from the previous 'MonitorStateFileSet'. While
1017 -- in principle we could preserve the structure of the previous state, given
1018 -- that the set of files to monitor can change then it's simpler just to throw
1019 -- away the structure and use a finite map.
1021 readCacheFileHashes
:: (Binary a
, Structured a
, Binary b
, Structured b
)
1022 => FileMonitor a b
-> IO FileHashCache
1023 readCacheFileHashes monitor
=
1024 handleDoesNotExist Map
.empty $
1025 handleErrorCall Map
.empty $
1026 withCacheFile monitor
$ \res
->
1028 Left _
-> return Map
.empty
1029 Right
(msfs
, _
, _
) -> return (mkFileHashCache msfs
)
1031 mkFileHashCache
:: MonitorStateFileSet
-> FileHashCache
1032 mkFileHashCache
(MonitorStateFileSet singlePaths globPaths
) =
1033 collectAllFileHashes singlePaths
1034 `Map
.union` collectAllGlobHashes globPaths
1036 collectAllFileHashes
:: [MonitorStateFile
] -> Map
FilePath (ModTime
, Hash
)
1037 collectAllFileHashes singlePaths
=
1038 Map
.fromList
[ (fpath
, (mtime
, hash
))
1039 | MonitorStateFile _ _ fpath
1040 (MonitorStateFileHashed mtime hash
) <- singlePaths
]
1042 collectAllGlobHashes
:: [MonitorStateGlob
] -> Map
FilePath (ModTime
, Hash
)
1043 collectAllGlobHashes globPaths
=
1044 Map
.fromList
[ (fpath
, (mtime
, hash
))
1045 | MonitorStateGlob _ _ _ gstate
<- globPaths
1046 , (fpath
, (mtime
, hash
)) <- collectGlobHashes
"" gstate
]
1048 collectGlobHashes
:: FilePath -> MonitorStateGlobRel
-> [(FilePath, (ModTime
, Hash
))]
1049 collectGlobHashes dir
(MonitorStateGlobDirs _ _ _ entries
) =
1051 |
(subdir
, fstate
) <- entries
1052 , res
<- collectGlobHashes
(dir
</> subdir
) fstate
]
1054 collectGlobHashes dir
(MonitorStateGlobFiles _ _ entries
) =
1055 [ (dir
</> fname
, (mtime
, hash
))
1056 |
(fname
, MonitorStateFileHashed mtime hash
) <- entries
]
1058 collectGlobHashes _dir MonitorStateGlobDirTrailing
=
1062 ------------------------------------------------------------------------------
1066 -- | Within the @root@ directory, check if @file@ has its 'ModTime' is
1067 -- the same as @mtime@, short-circuiting if it is different.
1068 probeFileModificationTime
:: FilePath -> FilePath -> ModTime
-> ChangedM
()
1069 probeFileModificationTime root file mtime
= do
1070 unchanged
<- liftIO
$ checkModificationTimeUnchanged root file mtime
1071 unless unchanged
(somethingChanged file
)
1073 -- | Within the @root@ directory, check if @file@ has its 'ModTime' and
1074 -- 'Hash' is the same as @mtime@ and @hash@, short-circuiting if it is
1076 probeFileModificationTimeAndHash
:: FilePath -> FilePath -> ModTime
-> Hash
1078 probeFileModificationTimeAndHash root file mtime hash
= do
1079 unchanged
<- liftIO
$
1080 checkFileModificationTimeAndHashUnchanged root file mtime hash
1081 unless unchanged
(somethingChanged file
)
1083 -- | Within the @root@ directory, check if @file@ still exists as a file.
1084 -- If it *does not* exist, short-circuit.
1085 probeFileExistence
:: FilePath -> FilePath -> ChangedM
()
1086 probeFileExistence root file
= do
1087 existsFile
<- liftIO
$ doesFileExist (root
</> file
)
1088 unless existsFile
(somethingChanged file
)
1090 -- | Within the @root@ directory, check if @dir@ still exists.
1091 -- If it *does not* exist, short-circuit.
1092 probeDirExistence
:: FilePath -> FilePath -> ChangedM
()
1093 probeDirExistence root dir
= do
1094 existsDir
<- liftIO
$ doesDirectoryExist (root
</> dir
)
1095 unless existsDir
(somethingChanged dir
)
1097 -- | Within the @root@ directory, check if @file@ still does not exist.
1098 -- If it *does* exist, short-circuit.
1099 probeFileNonExistence
:: FilePath -> FilePath -> ChangedM
()
1100 probeFileNonExistence root file
= do
1101 existsFile
<- liftIO
$ doesFileExist (root
</> file
)
1102 existsDir
<- liftIO
$ doesDirectoryExist (root
</> file
)
1103 when (existsFile || existsDir
) (somethingChanged file
)
1105 -- | Returns @True@ if, inside the @root@ directory, @file@ has the same
1106 -- 'ModTime' as @mtime@.
1107 checkModificationTimeUnchanged
:: FilePath -> FilePath
1108 -> ModTime
-> IO Bool
1109 checkModificationTimeUnchanged root file mtime
=
1110 handleIOException
False $ do
1111 mtime
' <- getModTime
(root
</> file
)
1112 return (mtime
== mtime
')
1114 -- | Returns @True@ if, inside the @root@ directory, @file@ has the
1115 -- same 'ModTime' and 'Hash' as @mtime and @chash@.
1116 checkFileModificationTimeAndHashUnchanged
:: FilePath -> FilePath
1117 -> ModTime
-> Hash
-> IO Bool
1118 checkFileModificationTimeAndHashUnchanged root file mtime chash
=
1119 handleIOException
False $ do
1120 mtime
' <- getModTime
(root
</> file
)
1124 chash
' <- readFileHash
(root
</> file
)
1125 return (chash
== chash
')
1127 -- | Read a non-cryptographic hash of a @file@.
1128 readFileHash
:: FilePath -> IO Hash
1130 withBinaryFile file ReadMode
$ \hnd
->
1131 evaluate
. Hashable
.hash
=<< BS
.hGetContents hnd
1133 -- | Given a directory @dir@, return @Nothing@ if its 'ModTime'
1134 -- is the same as @mtime@, and the new 'ModTime' if it is not.
1135 checkDirectoryModificationTime
:: FilePath -> ModTime
-> IO (Maybe ModTime
)
1136 checkDirectoryModificationTime dir mtime
=
1137 handleIOException Nothing
$ do
1138 mtime
' <- getModTime dir
1141 else return (Just mtime
')
1143 -- | Run an IO computation, returning the first argument @e@ if there is an 'error'
1144 -- call. ('ErrorCall')
1145 handleErrorCall
:: a
-> IO a
-> IO a
1146 handleErrorCall e
= handle handler
where
1147 #if MIN_VERSION_base
(4,9,0)
1148 handler
(ErrorCallWithLocation _ _
) = return e
1150 handler
(ErrorCall _
) = return e
1154 -- | Run an IO computation, returning @e@ if there is any 'IOException'.
1156 -- This policy is OK in the file monitor code because it just causes the
1157 -- monitor to report that something changed, and then code reacting to that
1158 -- will normally encounter the same IO exception when it re-runs the action
1159 -- that uses the file.
1161 handleIOException
:: a
-> IO a
-> IO a
1162 handleIOException e
=
1163 handle
(anyIOException e
)
1165 anyIOException
:: a
-> IOException
-> IO a
1166 anyIOException x _
= return x
1169 ------------------------------------------------------------------------------