Follow upstream changes -- rest
[git-darcs-import.git] / src / Darcs / Commands / Optimize.lhs
blob85d1388763b1e00329afe9940d0545dca0f3c7f3
1 % Copyright (C) 2003-2005 David Roundy
3 % This program is free software; you can redistribute it and/or modify
4 % it under the terms of the GNU General Public License as published by
5 % the Free Software Foundation; either version 2, or (at your option)
6 % any later version.
8 % This program is distributed in the hope that it will be useful,
9 % but WITHOUT ANY WARRANTY; without even the implied warranty of
10 % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 % GNU General Public License for more details.
13 % You should have received a copy of the GNU General Public License
14 % along with this program; see the file COPYING. If not, write to
15 % the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
16 % Boston, MA 02110-1301, USA.
18 \subsection{darcs optimize}
19 \begin{code}
20 {-# OPTIONS_GHC -cpp #-}
21 {-# LANGUAGE CPP #-}
23 module Darcs.Commands.Optimize ( optimize ) where
24 import Control.Monad ( when, unless )
25 import Data.Maybe ( isJust )
26 import Text.Regex ( mkRegex, matchRegex )
27 import System.Directory ( getDirectoryContents, doesDirectoryExist )
29 import Darcs.Hopefully ( hopefully, info )
30 import Darcs.Commands ( DarcsCommand(..), nodefaults )
31 import Darcs.Arguments ( DarcsFlag( Compress, UnCompress,
32 NoCompress, Reorder,
33 TagName, CheckPoint,
34 Relink, RelinkPristine ),
35 tagname, checkpoint, reorder_patches,
36 uncompress_nocompress,
37 relink, relink_pristine, sibling,
38 flagsToSiblings,
39 working_repo_dir, umask_option,
41 import Darcs.Repository.Prefs ( get_preflist )
42 import Darcs.Repository ( Repository, PatchSet, withRepoLock, ($-), withGutsOf,
43 read_repo, optimizeInventory, slurp_recorded,
44 tentativelyReplacePatches, cleanRepository,
45 amInRepository, finalizeRepositoryChanges )
46 import Darcs.Repository.Checkpoint ( write_checkpoint )
47 import Darcs.Ordered ( RL(..), unsafeUnRL, (+<+), mapFL_FL, reverseRL, mapRL, concatRL )
48 import Darcs.Patch.Info ( PatchInfo, just_name, human_friendly )
49 import Darcs.Patch ( RepoPatch )
50 import ByteStringUtils ( gzReadFilePS )
51 import Darcs.Patch.Depends ( deep_optimize_patchset, slightly_optimize_patchset,
52 get_patches_beyond_tag, get_patches_in_tag,
54 import Darcs.Lock ( maybeRelink, gzWriteAtomicFilePS, writeAtomicFilePS )
55 import Darcs.RepoPath ( toFilePath )
56 import Darcs.Utils ( withCurrentDirectory )
57 import Darcs.Progress ( debugMessage )
58 import Printer ( putDocLn, text, ($$) )
59 import Darcs.SlurpDirectory ( slurp, list_slurpy_files )
60 import Darcs.Repository.Pristine ( identifyPristine, pristineDirectory )
61 import Darcs.Sealed ( FlippedSeal(..), unsafeUnseal )
62 import Darcs.Global ( darcsdir )
63 #include "impossible.h"
64 \end{code}
65 \begin{code}
66 optimize_description :: String
67 optimize_description =
68 "Optimize the repository."
69 \end{code}
71 \options{optimize}
73 \haskell{optimize_help}
75 \begin{code}
76 optimize_help :: String
77 optimize_help =
78 "Optimize can help to improve the performance of your repository in a number of cases.\n"
79 \end{code}
80 \begin{code}
81 optimize :: DarcsCommand
82 optimize = DarcsCommand {command_name = "optimize",
83 command_help = optimize_help,
84 command_description = optimize_description,
85 command_extra_args = 0,
86 command_extra_arg_help = [],
87 command_command = optimize_cmd,
88 command_prereq = amInRepository,
89 command_get_arg_possibilities = return [],
90 command_argdefaults = nodefaults,
91 command_advanced_options = [uncompress_nocompress, umask_option],
92 command_basic_options = [checkpoint,
93 tagname,
94 working_repo_dir,
95 reorder_patches,
96 sibling, relink,
97 relink_pristine]}
98 \end{code}
99 \begin{code}
100 optimize_cmd :: [DarcsFlag] -> [String] -> IO ()
101 optimize_cmd origopts _ = withRepoLock opts $- \repository -> do
102 cleanRepository repository
103 do_reorder opts repository
104 do_optimize_inventory repository
105 when (CheckPoint `elem` opts) $ do_checkpoint opts repository
106 when (Compress `elem` opts || UnCompress `elem` opts) $ optimize_compression opts
107 when (Relink `elem` opts || (RelinkPristine `elem` opts)) $
108 do_relink opts repository
109 putStrLn "Done optimizing!"
110 where opts = if UnCompress `elem` origopts then NoCompress:origopts else origopts
111 is_tag :: PatchInfo -> Bool
112 is_tag pinfo = take 4 (just_name pinfo) == "TAG "
113 \end{code}
115 Optimize always writes out a fresh copy of the inventory that minimizes
116 the amount of inventory that need be downloaded when people pull from the
117 repository.
119 Specifically, it breaks up the inventory on the most recent tag. This speeds
120 up most commands when run remotely, both because a smaller file needs to be
121 transfered (only the most recent inventory). It also gives a
122 guarantee that all the patches prior to a given tag are included in that tag,
123 so less commutation and history traversal is needed. This latter issue can
124 become very important in large repositories.
126 \begin{code}
127 do_optimize_inventory :: RepoPatch p => Repository p -> IO ()
128 do_optimize_inventory repository = do
129 debugMessage "Writing out a nice copy of the inventory."
130 optimizeInventory repository
131 debugMessage "Done writing out a nice copy of the inventory."
132 \end{code}
134 \begin{options}
135 --checkpoint, --tag
136 \end{options}
138 If you use the \verb!--checkpoint! option, optimize creates a checkpoint patch
139 for a tag. You can specify the tag with the \verb!--tag! option, or
140 just let darcs choose the most recent tag. Note that optimize
141 \verb!--checkpoint! will fail when used on a ``partial'' repository. Also,
142 the tag that is to be checkpointed must not be preceded by any patches
143 that are not included in that tag. If that is the case, no checkpointing
144 is done.
146 The created checkpoint is used by the \verb!--partial! flag to
147 \verb!get! and \verb!check!. This allows for users to retrieve
148 a working repository with limited history with a savings of disk
149 space and bandwidth.
153 \begin{code}
154 do_checkpoint :: RepoPatch p => [DarcsFlag] -> Repository p -> IO ()
155 do_checkpoint opts repository = do
156 mpi <- get_tag opts repository
157 case mpi of
158 Nothing -> return ()
159 Just pinfo -> do putDocLn $ text "Checkpointing tag:"
160 $$ human_friendly pinfo
161 write_checkpoint repository pinfo
163 get_tag :: RepoPatch p => [DarcsFlag] -> Repository p -> IO (Maybe PatchInfo)
164 get_tag [] r = do ps <- read_repo r
165 case filter is_tag $ lasts $ mapRL (mapRL info) ps of
166 [] -> do putStrLn "There is no tag to checkpoint!"
167 return Nothing
168 (pinfo:_) -> return $ Just pinfo
169 get_tag (TagName t:_) r =
170 do ps <- read_repo r
171 case filter (match_tag t) $ lasts $ mapRL (mapRL info) ps of
172 (pinfo:_) -> return $ Just pinfo
173 _ -> case filter (match_tag t) $
174 lasts $ mapRL (mapRL info) $ deep_optimize_patchset ps of
175 (pinfo:_) -> return $ Just pinfo
176 _ -> do putStr "Cannot checkpoint any tag "
177 putStr $ "matching '"++t++"'\n"
178 return Nothing
179 get_tag (_:fs) r = get_tag fs r
181 lasts :: [[a]] -> [a]
182 lasts [] = []
183 lasts (x@(_:_):ls) = last x : lasts ls
184 lasts ([]:ls) = lasts ls
185 \end{code}
187 \begin{code}
188 mymatch :: String -> PatchInfo -> Bool
189 mymatch r = match_name $ matchRegex (mkRegex r)
190 match_name :: (String -> Maybe a) -> PatchInfo -> Bool
191 match_name ch pinfo = isJust $ ch (just_name pinfo)
192 match_tag :: String -> PatchInfo -> Bool
193 match_tag ('^':n) = mymatch $ "^TAG "++n
194 match_tag n = mymatch $ "^TAG .*"++n
195 \end{code}
197 \begin{options}
198 --compress, --dont-compress, --uncompress
199 \end{options}
201 Some compression options are available, and are independent of the
202 \verb!--checkpoint! option.
204 By default the patches in the repository are compressed. These use less
205 disk space, which translates into less bandwidth if the repository is accessed
206 remotely.
208 Note that in the darcs-1.0 (also known as ``old fashioned inventory'')
209 repository format, patches will always have the ``.gz'' extension whether
210 they are compressed or not.
212 You may want to uncompress the patches when you've got enough disk space but
213 are running out of physical memory.
215 If you give the \verb!--compress! option, optimize will compress all the
216 patches in the repository. Similarly, if you give the \verb!--uncompress!,
217 optimize will decompress all the patches in the repository.
218 \verb!--dont-compress! means ``don't compress, but don't uncompress
219 either''. It would be useful if one of the compression options was provided
220 as a default and you wanted to override it.
222 \begin{code}
223 optimize_compression :: [DarcsFlag] -> IO ()
224 optimize_compression opts = do
225 putStrLn "Optimizing (un)compression of patches..."
226 do_compress (darcsdir++"/patches")
227 putStrLn "Optimizing (un)compression of inventories..."
228 do_compress (darcsdir++"/inventories")
229 where do_compress f =
230 do isd <- doesDirectoryExist f
231 if isd then withCurrentDirectory f $
232 do fs <- filter notdot `fmap` getDirectoryContents "."
233 mapM_ do_compress fs
234 else if Compress `elem` opts
235 then gzReadFilePS f >>= gzWriteAtomicFilePS f
236 else gzReadFilePS f >>= writeAtomicFilePS f
237 notdot ('.':_) = False
238 notdot _ = True
240 \end{code}
242 \begin{options}
243 --relink
244 \end{options}
246 The \verb|--relink| and \verb|--relink-pristine| options cause Darcs
247 to relink files from a sibling. See Section \ref{disk-usage}.
250 \begin{code}
251 do_relink :: RepoPatch p => [DarcsFlag] -> Repository p -> IO ()
252 do_relink opts repository =
253 do some_siblings <- return (flagsToSiblings opts)
254 defrepolist <- get_preflist "defaultrepo"
255 siblings <- return (map toFilePath some_siblings ++ defrepolist)
256 if (siblings == [])
257 then putStrLn "No siblings -- no relinking done."
258 else do when (Relink `elem` opts) $
259 do debugMessage "Relinking patches..."
260 patches <-
261 (fmap list_slurpy_files) (slurp $ darcsdir++"/patches")
262 maybeRelinkFiles siblings patches (darcsdir++"/patches")
263 when (RelinkPristine `elem` opts) $
264 do pristine <- identifyPristine
265 case (pristineDirectory pristine) of
266 (Just d) -> do
267 debugMessage "Relinking pristine tree..."
268 c <- slurp_recorded repository
269 maybeRelinkFiles
270 siblings (list_slurpy_files c) d
271 Nothing -> return ()
272 debugMessage "Done relinking."
273 return ()
274 return ()
276 maybeRelinkFiles :: [String] -> [String] -> String -> IO ()
277 maybeRelinkFiles src dst dir =
278 mapM_ (maybeRelinkFile src) (map ((dir ++ "/") ++) dst)
280 maybeRelinkFile :: [String] -> String -> IO ()
281 maybeRelinkFile [] _ = return ()
282 maybeRelinkFile (h:t) f =
283 do done <- maybeRelink (h ++ "/" ++ f) f
284 unless done $
285 maybeRelinkFile t f
286 return ()
287 \end{code}
289 \begin{options}
290 --reorder-patches
291 \end{options}
293 The \verb|--reorder-patches| option causes Darcs to create an optimal
294 ordering of its internal patch inventory. This may help to produce shorter
295 `context' lists when sending patches, and may improve performance for some
296 other operations as well. You should not run \verb!--reorder-patches! on a
297 repository from which someone may be simultaneously pulling or getting, as
298 this could lead to repository corruption.
300 \begin{code}
301 do_reorder :: RepoPatch p => [DarcsFlag] -> Repository p -> IO ()
302 do_reorder opts _ | not (Reorder `elem` opts) = return ()
303 do_reorder opts repository = do
304 debugMessage "Reordering the inventory."
305 psnew <- choose_order `fmap` read_repo repository
306 let ps = mapFL_FL hopefully $ reverseRL $ head $ unsafeUnRL psnew
307 withGutsOf repository $ do tentativelyReplacePatches repository opts ps
308 finalizeRepositoryChanges repository
309 debugMessage "Done reordering the inventory."
311 choose_order :: RepoPatch p => PatchSet p -> PatchSet p
312 choose_order ps | isJust last_tag =
313 case slightly_optimize_patchset $ unsafeUnseal $ get_patches_in_tag lt ps of
314 ((t:<:NilRL):<:pps) -> case get_patches_beyond_tag lt ps of
315 FlippedSeal (p :<: NilRL) -> (p+<+(t:<:NilRL)) :<: pps
316 _ -> impossible
317 _ -> impossible
318 where last_tag = case filter is_tag $ mapRL info $ concatRL ps of
319 (t:_) -> Just t
320 _ -> Nothing
321 lt = fromJust last_tag
322 choose_order ps = ps
323 \end{code}