Merge branch 'darcs' into master
[git-darcs-import.git] / src / Darcs / Diff.lhs
blob8b2cf732875a6a57f8b9a8fc1b47e4e4e62e742c
1 % Copyright (C) 2002-2003 David Roundy
3 % This program is free software; you can redistribute it and/or modify
4 % it under the terms of the GNU General Public License as published by
5 % the Free Software Foundation; either version 2, or (at your option)
6 % any later version.
8 % This program is distributed in the hope that it will be useful,
9 % but WITHOUT ANY WARRANTY; without even the implied warranty of
10 % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 % GNU General Public License for more details.
13 % You should have received a copy of the GNU General Public License
14 % along with this program; see the file COPYING. If not, write to
15 % the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
16 % Boston, MA 02110-1301, USA.
18 \chapter{Diff}
20 \begin{code}
21 {-# OPTIONS_GHC -cpp -fglasgow-exts #-}
22 {-# LANGUAGE CPP #-}
24 #include "gadts.h"
26 module Darcs.Diff ( unsafeDiff, sync, cmp
27 #ifndef GADT_WITNESSES
28 , diff_files
29 #endif
30 ) where
32 import System.Posix
33 ( setFileTimes )
34 import System.IO ( IOMode(ReadMode), hFileSize, hClose )
35 import System.Directory ( doesDirectoryExist, doesFileExist,
36 getDirectoryContents,
38 import Control.Monad ( when )
39 import Data.List ( sort
40 #ifndef GADT_WITNESSES
41 , intersperse
42 #endif
45 #ifndef GADT_WITNESSES
46 import ByteStringUtils ( is_funky, linesPS)
47 import qualified Data.ByteString.Char8 as BC (last)
48 import qualified Data.ByteString as B (null, empty, take, ByteString)
49 #endif
50 import qualified Data.ByteString as B (hGet, length)
52 import Darcs.SlurpDirectory ( Slurpy, slurp_name, is_dir, is_file,
53 get_dircontents, get_filecontents,
54 get_mtime, get_length,
55 undefined_time
56 #ifndef GADT_WITNESSES
57 , FileContents, undefined_size
58 #endif
60 #ifndef GADT_WITNESSES
61 import Darcs.FilePathUtils ( (///) )
62 #endif
63 import Darcs.Patch ( Prim
64 #ifndef GADT_WITNESSES
65 , hunk, canonize, rmfile, rmdir
66 , addfile, adddir
67 , binary, invert
68 #endif
70 import System.IO ( openBinaryFile )
71 import Darcs.Repository.Prefs ( FileType(..) )
72 import Darcs.Flags ( DarcsFlag(..) )
73 import Darcs.Utils ( catchall )
74 import Darcs.Ordered ( FL(..)
75 #ifndef GADT_WITNESSES
76 , (+>+)
77 #endif
79 #ifndef GADT_WITNESSES
80 #include "impossible.h"
81 #endif
82 \end{code}
84 The diff function takes a recursive diff of two slurped-up directory trees.
85 The code involved is actually pretty trivial. \verb!paranoid_diff! runs a
86 diff in which we don't make the assumption that files with the same
87 modification time are identical.
89 \begin{code}
90 unsafeDiff :: [DarcsFlag]
91 -> (FilePath -> FileType) -> Slurpy -> Slurpy -> FL Prim C(x y)
92 #ifdef GADT_WITNESSES
93 unsafeDiff = undefined
94 #else
95 unsafeDiff opts wt s1 s2
96 = gendiff (ignore_times, look_for_adds, summary) wt [] s1 s2 NilFL
97 where ignore_times = IgnoreTimes `elem` opts
98 look_for_adds = LookForAdds `elem` opts
99 -- NoSummary/Summary both present gives False
100 -- Just Summary gives True
101 -- Just NoSummary gives False
102 -- Neither gives False
103 summary = Summary `elem` opts && NoSummary `notElem` opts
105 mk_filepath :: [FilePath] -> FilePath
106 mk_filepath fps = concat $ intersperse "/" $ reverse fps
108 gendiff :: (Bool,Bool,Bool)
109 -> (FilePath -> FileType) -> [FilePath] -> Slurpy -> Slurpy
110 -> (FL Prim -> FL Prim)
111 gendiff opts@(isparanoid,_,_) wt fps s1 s2
112 | is_file s1 && is_file s2 && maybe_differ =
113 case wt n2 of
114 TextFile -> diff_files f b1 b2
115 BinaryFile -> if b1 /= b2 then (binary f b1 b2:>:)
116 else id
117 | is_dir s1 && is_dir s2 =
118 let fps' = case n2 of
119 "." -> fps
120 _ -> n2:fps
121 in fps' `seq` recur_diff opts (wt . (n2///)) fps' dc1 dc2
122 | otherwise = id
123 where n2 = slurp_name s2
124 f = mk_filepath (n2:fps)
125 b1 = get_filecontents s1
126 b2 = get_filecontents s2
127 dc1 = get_dircontents s1
128 dc2 = get_dircontents s2
129 maybe_differ = isparanoid
130 || get_mtime s1 == undefined_time
131 || get_mtime s1 /= get_mtime s2
132 || get_length s1 == undefined_size
133 || get_length s1 /= get_length s2
135 -- recur_diff or recursive diff
136 -- First parameter is (IgnoreTimes?, LookforAdds?, Summary?)
137 recur_diff :: (Bool,Bool,Bool)
138 -> (FilePath -> FileType) -> [FilePath] -> [Slurpy] -> [Slurpy]
139 -> (FL Prim -> FL Prim)
140 recur_diff _ _ _ [] [] = id
141 recur_diff opts@(_,doadd,summary) wt fps (s:ss) (s':ss')
142 -- this is the case if a file has been removed in the working directory
143 | s < s' = diff_removed wt fps s . recur_diff opts wt fps ss (s':ss')
144 -- this next case is when there is a file in the directory that is not
145 -- in the repository (ie, not managed by darcs)
146 | s > s' = let rest = recur_diff opts wt fps (s:ss) ss'
147 in if not doadd then rest
148 else diff_added summary wt fps s' . rest
149 -- actually compare the files because the names match
150 | s == s' = gendiff opts wt fps s s' . recur_diff opts wt fps ss ss'
151 recur_diff opts wt fps (s:ss) [] =
152 diff_removed wt fps s . recur_diff opts wt fps ss []
153 recur_diff opts@(_,True,summary) wt fps [] (s':ss') =
154 diff_added summary wt fps s' . recur_diff opts wt fps [] ss'
155 recur_diff (_,False,_) _ _ [] _ = id
156 recur_diff _ _ _ _ _ = impossible
158 -- creates a diff for a file or directory which needs to be added to the
159 -- repository
160 diff_added :: Bool -> (FilePath -> FileType) -> [FilePath] -> Slurpy
161 -> (FL Prim -> FL Prim)
162 diff_added summary wt fps s
163 | is_file s = case wt n of
164 TextFile -> (addfile f:>:) .
165 (if summary
166 then id
167 else diff_from_empty id f (get_filecontents s))
168 BinaryFile -> (addfile f:>:) .
169 (if summary then id else
170 (bin_patch f B.empty (get_filecontents s)))
171 | otherwise {- is_dir s -} =
172 (adddir f:>:)
173 . foldr (.) id (map (diff_added summary wt (n:fps)) (get_dircontents s))
174 where n = slurp_name s
175 f = mk_filepath (n:fps)
177 get_text :: FileContents -> [B.ByteString]
178 get_text = linesPS
180 empt :: FileContents
181 empt = B.empty
183 diff_files :: FilePath -> FileContents -> FileContents
184 -> (FL Prim -> FL Prim)
185 diff_files f o n | get_text o == [B.empty] && get_text n == [B.empty] = id
186 | get_text o == [B.empty] = diff_from_empty id f n
187 | get_text n == [B.empty] = diff_from_empty invert f o
188 diff_files f o n = if o == n
189 then id
190 else if has_bin o || has_bin n
191 then (binary f o n:>:)
192 else (canonize (hunk f 1 (linesPS o) (linesPS n)) +>+)
194 diff_from_empty :: (Prim -> Prim) -> FilePath -> FileContents
195 -> (FL Prim -> FL Prim)
196 diff_from_empty inv f b =
197 if b == B.empty
198 then id
199 else let p = if has_bin b
200 then binary f B.empty b
201 else if BC.last b == '\n'
202 then hunk f 1 [] $ init $ linesPS b
203 else hunk f 1 [B.empty] $ linesPS b
204 in (inv p:>:)
206 {- | We take a B.ByteString which represents a file's contents, and we check to see
207 whether it is a 'binary' file or a 'textual' file. We define a textual file as any file
208 which does not contain two magic characters, '\0' (the NULL character on Unix) and '^Z'
209 (Control-Z, a DOS convention).
211 Note that to improve performance, we won't examine *all* of the string, because that
212 falls down on large files, but just the first 4096 characters. -}
213 has_bin :: FileContents -> Bool
214 has_bin = is_funky . B.take 4096
215 #endif
216 \end{code}
218 \begin{code}
219 #ifndef GADT_WITNESSES
220 bin_patch :: FilePath -> B.ByteString -> B.ByteString
221 -> FL Prim -> FL Prim
222 bin_patch f o n | B.null o && B.null n = id
223 | otherwise = (binary f o n:>:)
224 #endif
225 \end{code}
227 \begin{code}
228 #ifndef GADT_WITNESSES
229 diff_removed :: (FilePath -> FileType) -> [FilePath] -> Slurpy
230 -> (FL Prim -> FL Prim)
231 diff_removed wt fps s
232 | is_file s = case wt n of
233 TextFile -> diff_files f (get_filecontents s) empt
234 . (rmfile f:>:)
235 BinaryFile -> (bin_patch f
236 (get_filecontents s) B.empty)
237 . (rmfile f:>:)
238 | otherwise {- is_dir s -}
239 = foldr (.) (rmdir f:>:)
240 $ map (diff_removed wt (n:fps)) (get_dircontents s)
241 where n = slurp_name s
242 f = mk_filepath (n:fps)
243 #endif
244 \end{code}
246 \begin{code}
247 sync :: String -> Slurpy -> Slurpy -> IO ()
248 sync path s1 s2
249 | is_file s1 && is_file s2 &&
250 (get_mtime s1 == undefined_time || get_mtime s1 /= get_mtime s2) &&
251 get_length s1 == get_length s2 &&
252 get_filecontents s1 == get_filecontents s2 =
253 set_mtime n (get_mtime s2)
254 | is_dir s1 && is_dir s2
255 = n2 `seq` recur_sync n (get_dircontents s1) (get_dircontents s2)
256 | otherwise = return ()
257 where n2 = slurp_name s2
258 n = path++"/"++n2
259 set_mtime fname ctime = setFileTimes fname ctime ctime
260 `catchall` return ()
261 recur_sync _ [] _ = return ()
262 recur_sync _ _ [] = return ()
263 recur_sync p (s:ss) (s':ss')
264 | s < s' = recur_sync p ss (s':ss')
265 | s > s' = recur_sync p (s:ss) ss'
266 | otherwise = do sync p s s'
267 recur_sync p ss ss'
268 \end{code}
271 \begin{code}
272 cmp :: FilePath -> FilePath -> IO Bool
273 cmp p1 p2 = do
274 dir1 <- doesDirectoryExist p1
275 dir2 <- doesDirectoryExist p2
276 file1 <- doesFileExist p1
277 file2 <- doesFileExist p2
278 if dir1 && dir2
279 then cmpdir p1 p2
280 else if file1 && file2
281 then cmpfile p1 p2
282 else return False
283 cmpdir :: FilePath -> FilePath -> IO Bool
284 cmpdir d1 d2 = do
285 fn1 <- fmap (filter (\f->f/="." && f /="..")) $ getDirectoryContents d1
286 fn2 <- fmap (filter (\f->f/="." && f /="..")) $ getDirectoryContents d2
287 if sort fn1 /= sort fn2
288 then return False
289 else andIO $ map (\fn-> cmp (d1++"/"++fn) (d2++"/"++fn)) fn1
290 andIO :: [IO Bool] -> IO Bool
291 andIO (iob:iobs) = do b <- iob
292 if b then andIO iobs else return False
293 andIO [] = return True
294 cmpfile :: FilePath -> FilePath -> IO Bool
295 cmpfile f1 f2 = do
296 h1 <- openBinaryFile f1 ReadMode
297 h2 <- openBinaryFile f2 ReadMode
298 l1 <- hFileSize h1
299 l2 <- hFileSize h2
300 if l1 /= l2
301 then do hClose h1
302 hClose h2
303 putStrLn $ "different file lengths for "++f1++" and "++f2
304 return False
305 else do b <- hcmp h1 h2
306 when (not b) $ putStrLn $ "files "++f1++" and "++f2++" differ"
307 hClose h1
308 hClose h2
309 return b
310 where hcmp h1 h2 = do c1 <- B.hGet h1 1024
311 c2 <- B.hGet h2 1024
312 if c1 /= c2
313 then return False
314 else if B.length c1 == 1024
315 then hcmp h1 h2
316 else return True
317 \end{code}