Merge pull request #10525 from 9999years/field-stanza-names
[cabal.git] / Cabal / src / Distribution / PackageDescription / Check / Paths.hs
blob5b2df1f18fa95ca9c7443f63f63bf70192904571
1 -- |
2 -- Module : Distribution.PackageDescription.Check.Paths
3 -- Copyright : Lennart Kolmodin 2008, Francesco Ariis 2023
4 -- License : BSD3
5 --
6 -- Maintainer : cabal-devel@haskell.org
7 -- Portability : portable
8 --
9 -- Functions to check filepaths, directories, globs, etc.
10 module Distribution.PackageDescription.Check.Paths
11 ( checkGlob
12 , checkPath
13 , fileExtensionSupportedLanguage
14 , isGoodRelativeDirectoryPath
15 , isGoodRelativeFilePath
16 , isGoodRelativeGlob
17 , isInsideDist
18 ) where
20 import Distribution.Compat.Prelude
21 import Prelude ()
23 import Distribution.PackageDescription.Check.Common
24 import Distribution.PackageDescription.Check.Monad
25 import Distribution.Simple.CCompiler
26 import Distribution.Simple.Glob
27 ( Glob
28 , explainGlobSyntaxError
29 , isRecursiveInRoot
30 , parseFileGlob
32 import Distribution.Simple.Utils hiding (findPackageDesc, notice)
33 import System.FilePath (splitDirectories, splitPath, takeExtension)
35 import qualified System.FilePath.Windows as FilePath.Windows (isValid)
37 fileExtensionSupportedLanguage :: FilePath -> Bool
38 fileExtensionSupportedLanguage path =
39 isHaskell || isC
40 where
41 extension = takeExtension path
42 isHaskell = extension `elem` [".hs", ".lhs"]
43 isC = isJust (filenameCDialect extension)
45 -- Boolean: are absolute paths allowed?
46 checkPath
47 :: Monad m
48 => Bool -- Can be absolute path?
49 -> CabalField -- .cabal field that we are checking.
50 -> PathKind -- Path type.
51 -> FilePath -- Path.
52 -> CheckM m ()
53 checkPath isAbs title kind path = do
54 checkP
55 (isOutsideTree path)
56 (PackageBuildWarning $ RelativeOutside title path)
57 checkP
58 (isInsideDist path)
59 (PackageDistInexcusable $ DistPoint (Just title) path)
60 checkPackageFileNamesWithGlob kind path
62 -- Skip if "can be absolute path".
63 checkP
64 (not isAbs && isAbsoluteOnAnyPlatform path)
65 (PackageDistInexcusable $ AbsolutePath title path)
66 case grl path kind of
67 Just e ->
68 checkP
69 (not isAbs)
70 (PackageDistInexcusable $ BadRelativePath title path e)
71 Nothing -> return ()
72 checkWindowsPath (kind == PathKindGlob) path
73 where
74 isOutsideTree wpath = case splitDirectories wpath of
75 ".." : _ -> True
76 "." : ".." : _ -> True
77 _ -> False
79 -- These are not paths, but globs...
80 grl wfp PathKindFile = isGoodRelativeFilePath wfp
81 grl wfp PathKindGlob = isGoodRelativeGlob wfp
82 grl wfp PathKindDirectory = isGoodRelativeDirectoryPath wfp
84 -- | Is a 'FilePath' inside `dist`, `dist-newstyle` and friends?
85 isInsideDist :: FilePath -> Bool
86 isInsideDist path =
87 case map lowercase (splitDirectories path) of
88 "dist" : _ -> True
89 "." : "dist" : _ -> True
90 "dist-newstyle" : _ -> True
91 "." : "dist-newstyle" : _ -> True
92 _ -> False
94 checkPackageFileNamesWithGlob
95 :: Monad m
96 => PathKind
97 -> FilePath -- Filepath or possibly a glob pattern.
98 -> CheckM m ()
99 checkPackageFileNamesWithGlob kind fp = do
100 checkWindowsPath (kind == PathKindGlob) fp
101 checkTarPath fp
103 checkWindowsPath
104 :: Monad m
105 => Bool -- Is it a glob pattern?
106 -> FilePath -- Path.
107 -> CheckM m ()
108 checkWindowsPath isGlob path =
109 checkP
110 (not . FilePath.Windows.isValid $ escape isGlob path)
111 (PackageDistInexcusable $ InvalidOnWin [path])
112 where
113 -- Force a relative name to catch invalid file names like "f:oo" which
114 -- otherwise parse as file "oo" in the current directory on the 'f' drive.
115 escape :: Bool -> String -> String
116 escape wisGlob wpath =
117 (".\\" ++)
118 -- Glob paths will be expanded before being dereferenced, so asterisks
119 -- shouldn't count against them.
121 map (\c -> if c == '*' && wisGlob then 'x' else c) wpath
123 -- | Check a file name is valid for the portable POSIX tar format.
125 -- The POSIX tar format has a restriction on the length of file names. It is
126 -- unfortunately not a simple restriction like a maximum length. The exact
127 -- restriction is that either the whole path be 100 characters or less, or it
128 -- be possible to split the path on a directory separator such that the first
129 -- part is 155 characters or less and the second part 100 characters or less.
130 checkTarPath :: Monad m => FilePath -> CheckM m ()
131 checkTarPath path
132 | length path > 255 = tellP longPath
133 | otherwise = case pack nameMax (reverse (splitPath path)) of
134 Left err -> tellP err
135 Right [] -> return ()
136 Right (h : rest) -> case pack prefixMax remainder of
137 Left err -> tellP err
138 Right [] -> return ()
139 Right (_ : _) -> tellP noSplit
140 where
141 -- drop the '/' between the name and prefix:
142 remainder = safeInit h : rest
143 where
144 nameMax, prefixMax :: Int
145 nameMax = 100
146 prefixMax = 155
148 pack _ [] = Left emptyName
149 pack maxLen (c : cs)
150 | n > maxLen = Left longName
151 | otherwise = Right (pack' maxLen n cs)
152 where
153 n = length c
155 pack' maxLen n (c : cs)
156 | n' <= maxLen = pack' maxLen n' cs
157 where
158 n' = n + length c
159 pack' _ _ cs = cs
161 longPath = PackageDistInexcusable (FilePathTooLong path)
162 longName = PackageDistInexcusable (FilePathNameTooLong path)
163 noSplit = PackageDistInexcusable (FilePathSplitTooLong path)
164 emptyName = PackageDistInexcusable FilePathEmpty
166 -- `checkGlob` checks glob patterns and returns good ones for further
167 -- processing.
168 checkGlob
169 :: Monad m
170 => CabalField -- .cabal field we are checking.
171 -> FilePath -- glob filepath pattern
172 -> CheckM m (Maybe Glob)
173 checkGlob title pat = do
174 ver <- asksCM ccSpecVersion
176 -- Glob sanity check.
177 case parseFileGlob ver pat of
178 Left e -> do
179 tellP
180 ( PackageDistInexcusable $
181 GlobSyntaxError title (explainGlobSyntaxError pat e)
183 return Nothing
184 Right wglob -> do
185 -- \* Miscellaneous checks on sane glob.
186 -- Checks for recursive glob in root.
187 checkP
188 (isRecursiveInRoot wglob)
189 ( PackageDistSuspiciousWarn $
190 RecursiveGlobInRoot title pat
192 return (Just wglob)
194 -- | Whether a path is a good relative path. We aren't worried about perfect
195 -- cross-platform compatibility here; this function just checks the paths in
196 -- the (local) @.cabal@ file, while only Hackage needs the portability.
198 -- >>> let test fp = putStrLn $ show (isGoodRelativeDirectoryPath fp) ++ "; " ++ show (isGoodRelativeFilePath fp)
200 -- Note that "foo./bar.hs" would be invalid on Windows.
202 -- >>> traverse_ test ["foo/bar/quu", "a/b.hs", "foo./bar.hs"]
203 -- Nothing; Nothing
204 -- Nothing; Nothing
205 -- Nothing; Nothing
207 -- Trailing slash is not allowed for files, for directories it is ok.
209 -- >>> test "foo/"
210 -- Nothing; Just "trailing slash"
212 -- Leading @./@ is fine, but @.@ and @./@ are not valid files.
214 -- >>> traverse_ test [".", "./", "./foo/bar"]
215 -- Nothing; Just "trailing dot segment"
216 -- Nothing; Just "trailing slash"
217 -- Nothing; Nothing
219 -- Lastly, not good file nor directory cases:
221 -- >>> traverse_ test ["", "/tmp/src", "foo//bar", "foo/.", "foo/./bar", "foo/../bar"]
222 -- Just "empty path"; Just "empty path"
223 -- Just "posix absolute path"; Just "posix absolute path"
224 -- Just "empty path segment"; Just "empty path segment"
225 -- Just "trailing same directory segment: ."; Just "trailing same directory segment: ."
226 -- Just "same directory segment: ."; Just "same directory segment: ."
227 -- Just "parent directory segment: .."; Just "parent directory segment: .."
229 -- For the last case, 'isGoodRelativeGlob' doesn't warn:
231 -- >>> traverse_ (print . isGoodRelativeGlob) ["foo/../bar"]
232 -- Just "parent directory segment: .."
233 isGoodRelativeFilePath :: FilePath -> Maybe String
234 isGoodRelativeFilePath = state0
235 where
236 -- initial state
237 state0 [] = Just "empty path"
238 state0 (c : cs)
239 | c == '.' = state1 cs
240 | c == '/' = Just "posix absolute path"
241 | otherwise = state5 cs
243 -- after initial .
244 state1 [] = Just "trailing dot segment"
245 state1 (c : cs)
246 | c == '.' = state4 cs
247 | c == '/' = state2 cs
248 | otherwise = state5 cs
250 -- after ./ or after / between segments
251 state2 [] = Just "trailing slash"
252 state2 (c : cs)
253 | c == '.' = state3 cs
254 | c == '/' = Just "empty path segment"
255 | otherwise = state5 cs
257 -- after non-first segment's .
258 state3 [] = Just "trailing same directory segment: ."
259 state3 (c : cs)
260 | c == '.' = state4 cs
261 | c == '/' = Just "same directory segment: ."
262 | otherwise = state5 cs
264 -- after ..
265 state4 [] = Just "trailing parent directory segment: .."
266 state4 (c : cs)
267 | c == '.' = state5 cs
268 | c == '/' = Just "parent directory segment: .."
269 | otherwise = state5 cs
271 -- in a segment which is ok.
272 state5 [] = Nothing
273 state5 (c : cs)
274 | c == '.' = state5 cs
275 | c == '/' = state2 cs
276 | otherwise = state5 cs
278 -- | See 'isGoodRelativeFilePath'.
280 -- This is barebones function. We check whether the glob is a valid file
281 -- by replacing stars @*@ with @x@ses.
282 isGoodRelativeGlob :: FilePath -> Maybe String
283 isGoodRelativeGlob = isGoodRelativeFilePath . map f
284 where
285 f '*' = 'x'
286 f c = c
288 -- | See 'isGoodRelativeFilePath'.
289 isGoodRelativeDirectoryPath :: FilePath -> Maybe String
290 isGoodRelativeDirectoryPath = state0
291 where
292 -- initial state
293 state0 [] = Just "empty path"
294 state0 (c : cs)
295 | c == '.' = state5 cs
296 | c == '/' = Just "posix absolute path"
297 | otherwise = state4 cs
299 -- after initial ./ or after / between segments
300 state1 [] = Nothing
301 state1 (c : cs)
302 | c == '.' = state2 cs
303 | c == '/' = Just "empty path segment"
304 | otherwise = state4 cs
306 -- after non-first setgment's .
307 state2 [] = Just "trailing same directory segment: ."
308 state2 (c : cs)
309 | c == '.' = state3 cs
310 | c == '/' = Just "same directory segment: ."
311 | otherwise = state4 cs
313 -- after ..
314 state3 [] = Just "trailing parent directory segment: .."
315 state3 (c : cs)
316 | c == '.' = state4 cs
317 | c == '/' = Just "parent directory segment: .."
318 | otherwise = state4 cs
320 -- in a segment which is ok.
321 state4 [] = Nothing
322 state4 (c : cs)
323 | c == '.' = state4 cs
324 | c == '/' = state1 cs
325 | otherwise = state4 cs
327 -- after initial .
328 state5 [] = Nothing -- "."
329 state5 (c : cs)
330 | c == '.' = state3 cs
331 | c == '/' = state1 cs
332 | otherwise = state4 cs
334 -- [Note: Good relative paths]
336 -- Using @kleene@ we can define an extended regex:
338 -- @
339 -- import Algebra.Lattice
340 -- import Kleene
341 -- import Kleene.ERE (ERE (..), intersections)
343 -- data C = CDot | CSlash | CChar
344 -- deriving (Eq, Ord, Enum, Bounded, Show)
346 -- reservedR :: ERE C
347 -- reservedR = notChar CSlash
349 -- pathPieceR :: ERE C
350 -- pathPieceR = intersections
351 -- [ plus reservedR
352 -- , ERENot (string [CDot])
353 -- , ERENot (string [CDot,CDot])
354 -- ]
356 -- filePathR :: ERE C
357 -- filePathR = optional (string [CDot, CSlash]) <> pathPieceR <> star (char CSlash <> pathPieceR)
359 -- dirPathR :: ERE C
360 -- dirPathR = (char CDot \/ filePathR) <> optional (char CSlash)
362 -- plus :: ERE C -> ERE C
363 -- plus r = r <> star r
365 -- optional :: ERE C -> ERE C
366 -- optional r = mempty \/ r
367 -- @
369 -- Results in following state machine for @filePathR@
371 -- @
372 -- 0 -> \x -> if
373 -- | x <= CDot -> 1
374 -- | otherwise -> 5
375 -- 1 -> \x -> if
376 -- | x <= CDot -> 4
377 -- | x <= CSlash -> 2
378 -- | otherwise -> 5
379 -- 2 -> \x -> if
380 -- | x <= CDot -> 3
381 -- | otherwise -> 5
382 -- 3 -> \x -> if
383 -- | x <= CDot -> 4
384 -- | otherwise -> 5
385 -- 4 -> \x -> if
386 -- | x <= CDot -> 5
387 -- | otherwise -> 5
388 -- 5+ -> \x -> if
389 -- | x <= CDot -> 5
390 -- | x <= CSlash -> 2
391 -- | otherwise -> 5
392 -- @
394 -- and @dirPathR@:
396 -- @
397 -- 0 -> \x -> if
398 -- | x <= CDot -> 5
399 -- | otherwise -> 4
400 -- 1+ -> \x -> if
401 -- | x <= CDot -> 2
402 -- | otherwise -> 4
403 -- 2 -> \x -> if
404 -- | x <= CDot -> 3
405 -- | otherwise -> 4
406 -- 3 -> \x -> if
407 -- | x <= CDot -> 4
408 -- | otherwise -> 4
409 -- 4+ -> \x -> if
410 -- | x <= CDot -> 4
411 -- | x <= CSlash -> 1
412 -- | otherwise -> 4
413 -- 5+ -> \x -> if
414 -- | x <= CDot -> 3
415 -- | x <= CSlash -> 1
416 -- | otherwise -> 4
417 -- @