Merge pull request #10677 from geekosaur/unix-i386-fix-2
[cabal.git] / Cabal-described / src / Distribution / Utils / CharSet.hs
blob9243615c7fafeffc7e011904d79223fe45013bbd
1 {-# LANGUAGE BangPatterns #-}
2 {-# LANGUAGE CPP #-}
3 -- | Sets of characters.
4 --
5 -- Using this is more efficient than 'RE.Type.Alt':ng individual characters.
6 module Distribution.Utils.CharSet (
7 -- * Set of characters
8 CharSet,
9 -- * Construction
10 empty,
11 universe,
12 singleton,
13 insert,
14 union,
15 intersection,
16 complement,
17 difference,
18 -- * Query
19 size,
20 null,
21 member,
22 -- * Conversions
23 fromList,
24 toList,
25 fromIntervalList,
26 toIntervalList,
27 -- * Special lists
28 alpha,
29 alphanum,
30 upper,
31 ) where
33 import Data.Char (chr, isAlpha, isAlphaNum, isUpper, ord)
34 import Data.List (foldl', sortBy)
35 import Data.Monoid (Monoid (..))
36 import Data.String (IsString (..))
37 import Distribution.Compat.Semigroup (Semigroup (..))
38 import Prelude
39 (Bool (..), Bounded (..), Char, Enum (..), Eq (..), Int, Maybe (..), Num (..), Ord (..), Show (..), String, concatMap, flip, fst, otherwise, showParen,
40 showString, uncurry, ($), (.))
42 #if MIN_VERSION_containers(0,5,0)
43 import qualified Data.IntMap.Strict as IM
44 #else
45 import qualified Data.IntMap as IM
46 #endif
48 -- | A set of 'Char's.
50 -- We use range set, which works great with 'Char'.
51 newtype CharSet = CS { unCS :: IM.IntMap Int }
52 deriving (Eq, Ord)
54 instance IsString CharSet where
55 fromString = fromList
57 instance Show CharSet where
58 showsPrec d cs
59 | size cs < 20
60 = showsPrec d (toList cs)
61 | otherwise
62 = showParen (d > 10)
63 $ showString "CS "
64 . showsPrec 11 (unCS cs)
66 instance Semigroup CharSet where
67 (<>) = union
69 instance Monoid CharSet where
70 mempty = empty
71 mappend = (<>)
73 -- | Empty character set.
74 empty :: CharSet
75 empty = CS IM.empty
77 -- | universe
78 universe :: CharSet
79 universe = CS $ IM.singleton 0 0x10ffff
81 -- | Check whether 'CharSet' is 'empty'.
82 null :: CharSet -> Bool
83 null (CS cs) = IM.null cs
85 -- | Size of 'CharSet'
87 -- >>> size $ fromIntervalList [('a','f'), ('0','9')]
88 -- 16
90 -- >>> length $ toList $ fromIntervalList [('a','f'), ('0','9')]
91 -- 16
93 size :: CharSet -> Int
94 size (CS m) = foldl' (\ !acc (lo, hi) -> acc + (hi - lo) + 1) 0 (IM.toList m)
96 -- | Singleton character set.
97 singleton :: Char -> CharSet
98 singleton c = CS (IM.singleton (ord c) (ord c))
100 -- | Test whether character is in the set.
101 member :: Char -> CharSet -> Bool
102 #if MIN_VERSION_containers(0,5,0)
103 member c (CS m) = case IM.lookupLE i m of
104 Nothing -> False
105 Just (_, hi) -> i <= hi
106 where
107 #else
108 member c (CS m) = go (IM.toList m)
109 where
110 go [] = False
111 go ((x,y):zs) = (x <= i && i <= y) || go zs
112 #endif
113 i = ord c
115 -- | Insert 'Char' into 'CharSet'.
116 insert :: Char -> CharSet -> CharSet
117 insert c (CS m) = normalise (IM.insert (ord c) (ord c) m)
119 -- | Union of two 'CharSet's.
120 union :: CharSet -> CharSet -> CharSet
121 union (CS xs) (CS ys) = normalise (IM.unionWith max xs ys)
123 -- | Intersection of two 'CharSet's
124 intersection :: CharSet -> CharSet -> CharSet
125 intersection (CS xs) (CS ys) = CS $
126 IM.fromList (intersectRangeList (IM.toList xs) (IM.toList ys))
128 -- | Compute the intersection.
129 intersectRangeList :: Ord a => [(a, a)] -> [(a, a)] -> [(a, a)]
130 intersectRangeList aset@((x,y):as) bset@((u,v):bs)
131 | y < u = intersectRangeList as bset
132 | v < x = intersectRangeList aset bs
133 | y < v = (max x u, y) : intersectRangeList as bset
134 | otherwise = (max x u, v) : intersectRangeList aset bs
135 intersectRangeList _ [] = []
136 intersectRangeList [] _ = []
138 -- | Complement of a CharSet
139 complement :: CharSet -> CharSet
140 complement (CS xs) = CS $ IM.fromList $ complementRangeList (IM.toList xs)
142 -- | Compute the complement intersected with @[x,)@ assuming @x<u@.
143 complementRangeList' :: Int -> [(Int, Int)] -> [(Int, Int)]
144 complementRangeList' x ((u,v):s) = (x,pred u) : complementRangeList'' v s
145 complementRangeList' x [] = [(x,0x10ffff)]
147 -- | Compute the complement intersected with @(x,)@.
148 complementRangeList'' :: Int -> [(Int, Int)] -> [(Int, Int)]
149 complementRangeList'' x s
150 | x == 0x10ffff = []
151 | otherwise = complementRangeList' (succ x) s
153 -- | Compute the complement.
155 -- Note: we treat Ints as codepoints, i.e minBound is 0, and maxBound is 0x10ffff
156 complementRangeList :: [(Int, Int)] -> [(Int, Int)]
157 complementRangeList s@((x,y):s')
158 | x == 0 = complementRangeList'' y s'
159 | otherwise = complementRangeList' 0 s
160 complementRangeList [] = [(0, 0x10ffff)]
162 -- | Difference of two 'CharSet's.
163 difference :: CharSet -> CharSet -> CharSet
164 difference xs ys = intersection xs (complement ys)
166 -- | Make 'CharSet' from a list of characters, i.e. 'String'.
167 fromList :: String -> CharSet
168 fromList = normalise . foldl' (\ acc c -> IM.insert (ord c) (ord c) acc) IM.empty
170 -- | Convert 'CharSet' to a list of characters i.e. 'String'.
171 toList :: CharSet -> String
172 toList = concatMap (uncurry enumFromTo) . toIntervalList
174 -- | Convert to interval list
176 -- >>> toIntervalList $ union "01234" "56789"
177 -- [('0','9')]
179 toIntervalList :: CharSet -> [(Char, Char)]
180 toIntervalList (CS m) = [ (chr lo, chr hi) | (lo, hi) <- IM.toList m ]
182 -- | Convert from interval pairs.
184 -- >>> fromIntervalList []
185 -- ""
187 -- >>> fromIntervalList [('a','f'), ('0','9')]
188 -- "0123456789abcdef"
190 -- >>> fromIntervalList [('Z','A')]
191 -- ""
193 fromIntervalList :: [(Char,Char)] -> CharSet
194 fromIntervalList xs = normalise' $ sortBy (\a b -> compare (fst a) (fst b))
195 [ (ord lo, ord hi)
196 | (lo, hi) <- xs
197 , lo <= hi
200 -------------------------------------------------------------------------------
201 -- Normalisation
202 -------------------------------------------------------------------------------
204 normalise :: IM.IntMap Int -> CharSet
205 normalise = normalise'. IM.toList
207 normalise' :: [(Int,Int)] -> CharSet
208 normalise' = CS . IM.fromList . go where
209 go :: [(Int,Int)] -> [(Int,Int)]
210 go [] = []
211 go ((x,y):zs) = go' x y zs
213 go' :: Int -> Int -> [(Int, Int)] -> [(Int, Int)]
214 go' lo hi [] = [(lo, hi)]
215 go' lo hi ws0@((u,v):ws)
216 | u <= succ hi = go' lo (max v hi) ws
217 | otherwise = (lo,hi) : go ws0
219 -------------------------------------------------------------------------------
220 -- Alpha Numeric character list
221 -------------------------------------------------------------------------------
223 -- Computing this takes some time,
224 -- but they are not used in-non testing in Cabal's normal operation.
226 -- | Note: this set varies depending on @base@ version.
228 alpha :: CharSet
229 alpha = foldl' (flip insert) empty [ c | c <- [ minBound .. maxBound ], isAlpha c ]
230 {-# NOINLINE alpha #-}
232 -- | Note: this set varies depending on @base@ version.
234 alphanum :: CharSet
235 alphanum = foldl' (flip insert) empty [ c | c <- [ minBound .. maxBound ], isAlphaNum c ]
236 {-# NOINLINE alphanum #-}
238 -- | Note: this set varies depending on @base@ version.
240 upper :: CharSet
241 upper = foldl' (flip insert) empty [ c | c <- [ minBound .. maxBound ], isUpper c ]
242 {-# NOINLINE upper #-}
244 -- $setup
245 -- Use -XOverloadedStrings to avoid the error: Couldn't match type ‘[Char]’ with ‘CharSet’
246 -- >>> :set -XOverloadedStrings
247 -- >>> import Prelude (length)