tweak paging height
[diohsc.git] / URI.hs
blobbbb5bd9f6f4dbaf922cb117ed7de87aff2ddaf3a
1 -- This file is part of Diohsc
2 -- Copyright (C) 2020 Martin Bays <mbays@sdf.org>
3 --
4 -- This program is free software: you can redistribute it and/or modify
5 -- it under the terms of version 3 of the GNU General Public License as
6 -- published by the Free Software Foundation, or any later version.
7 --
8 -- You should have received a copy of the GNU General Public License
9 -- along with this program. If not, see http://www.gnu.org/licenses/.
11 {-# LANGUAGE OverloadedStrings #-}
12 {-# LANGUAGE Safe #-}
14 module URI
15 ( URI
16 , URIRef
17 , escapeIRI
18 , escapePathString
19 , escapeQuery
20 , escapeQueryPart
21 , nullUri
22 , parseAbsoluteUri
23 , parseUriAsAbsolute
24 , parseUriReference
25 , pathSegments
26 , relativeFrom
27 , relativeTo
28 , setQuery
29 , stripUri
30 , unescapeUriString
31 , uriFragment
32 , uriPath
33 , uriPort
34 , uriRegName
35 , uriScheme
36 , uriQuery
37 ) where
39 import Control.Monad (mplus, (<=<))
40 import Data.Char (toLower)
41 import Data.List (dropWhileEnd)
42 import Data.Maybe (isNothing)
43 import Safe (readMay)
45 import qualified Network.URI as NU
47 defaultScheme :: String
48 defaultScheme = "gemini"
50 defaultPort :: Int
51 defaultPort = 1965
53 -- | Represents a normalised absolute URI with scheme and port defaults as above.
54 -- We use "Uri" rather than "URI" in camelcase,
55 -- because I prefer to think of it as a word rather than an acronym.
56 -- Still use "URI" if it's the first/only word of the identifier.
57 newtype URI = URI {uriUri :: NU.URI}
58 deriving (Eq,Ord)
59 instance Show URI where
60 show (URI uri) = show uri
62 uriPath, uriQuery, uriFragment :: URI -> String
63 uriPath = NU.uriPath . uriUri
64 uriQuery = NU.uriQuery . uriUri
65 uriFragment = NU.uriFragment . uriUri
67 -- | strips trailing ':'
68 uriScheme :: URI -> String
69 uriScheme = init . NU.uriScheme . uriUri
71 pathSegments :: URI -> [String]
72 pathSegments (URI uri) = NU.pathSegments uri
74 nullUri :: URI
75 nullUri = URI NU.nullURI
77 -- | URI reference. May be absolute. Not normalised.
78 newtype URIRef = URIRef NU.URI
79 deriving (Eq,Ord)
80 instance Show URIRef where
81 show (URIRef uri) = show uri
83 normaliseUri :: NU.URI -> URI
84 normaliseUri uri = URI $ uri
85 { NU.uriPath = (\p -> if null p then "/" else p) .
86 NU.normalizePathSegments . NU.normalizeEscape $ NU.uriPath uri
87 , NU.uriFragment = ""
88 , NU.uriScheme = if null $ NU.uriScheme uri
89 then defaultScheme else toLower <$> NU.uriScheme uri
90 , NU.uriAuthority = (\auth -> auth
91 { NU.uriPort =
92 if NU.uriPort auth == ':' : show defaultPort
93 then "" else NU.uriPort auth
94 , NU.uriRegName = toLower <$> NU.uriRegName auth
95 , NU.uriUserInfo = ""
97 <$> NU.uriAuthority uri
98 , NU.uriQuery = NU.normalizeEscape $ NU.uriQuery uri
101 parseAbsoluteUri :: String -> Maybe URI
102 parseAbsoluteUri = (normaliseUri <$>) . NU.parseURI
104 parseUriAsAbsolute :: String -> Maybe URI
105 parseUriAsAbsolute s = parseAbsoluteUri s `mplus` parseAbsoluteUri (defaultScheme ++ "://" ++ s)
107 parseUriReference :: String -> Maybe URIRef
108 parseUriReference = (URIRef <$>) . NU.parseURIReference
111 setQuery :: String -> URI -> URI
112 setQuery q (URI uri) = URI $ uri { NU.uriQuery = q }
114 stripUri :: URI -> URI
115 stripUri (URI uri) = URI $ uri { NU.uriPath = dropWhileEnd (== '/') $ NU.uriPath uri, NU.uriQuery = "" }
117 relativeTo :: URIRef -> URI -> URI
118 relativeTo (URIRef ref) (URI uri) = normaliseUri $ NU.relativeTo ref uri
120 -- | lift NU.relativeFrom, but set scheme when the result is absolute,
121 -- and avoid initial slash where possible, and prefer "." to "" and ".." to "../"
122 relativeFrom :: URI -> URI -> URIRef
123 relativeFrom (URI uri1) (URI uri2) =
124 URIRef . fixDots . stripSlash . setScheme $ NU.relativeFrom uri1 uri2 where
125 setScheme ref | isNothing (NU.uriAuthority ref) = ref
126 | otherwise = ref { NU.uriScheme = NU.uriScheme uri1 }
127 stripSlash ref | '/':path' <- NU.uriPath ref
128 , not $ null path'
129 , ref' <- ref { NU.uriPath = path' }
130 , NU.relativeTo ref' uri2 == uri1 = ref'
131 | otherwise = ref
132 fixDots ref = case NU.uriPath ref of
133 "" | ref' <- ref { NU.uriPath = "." }
134 , NU.relativeTo ref' uri2 == uri1 -> ref'
135 "../" -> ref { NU.uriPath = ".." }
136 _ -> ref
138 uriRegName :: URI -> Maybe String
139 uriRegName = (NU.uriRegName <$>) . NU.uriAuthority . uriUri
141 uriPort :: URI -> Maybe Int
142 uriPort = (readPort . NU.uriPort) <=< (NU.uriAuthority . uriUri)
143 where
144 readPort (':':n) = readMay n
145 readPort _ = Nothing
147 escapePathString :: String -> String
148 escapePathString = NU.escapeURIString (\c -> NU.isUnreserved c || c == '/')
150 unescapeUriString :: String -> String
151 unescapeUriString = NU.unEscapeString
153 -- | unreserved / sub-delims / ":" / "@" / "/" / "?"
154 isUnescapedInQuery :: Char -> Bool
155 isUnescapedInQuery c = NU.isUnescapedInURI c && c `notElem` ("#[]"::String)
157 escapeQuery :: String -> String
158 escapeQuery = NU.escapeURIString isUnescapedInQuery . withEscapes
159 where
160 withEscapes "" = ""
161 withEscapes ('\\':'x':h1:h2:s) | Just c <- readMay $ "'\\x" <> [h1,h2,'\''] = c:withEscapes s
162 withEscapes ('\\':'e':s) = '\ESC':withEscapes s
163 withEscapes ('\\':'r':s) = '\r':withEscapes s
164 withEscapes ('\\':'n':s) = '\n':withEscapes s
165 withEscapes ('\\':'t':s) = '\t':withEscapes s
166 withEscapes ('\\':c:s) = c:withEscapes s
167 withEscapes (c:s) = c:withEscapes s
169 -- |escape the query part of an unparsed uri string
170 escapeQueryPart :: String -> String
171 escapeQueryPart s
172 | (s','?':q) <- break (== '?') s = s' ++ '?' : escapeQuery q
173 | otherwise = s
175 -- |conversion of IRI to URI according to Step 2 in Section 3.1 in RFC3987
176 -- (for now at least, we apply this also to the regname rather than
177 -- punycoding)
178 escapeIRI :: String -> String
179 escapeIRI = NU.escapeURIString (not . escape)
180 where
181 -- |ucschar or iprivate in RFC3987
182 escape :: Char -> Bool
183 escape c = let i = fromEnum c in
184 i >= 0xA0 && i <= 0xD7FF ||
185 i >= 0xE000 && i <= 0xF8FF ||
186 i >= 0xF900 && i <= 0xFDCF ||
187 i >= 0xFDF0 && i <= 0xFFEF ||
188 i >= 0x10000 && i <= 0x1FFFD ||
189 i >= 0x20000 && i <= 0x2FFFD ||
190 i >= 0x30000 && i <= 0x3FFFD ||
191 i >= 0x40000 && i <= 0x4FFFD ||
192 i >= 0x50000 && i <= 0x5FFFD ||
193 i >= 0x60000 && i <= 0x6FFFD ||
194 i >= 0x70000 && i <= 0x7FFFD ||
195 i >= 0x80000 && i <= 0x8FFFD ||
196 i >= 0x90000 && i <= 0x9FFFD ||
197 i >= 0xA0000 && i <= 0xAFFFD ||
198 i >= 0xB0000 && i <= 0xBFFFD ||
199 i >= 0xC0000 && i <= 0xCFFFD ||
200 i >= 0xD0000 && i <= 0xDFFFD ||
201 i >= 0xE1000 && i <= 0xEFFFD ||
202 i >= 0xF0000 && i <= 0xFFFFD ||
203 i >= 0x100000 && i <= 0x10FFFD