1 -- This file is part of Diohsc
2 -- Copyright (C) 2020 Martin Bays <mbays@sdf.org>
4 -- This program is free software: you can redistribute it and/or modify
5 -- it under the terms of version 3 of the GNU General Public License as
6 -- published by the Free Software Foundation, or any later version.
8 -- You should have received a copy of the GNU General Public License
9 -- along with this program. If not, see http://www.gnu.org/licenses/.
11 {-# LANGUAGE OverloadedStrings #-}
40 import Control
.Monad
(mplus
, (<=<))
41 import Data
.Char (toLower)
42 import Data
.List
(dropWhileEnd
)
43 import Data
.Maybe (isNothing)
46 import qualified Network
.URI
as NU
48 defaultScheme
:: String
49 defaultScheme
= "gemini"
54 -- | Represents a normalised absolute URI with scheme and port defaults as above.
55 -- We use "Uri" rather than "URI" in camelcase,
56 -- because I prefer to think of it as a word rather than an acronym.
57 -- Still use "URI" if it's the first/only word of the identifier.
58 newtype URI
= URI
{uriUri
:: NU
.URI
}
60 instance Show URI
where
61 show (URI uri
) = show uri
63 uriPath
, uriQuery
, uriFragment
:: URI
-> String
64 uriPath
= NU
.uriPath
. uriUri
65 uriQuery
= NU
.uriQuery
. uriUri
66 uriFragment
= NU
.uriFragment
. uriUri
68 -- | strips trailing ':'
69 uriScheme
:: URI
-> String
70 uriScheme
= init . NU
.uriScheme
. uriUri
72 pathSegments
:: URI
-> [String]
73 pathSegments
(URI uri
) = NU
.pathSegments uri
76 nullUri
= URI NU
.nullURI
78 -- | URI reference. May be absolute. Not normalised.
79 newtype URIRef
= URIRef NU
.URI
81 instance Show URIRef
where
82 show (URIRef uri
) = show uri
84 normaliseUri
:: NU
.URI
-> URI
85 normaliseUri uri
= URI
$ uri
86 { NU
.uriPath
= (\p
-> if null p
then "/" else p
) .
87 NU
.normalizePathSegments
. NU
.normalizeEscape
$ NU
.uriPath uri
88 , NU
.uriScheme
= if null $ NU
.uriScheme uri
89 then defaultScheme
else toLower <$> NU
.uriScheme uri
90 , NU
.uriAuthority
= (\auth
-> auth
92 if NU
.uriPort auth
== ':' : show defaultPort
93 then "" else NU
.uriPort auth
94 , NU
.uriRegName
= toLower <$> NU
.uriRegName auth
96 <$> NU
.uriAuthority uri
97 , NU
.uriQuery
= NU
.normalizeEscape
$ NU
.uriQuery uri
100 stripUriForGemini
:: URI
-> URI
101 stripUriForGemini
(URI uri
) = URI
$ uri
102 { NU
.uriAuthority
= (\auth
-> auth
{NU
.uriUserInfo
= ""}) <$> NU
.uriAuthority uri
103 , NU
.uriFragment
= ""
106 parseAbsoluteUri
:: String -> Maybe URI
107 parseAbsoluteUri
= (normaliseUri
<$>) . NU
.parseURI
109 parseUriAsAbsolute
:: String -> Maybe URI
110 parseUriAsAbsolute s
= parseAbsoluteUri s `mplus` parseAbsoluteUri
(defaultScheme
++ "://" ++ s
)
112 parseUriReference
:: String -> Maybe URIRef
113 parseUriReference
= (URIRef
<$>) . NU
.parseURIReference
116 setQuery
:: String -> URI
-> URI
117 setQuery q
(URI uri
) = URI
$ uri
{ NU
.uriQuery
= q
}
119 stripUri
:: URI
-> URI
120 stripUri
(URI uri
) = URI
$ uri
{ NU
.uriPath
= dropWhileEnd
(== '/') $ NU
.uriPath uri
, NU
.uriQuery
= "" }
122 relativeTo
:: URIRef
-> URI
-> URI
123 relativeTo
(URIRef ref
) (URI uri
) = normaliseUri
$ NU
.relativeTo ref uri
125 -- | lift NU.relativeFrom, but set scheme when the result is absolute,
126 -- and avoid initial slash where possible, and prefer "." to "" and ".." to "../"
127 relativeFrom
:: URI
-> URI
-> URIRef
128 relativeFrom
(URI uri1
) (URI uri2
) =
129 URIRef
. fixDots
. stripSlash
. setScheme
$ NU
.relativeFrom uri1 uri2
where
130 setScheme ref |
isNothing (NU
.uriAuthority ref
) = ref
131 |
otherwise = ref
{ NU
.uriScheme
= NU
.uriScheme uri1
}
132 stripSlash ref |
'/':path
' <- NU
.uriPath ref
134 , ref
' <- ref
{ NU
.uriPath
= path
' }
135 , NU
.relativeTo ref
' uri2
== uri1
= ref
'
137 fixDots ref
= case NU
.uriPath ref
of
138 "" | ref
' <- ref
{ NU
.uriPath
= "." }
139 , NU
.relativeTo ref
' uri2
== uri1
-> ref
'
140 "../" -> ref
{ NU
.uriPath
= ".." }
143 uriRegName
:: URI
-> Maybe String
144 uriRegName
= (NU
.uriRegName
<$>) . NU
.uriAuthority
. uriUri
146 uriPort
:: URI
-> Maybe Int
147 uriPort
= (readPort
. NU
.uriPort
) <=< (NU
.uriAuthority
. uriUri
)
149 readPort
(':':n
) = readMay n
152 escapePathString
:: String -> String
153 escapePathString
= NU
.escapeURIString
(\c
-> NU
.isUnreserved c || c
== '/')
155 unescapeUriString
:: String -> String
156 unescapeUriString
= NU
.unEscapeString
158 -- | unreserved / sub-delims / ":" / "@" / "/" / "?"
159 isUnescapedInQuery
:: Char -> Bool
160 isUnescapedInQuery c
= NU
.isUnescapedInURI c
&& c `
notElem`
("#[]"::String)
162 escapeQuery
:: String -> String
163 escapeQuery
= NU
.escapeURIString isUnescapedInQuery
. withEscapes
166 withEscapes
('\\':'x
':h1
:h2
:s
) | Just c
<- readMay
$ "'\\x" <> [h1
,h2
,'\''] = c
:withEscapes s
167 withEscapes
('\\':'e
':s
) = '\ESC
':withEscapes s
168 withEscapes
('\\':'r
':s
) = '\r':withEscapes s
169 withEscapes
('\\':'n
':s
) = '\n':withEscapes s
170 withEscapes
('\\':'t
':s
) = '\t':withEscapes s
171 withEscapes
('\\':c
:s
) = c
:withEscapes s
172 withEscapes
(c
:s
) = c
:withEscapes s
174 -- |escape the query part of an unparsed uri string
175 escapeQueryPart
:: String -> String
177 |
(s
','?
':q
) <- break (== '?
') s
= s
' ++ '?
' : escapeQuery q
180 -- |conversion of IRI to URI according to Step 2 in Section 3.1 in RFC3987
181 -- (for now at least, we apply this also to the regname rather than
183 escapeIRI
:: String -> String
184 escapeIRI
= NU
.escapeURIString
(not . escape
)
186 -- |ucschar or iprivate in RFC3987
187 escape
:: Char -> Bool
188 escape c
= let i
= fromEnum c
in
189 i
>= 0xA0 && i
<= 0xD7FF ||
190 i
>= 0xE000 && i
<= 0xF8FF ||
191 i
>= 0xF900 && i
<= 0xFDCF ||
192 i
>= 0xFDF0 && i
<= 0xFFEF ||
193 i
>= 0x10000 && i
<= 0x1FFFD ||
194 i
>= 0x20000 && i
<= 0x2FFFD ||
195 i
>= 0x30000 && i
<= 0x3FFFD ||
196 i
>= 0x40000 && i
<= 0x4FFFD ||
197 i
>= 0x50000 && i
<= 0x5FFFD ||
198 i
>= 0x60000 && i
<= 0x6FFFD ||
199 i
>= 0x70000 && i
<= 0x7FFFD ||
200 i
>= 0x80000 && i
<= 0x8FFFD ||
201 i
>= 0x90000 && i
<= 0x9FFFD ||
202 i
>= 0xA0000 && i
<= 0xAFFFD ||
203 i
>= 0xB0000 && i
<= 0xBFFFD ||
204 i
>= 0xC0000 && i
<= 0xCFFFD ||
205 i
>= 0xD0000 && i
<= 0xDFFFD ||
206 i
>= 0xE1000 && i
<= 0xEFFFD ||
207 i
>= 0xF0000 && i
<= 0xFFFFD ||
208 i
>= 0x100000 && i
<= 0x10FFFD