3 -- A slightly modified copy of:
5 -- URI parsing, composition and relative URL resolution
8 -- RCS ID: $Id: url.lua,v 1.38 2006/04/03 04:45:42 diego Exp $
10 -- http://w3.impa.br/~diego/software/luasocket/url.html
15 -----------------------------------------------------------------------------
16 -- Encodes a string into its escaped hexadecimal representation
18 -- s: binary string to be encoded
20 -- escaped representation of string binary
21 -----------------------------------------------------------------------------
-- Percent-encodes s: every character other than an ASCII letter, digit or
-- "_" is replaced by "%" followed by its byte value as two lowercase hex
-- digits. string.gsub's results are returned directly, so a caller also
-- receives the substitution count as a second value.
-- NOTE(review): the enclosing function header and the closing `end`s are
-- not present in this copy of the file -- confirm against upstream.
23 return string.gsub(s
, "([^A-Za-z0-9_])", function(c
)
24 return string.format("%%%02x", string.byte(c
))
28 -----------------------------------------------------------------------------
29 -- Protects a path segment, to prevent it from interfering with URL parsing
32 -- s: binary string to be encoded
34 -- escaped representation of string binary
35 -----------------------------------------------------------------------------
-- Helper applied to the list literal that initialises segment_set.
-- Walks the array part of t, in order, with base.ipairs.
-- NOTE(review): the loop body, the return statement and the closing `end`s
-- are missing from this copy; presumably it returns a lookup table keyed by
-- the elements of t -- confirm against upstream.
36 local function make_set(t
)
38 for i
,v
in base
.ipairs(t
) do
44 -- these are allowed within a path segment, along with alphanumerics;
45 -- other characters must be escaped
-- protect_segment indexes this table with a single character and leaves the
-- character unescaped when the lookup is truthy.
-- NOTE(review): the closing "}" of the list is not present in this copy.
46 local segment_set
= make_set
{
47 "-", "_", ".", "!", "~", "*", "'", "(",
48 ")", ":", "@", "&", "=", "+", "$", ",",
-- Escapes one path segment. Each character of s that is not an ASCII
-- letter, digit or "_" is kept as-is when it is a key of segment_set and is
-- otherwise replaced by "%" plus its byte value as two lowercase hex digits.
-- The gsub results are returned directly (escaped string, then match count).
-- NOTE(review): the closing `end)` / `end` are missing from this copy.
51 local function protect_segment(s
)
52 return string.gsub(s
, "([^A-Za-z0-9_])", function (c
)
-- characters explicitly allowed inside a segment pass through untouched
53 if segment_set
[c
] then return c
54 else return string.format("%%%02x", string.byte(c
)) end
58 -----------------------------------------------------------------------------
59 -- Decodes a string from its escaped hexadecimal representation
61 -- s: binary string to be decoded
63 -- unescaped representation of string binary
64 -----------------------------------------------------------------------------
-- Decodes percent-escapes in s: each "%" followed by exactly two hex digits
-- is replaced by the character with that byte value. A "%" that is not
-- followed by two hex digits does not match the pattern and is left as-is.
-- The gsub results are returned directly (decoded string, then match count).
-- NOTE(review): the closing `end)` / `end` are missing from this copy.
65 function M
.unescape(s
)
66 return string.gsub(s
, "%%(%x%x)", function(hex
)
-- hex holds the two captured digits; parse them base-16 and emit that byte
67 return string.char(base
.tonumber(hex
, 16))
71 -----------------------------------------------------------------------------
72 -- Builds a path from a base path and a relative path
77 -- corresponding absolute path
78 -----------------------------------------------------------------------------
-- Resolves relative_path against base_path and simplifies "." and ".."
-- segments in the result.
-- NOTE(review): several lines of this function (the declaration of
-- `reduced`, the loop bookkeeping, the final return and the closing `end`s)
-- are missing from this copy -- confirm against upstream before relying on
-- the comments below.
79 local function absolute_path(base_path
, relative_path
)
-- a relative path that starts with "/" is already absolute
80 if string.sub(relative_path
, 1, 1) == "/" then return relative_path
end
-- drop everything after the last "/" of the base path
81 local path
= string.gsub(base_path
, "[^/]*$", "")
82 path
= path
.. relative_path
-- remove bare "./" segments; a segment that merely ends in "." is kept
83 path
= string.gsub(path
, "([^/]*%./)", function (s
)
84 if s
~= "./" then return s
else return "" end
-- a trailing "/." collapses to "/"
86 path
= string.gsub(path
, "/%.$", "/")
-- repeat until a pass leaves the path unchanged
88 while reduced
~= path
do
-- remove "<segment>/../" pairs, but leave "../../" itself in place
90 path
= string.gsub(reduced
, "([^/]*/%.%./)", function (s
)
91 if s
~= "../../" then return "" else return s
end
-- same reduction for a trailing "<segment>/..", leaving "../.." in place
94 path
= string.gsub(reduced
, "([^/]*/%.%.)$", function (s
)
95 if s
~= "../.." then return "" else return s
end
100 -----------------------------------------------------------------------------
101 -- Parses a url and returns a table with all its parts according to RFC 2396
102 -- The following grammar describes the names given to the URL parts
103 -- <url> ::= <scheme>://<authority>/<path>;<params>?<query>#<fragment>
104 -- <authority> ::= <userinfo>@<host>:<port>
105 -- <userinfo> ::= <user>[:<password>]
106 -- <path> ::= {<segment>/}<segment>
108 -- url: uniform resource locator of request
109 -- default: table with default values for each field
111 -- table with the following fields, where RFC naming conventions have
113 -- scheme, authority, userinfo, user, password, host, port,
114 -- path, params, query, fragment
116 -- the leading '/' in {/<path>} is considered part of <path>
117 -----------------------------------------------------------------------------
-- Splits url into its components by repeatedly matching one component with
-- string.gsub, recording it in `parsed`, and replacing it with "" so that
-- the remainder can be matched by the next step.
-- Returns nil, "invalid url" when url is nil or the empty string.
-- NOTE(review): the declaration of `parsed`, several callback bodies, the
-- final return and the closing `end`s are missing from this copy -- confirm
-- against upstream.
118 function M
.parse(url
, default
)
119 -- initialize default parameters
-- when default is nil the loop walks `parsed` itself
121 for i
,v
in base
.pairs(default
or parsed
) do parsed
[i
] = v
end
122 -- empty url is parsed to nil
123 if not url
or url
== "" then return nil, "invalid url" end
125 -- url = string.gsub(url, "%s", "")
-- match "#" and everything after it
-- NOTE(review): callback body missing; presumably stores the fragment
127 url
= string.gsub(url
, "#(.*)$", function(f
)
-- scheme: an alphanumeric followed by alphanumerics, "+", "-" or ".", then
-- ":"; it is stored and removed from url
132 url
= string.gsub(url
, "^([%w][%w%+%-%.]*)%:",
133 function(s
) parsed
.scheme
= s
; return "" end)
-- match a leading "//" and everything up to the next "/"
-- NOTE(review): callback body missing; presumably stores the authority
135 url
= string.gsub(url
, "^//([^/]*)", function(n
)
139 -- get query string
-- NOTE(review): callback body missing
140 url
= string.gsub(url
, "%?(.*)", function(q
)
-- match ";" and everything after it
-- NOTE(review): callback body missing; presumably stores the params
145 url
= string.gsub(url
, "%;(.*)", function(p
)
149 -- path is whatever was left
150 if url
~= "" then parsed
.path
= url
end
-- without an authority there is nothing further to split
151 local authority
= parsed
.authority
152 if not authority
then return parsed
end
-- userinfo is the text before the first "@"
153 authority
= string.gsub(authority
,"^([^@]*)@",
154 function(u
) parsed
.userinfo
= u
; return "" end)
-- port is the text after the last ":"
155 authority
= string.gsub(authority
, ":([^:]*)$",
156 function(p
) parsed
.port
= p
; return "" end)
-- whatever remains of the authority is the host
157 if authority
~= "" then parsed
.host
= authority
end
158 local userinfo
= parsed
.userinfo
159 if not userinfo
then return parsed
end
-- password is the text after the last ":" of userinfo; the rest is the user
160 userinfo
= string.gsub(userinfo
, ":([^:]*)$",
161 function(p
) parsed
.password
= p
; return "" end)
162 parsed
.user
= userinfo
166 -----------------------------------------------------------------------------
167 -- Rebuilds a parsed URL from its components.
168 -- Components are protected if any reserved or unallowed characters are found
170 -- parsed: parsed URL, as returned by parse
172 -- a string with the corresponding URL
173 -----------------------------------------------------------------------------
-- Assembles a URL string from the fields of `parsed`: the path is
-- re-encoded via parse_path/M.build_path, then params, query, authority,
-- scheme and fragment are attached with their delimiters when present.
-- NOTE(review): several lines (the conditions guarding the host/user
-- branches, the final return and the closing `end`s) are missing from this
-- copy -- confirm against upstream.
174 function M
.build(parsed
)
-- NOTE(review): parse_path is called unqualified here, but the only visible
-- definition is M.parse_path; unless an alias exists elsewhere in the file
-- this call hits a nil value -- confirm, and prefer M.parse_path.
175 local ppath
= parse_path(parsed
.path
or "")
176 local url
= M
.build_path(ppath
)
177 if parsed
.params
then url
= url
.. ";" .. parsed
.params
end
178 if parsed
.query
then url
= url
.. "?" .. parsed
.query
end
-- start from a pre-assembled authority, if any
179 local authority
= parsed
.authority
-- NOTE(review): the guard for this branch is missing from this copy;
-- presumably host/port override parsed.authority only when host is set
181 authority
= parsed
.host
182 if parsed
.port
then authority
= authority
.. ":" .. parsed
.port
end
183 local userinfo
= parsed
.userinfo
-- NOTE(review): guard missing here as well; presumably taken only when
-- parsed.user is set
185 userinfo
= parsed
.user
186 if parsed
.password
then
187 userinfo
= userinfo
.. ":" .. parsed
.password
190 if userinfo
then authority
= userinfo
.. "@" .. authority
end
192 if authority
then url
= "//" .. authority
.. url
end
193 if parsed
.scheme
then url
= parsed
.scheme
.. ":" .. url
end
194 if parsed
.fragment
then url
= url
.. "#" .. parsed
.fragment
end
195 -- url = string.gsub(url, "%s", "")
199 -----------------------------------------------------------------------------
200 -- Builds an absolute URL from a base and a relative URL according to RFC 2396
205 -- corresponding absolute url
206 -----------------------------------------------------------------------------
-- Resolves relative_url against base_url. base_url may be a URL string or
-- an already-parsed table; a table is rebuilt into a string with M.build.
-- Components absent from the relative URL are inherited from the base, and
-- the result is reassembled with M.build.
-- NOTE(review): the `else`/`end` lines of the branches and the closing
-- `end` are missing from this copy -- confirm against upstream.
207 function M
.absolute(base_url
, relative_url
)
-- NOTE(review): base_parsed is assigned with no visible `local`
-- declaration; unless it is declared elsewhere this creates a global.
208 if base
.type(base_url
) == "table" then
209 base_parsed
= base_url
210 base_url
= M
.build(base_parsed
)
212 base_parsed
= M
.parse(base_url
)
214 local relative_parsed
= M
.parse(relative_url
)
-- if either URL fails to parse, return the other one unchanged; a relative
-- URL that carries its own scheme is already absolute
215 if not base_parsed
then return relative_url
216 elseif not relative_parsed
then return base_url
217 elseif relative_parsed
.scheme
then return relative_url
-- inherit the scheme, then each further component the relative URL lacks
219 relative_parsed
.scheme
= base_parsed
.scheme
220 if not relative_parsed
.authority
then
221 relative_parsed
.authority
= base_parsed
.authority
222 if not relative_parsed
.path
then
223 relative_parsed
.path
= base_parsed
.path
224 if not relative_parsed
.params
then
225 relative_parsed
.params
= base_parsed
.params
226 if not relative_parsed
.query
then
227 relative_parsed
.query
= base_parsed
.query
-- merge the relative path onto the base path
-- NOTE(review): presumably the `else` branch of the path check above
231 relative_parsed
.path
= absolute_path(base_parsed
.path
or "",
232 relative_parsed
.path
)
235 return M
.build(relative_parsed
)
239 -----------------------------------------------------------------------------
240 -- Breaks a path into its segments, unescaping the segments
244 -- segment: a table with one entry per segment
245 -----------------------------------------------------------------------------
-- Splits path on "/" into a list of unescaped segments stored in `parsed`,
-- and sets parsed.is_absolute / parsed.is_directory to 1 when path begins /
-- ends with "/".
-- NOTE(review): the declaration of `parsed`, the return statement and the
-- closing `end`s are missing from this copy -- confirm against upstream.
-- NOTE(review): table.getn is deprecated in Lua 5.1 and absent from later
-- versions, which use the # operator -- confirm the target Lua version.
246 function M
.parse_path(path
)
249 --path = string.gsub(path, "%s", "")
-- gsub is used only for its callback: collect each non-empty run between "/"
250 string.gsub(path
, "([^/]+)", function (s
) table.insert(parsed
, s
) end)
-- the assignment keeps only the first result of M.unescape (the string)
251 for i
= 1, table.getn(parsed
) do
252 parsed
[i
] = M
.unescape(parsed
[i
])
254 if string.sub(path
, 1, 1) == "/" then parsed
.is_absolute
= 1 end
255 if string.sub(path
, -1, -1) == "/" then parsed
.is_directory
= 1 end
259 -----------------------------------------------------------------------------
260 -- Builds a path component from its segments, escaping protected characters.
262 -- parsed: path segments
263 -- unsafe: if true, segments are not protected before path is built
265 -- path: corresponding path string
266 -----------------------------------------------------------------------------
-- Joins the segments of `parsed` into a path string. Two variants are
-- visible: one concatenates the segments verbatim and one passes each
-- segment through protect_segment; per the header comment, `unsafe` selects
-- the verbatim variant. A trailing "/" is added when parsed.is_directory is
-- set and a leading "/" when parsed.is_absolute is set.
-- NOTE(review): the declaration of `path`, the `if unsafe` test, the loop
-- headers, the "/" separators between segments, the return and the closing
-- `end`s are missing from this copy -- confirm against upstream.
-- NOTE(review): table.getn is deprecated in Lua 5.1 and absent from later
-- versions, which use the # operator -- confirm the target Lua version.
267 function M
.build_path(parsed
, unsafe
)
269 local n
= table.getn(parsed
)
-- verbatim variant: segments are appended without escaping
272 path
= path
.. parsed
[i
]
-- last segment
276 path
= path
.. parsed
[n
]
277 if parsed
.is_directory
then path
= path
.. "/" end
-- protected variant: each segment is escaped by protect_segment first
-- (the concatenation keeps only its first result, the string)
281 path
= path
.. protect_segment(parsed
[i
])
285 path
= path
.. protect_segment(parsed
[n
])
286 if parsed
.is_directory
then path
= path
.. "/" end
289 if parsed
.is_absolute
then path
= "/" .. path
end