2 * Functions for RFC 3986 percent-encoding.
6 * This file was originally imported from the Squid project but has been
7 * significantly altered. The licence below is reproduced intact, but refers
8 * to files in Squid's repository, not in Samba. See COPYING for the GPLv3
9 * notice (being the later version mentioned below).
16 * AUTHOR: Harvest Derived
18 * SQUID Web Proxy Cache http://www.squid-cache.org/
19 * ----------------------------------------------------------
21 * Squid is the result of efforts by numerous individuals from
22 * the Internet community; see the CONTRIBUTORS file for full
23 * details. Many organizations have provided support for Squid's
24 * development; see the SPONSORS file for full details. Squid is
25 * Copyrighted (C) 2001 by the Regents of the University of
26 * California; see the COPYRIGHT file for full details. Squid
27 * incorporates software developed and/or copyrighted by other
28 * sources; see the CREDITS file for full details.
30 * This program is free software; you can redistribute it and/or modify
31 * it under the terms of the GNU General Public License as published by
32 * the Free Software Foundation; either version 2 of the License, or
33 * (at your option) any later version.
35 * This program is distributed in the hope that it will be useful,
36 * but WITHOUT ANY WARRANTY; without even the implied warranty of
37 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
38 * GNU General Public License for more details.
40 * You should have received a copy of the GNU General Public License
41 * along with this program; if not, write to the Free Software
42 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
48 #include "lib/util/samba_util.h"
50 #define RFC1738_ENCODE 1
51 #define RFC1738_RESERVED 2
54 * According to RFC 1738, "$-_.+!*'()," are not reserved or unsafe, but as
55 * that has been obsolete since 2004, we aim instead for RFC 3986, where:
57 * reserved = : / ? # [ ] @ ! $ & ' ( ) * + , ; =
58 * unreserved = ALPHA DIGIT - . _ ~
60 * and whatever is not in either of those is what RFC 1738 called "unsafe",
61 * meaning that they are canonically but not mandatorily escaped.
63 * Characters below 0x20 or above 0x7E are always encoded.
66 static const unsigned char escapees
[127] = {
67 [' '] = RFC1738_ENCODE
,
68 ['"'] = RFC1738_ENCODE
,
69 ['%'] = RFC1738_ENCODE
,
70 ['<'] = RFC1738_ENCODE
,
71 ['>'] = RFC1738_ENCODE
,
72 ['\\'] = RFC1738_ENCODE
,
73 ['^'] = RFC1738_ENCODE
,
74 ['`'] = RFC1738_ENCODE
,
75 ['{'] = RFC1738_ENCODE
,
76 ['|'] = RFC1738_ENCODE
,
77 ['}'] = RFC1738_ENCODE
,
78 /* reserved : / ? # [ ] @ ! $ & ' ( ) * + , ; = */
79 [':'] = RFC1738_RESERVED
,
80 ['/'] = RFC1738_RESERVED
,
81 ['?'] = RFC1738_RESERVED
,
82 ['#'] = RFC1738_RESERVED
,
83 ['['] = RFC1738_RESERVED
,
84 [']'] = RFC1738_RESERVED
,
85 ['@'] = RFC1738_RESERVED
,
86 ['!'] = RFC1738_RESERVED
,
87 ['$'] = RFC1738_RESERVED
,
88 ['&'] = RFC1738_RESERVED
,
89 ['\''] = RFC1738_RESERVED
,
90 ['('] = RFC1738_RESERVED
,
91 [')'] = RFC1738_RESERVED
,
92 ['*'] = RFC1738_RESERVED
,
93 ['+'] = RFC1738_RESERVED
,
94 [','] = RFC1738_RESERVED
,
95 [';'] = RFC1738_RESERVED
,
96 ['='] = RFC1738_RESERVED
,
100 * rfc1738_do_escape - fills a preallocated buffer with an escaped version of
103 * For canonical escaping, mask should be RFC1738_ENCODE | RFC1738_RESERVED.
104 * For mandatory escaping, mask should be RFC1738_RESERVED.
107 rfc1738_do_escape(char *buf
, size_t bufsize
,
108 const char *url
, size_t len
, unsigned char mask
)
112 for (i
= 0; i
< len
; i
++) {
113 unsigned int c
= (unsigned char) url
[i
];
114 if (c
> 126 || c
< 32 || (escapees
[c
] & mask
)) {
115 if (j
+ 3 >= bufsize
) {
118 (void) snprintf(&buf
[j
], 4, "%%%02X", c
);
121 if (j
+ 1 >= bufsize
) {
133 * rfc1738_escape_part - Returns a talloced buffer that contains the RFC 3986
134 * compliant, escaped version of the given url segment.
137 rfc1738_escape_part(TALLOC_CTX
*mem_ctx
, const char *url
)
142 size_t len
= strlen(url
);
143 if (len
>= SIZE_MAX
/ 3) {
147 bufsize
= len
* 3 + 1;
148 buf
= talloc_array(mem_ctx
, char, bufsize
);
153 talloc_set_name_const(buf
, buf
);
155 return rfc1738_do_escape(buf
, bufsize
, url
, len
,
156 RFC1738_ENCODE
| RFC1738_RESERVED
);
160 * rfc1738_unescape() - Converts url-escaped characters in the string.
162 * The two characters following a '%' in a string should be hex digits that
163 * describe an encoded byte. For example, "%25" is hex 0x25 or '%' in ASCII;
164 * this is the only way to include a % in the unescaped string. Any character
165 * can be escaped, including plain letters (e.g. "%61" for "a"). Anything
166 * other than 2 hex characters following the % is an error.
168 * The conversion is done in-place, which is always safe as unescapes can only
169 * shorten the string.
171 * Returns a pointer to the end of the string (that is, the '\0' byte), or
172 * NULL on error, at which point s is in an undefined state.
174 * Note that after `char *e = rfc1738_unescape(s)`, `strlen(s)` will not equal
175 * `e - s` if s originally contained "%00". You might want to check for this.
178 _PUBLIC_
char *rfc1738_unescape(char *s
)
180 size_t i
, j
; /* i is write, j is read */
181 for (i
= 0, j
= 0; s
[j
] != '\0'; i
++, j
++) {
186 ok
= hex_byte(&s
[j
+1], &v
);
190 j
+= 2; /* OK; hex_byte() has checked ahead */
191 s
[i
] = (unsigned char)v
;