2 * Copyright (c) 2012 Elias Norberg <xyzzy@kudzu.se>
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
19 #define TLD_TREE_END_NODE 1
20 #define TLD_TREE_EXCEPTION 2
22 struct tld_tree_node
{
23 struct tld_tree_node
*next
;
24 struct tld_tree_node
*child
;
29 struct tld_tree_node tld_tree_root
= { NULL
, NULL
, "" };
31 #define TREE_INSERT_CHILD(n, data) \
32 n->child = g_malloc(sizeof *n); \
33 n->child->next = NULL; \
34 n->child->child = NULL; \
35 n->child->flags = 0; \
38 #define TREE_INSERT_NEXT(n, data) \
39 n->next = g_malloc(sizeof *n); \
40 n->next->next = NULL; \
41 n->next->child = NULL; \
50 #define INITIAL_BIAS (72)
51 #define INITIAL_N (128)
54 adapt(int delta
, int numpoints
, int firsttime
)
59 delta
= delta
/ P_DAMP
;
63 delta
+= (delta
/ numpoints
);
66 while (delta
> (((P_BASE
- P_TMIN
) * P_TMAX
) / 2)) {
67 delta
= delta
/ (P_BASE
- P_TMIN
);
71 k
+= (((P_BASE
- P_TMIN
+ 1) * delta
) / (delta
+ P_SKEW
));
76 get_minimum_char(char *str
, int n
)
79 gunichar min
= 0xffffff;
81 for(; *str
; str
= g_utf8_next_char(str
)) {
82 ch
= g_utf8_get_char(str
);
83 if (ch
>= n
&& ch
< min
)
95 return (n
- 26) + '0';
99 punycode_encode(char *str
)
109 int bias
= INITIAL_BIAS
;
110 int h
, b
, m
, k
, t
, q
;
113 for (s
=str
; *s
; s
= g_utf8_next_char(s
)) {
114 c
= g_utf8_get_char(s
);
124 return g_strdup(output
);
126 h
= b
= strlen(output
);
132 len
= g_utf8_strlen(str
, -1);
134 m
= get_minimum_char(str
, n
);
135 delta
+= (m
- n
) * (h
+ 1);
137 for (s
=str
; *s
; s
= g_utf8_next_char(s
)) {
138 c
= g_utf8_get_char(s
);
143 for (k
=P_BASE
;; k
+=P_BASE
) {
146 else if(k
>= bias
+ P_TMAX
)
154 output
[l
++] = encode_digit(t
+((q
-t
)%(P_BASE
-t
)));
155 q
= (q
- t
) / (P_BASE
- t
);
157 output
[l
++] = encode_digit(q
);
158 bias
= adapt(delta
, h
+ 1, h
== b
);
169 output
[i
] = output
[i
-4];
176 return g_strdup(output
);
180 * strrchr2(str, saveptr, ch)
182 * Walk backwards through str, jumping to next 'ch'
183 * On first call, *saveptr should be set to NULL.
184 * On following calls, supply the same saveptr.
186 * Returns NULL when the whole string 'str' has been
187 * looped through. Otherwise returns the position
188 * before the next 'ch'.
191 strrchr2(const char *str
, const char **saveptr
, int ch
)
195 if (str
!= NULL
&& *saveptr
== NULL
) {
196 *saveptr
= str
+ strlen(str
);
197 } else if (str
== *saveptr
) {
201 for (ptr
= *saveptr
- 1; ptr
!= str
&& *ptr
!= ch
; ptr
--)
213 * Adds a tld-rule to the tree
216 tld_tree_add(const char *rule
)
218 struct tld_tree_node
*n
;
223 lbl
= strrchr2(rule
, &saveptr
, '.');
225 for (n
= &tld_tree_root
; lbl
!= NULL
;) {
227 if (strcmp(n
->lbl
, lbl
) == 0) {
228 lbl
= strrchr2(rule
, &saveptr
, '.');
237 if (n
->next
== NULL
) {
238 TREE_INSERT_NEXT(n
, lbl
);
241 lbl
= strrchr2(rule
, &saveptr
, '.');
248 TREE_INSERT_CHILD(n
, lbl
);
250 lbl
= strrchr2(rule
, &saveptr
, '.');
254 n
->flags
|= TLD_TREE_END_NODE
;
255 if (n
->lbl
[0] == '!') {
256 n
->flags
|= TLD_TREE_EXCEPTION
;
267 char *ptr
, *next_lbl
;
272 snprintf(file
, sizeof file
, "%s" PS
"tld-rules", resource_dir
);
273 fd
= fopen(file
, "r");
275 /* a poor replacement for the real list - but it's
276 * better than nothing.
279 startpage_add("Could not open %s: this file is required "
280 "to handle TLD whitelisting properly", file
);
285 ptr
= fgets(buf
, sizeof buf
, fd
);
286 if (ptr
== NULL
|| feof(fd
))
290 if ((ptr
= strstr(buf
, "//")) != NULL
)
292 /* skip anything after space or tab */
293 for (ptr
= buf
; *ptr
; ptr
++)
294 if (*ptr
== ' ' || *ptr
== '\t' ||
295 *ptr
== '\n' || *ptr
== '\r')
304 if (buf
[0] == '!' && buf
[0] == '*') {
311 /* split into labels, and convert them one by one */
313 if ((next_lbl
= strchr(ptr
, '.')))
316 enc_lbl
= punycode_encode(ptr
);
319 rule
= g_strdup_printf("%s%s%s", rp
, enc_lbl
,
320 next_lbl
? "." : "");
324 rule
= g_strdup_printf("%.1s%s%s",
325 extra_ch
? buf
:"", enc_lbl
,
341 * tld_get_suffix(domain)
343 * Find the public suffix for domain.
345 * Returns a pointer to the suffix position
346 * in domain, or NULL if no public suffix
350 tld_get_suffix(const char *domain
)
352 struct tld_tree_node
*n
;
354 const char *lbl
, *saveptr
;
355 const char *tmp_saveptr
, *tmp_lbl
;
359 if (domain
[0] == '.')
364 lbl
= strrchr2(domain
, &saveptr
, '.');
366 for (n
= &tld_tree_root
; n
!= NULL
&& lbl
!= NULL
;) {
368 if (!strlen(n
->lbl
)) {
373 if (n
->lbl
[0] == '*') {
374 if (n
->flags
& TLD_TREE_END_NODE
) {
376 tmp_saveptr
= saveptr
;
379 lbl
= strrchr2(domain
, &saveptr
, '.');
381 /* Save possible public suffix */
383 saveptr
= tmp_saveptr
;
391 if (strcmp(n
->lbl
, lbl
) == 0) {
392 if (n
->flags
& TLD_TREE_EXCEPTION
) {
393 /* We're done looking */
398 lbl
= strrchr2(domain
, &saveptr
, '.');
400 /* Possible public suffix - other rules might
403 if (n
->flags
& TLD_TREE_END_NODE
)
406 /* Domain too short */
408 /* Check if we have a child that is '*' */
409 for (n
= n
->child
; n
; n
= n
->next
)
410 if (n
->lbl
[0] == '*')
415 if (n
->child
== NULL
)
427 /* If we can't find a matching suffix, it can mean that either
428 * a) the user is surfing a local prefix
429 * b) the list is not properly updated
431 * In any case - in order not to break stuff while surfing
432 * new TLD's, we return the public suffix as the top 2 labels
434 * www.abc.xyz therefore has public suffix 'abc.xyz'
438 lbl
= strrchr2(domain
, &saveptr
, '.');
439 lbl
= strrchr2(domain
, &saveptr
, '.');
443 return ((char*)suffix
);