1 # ***** BEGIN LICENSE BLOCK *****
2 # Version: MPL 1.1/GPL 2.0/LGPL 2.1
4 # The contents of this file are subject to the Mozilla Public License Version
5 # 1.1 (the "License"); you may not use this file except in compliance with
6 # the License. You may obtain a copy of the License at
7 # http://www.mozilla.org/MPL/
9 # Software distributed under the License is distributed on an "AS IS" basis,
10 # WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
11 # for the specific language governing rights and limitations under the
14 # The Original Code is Effective TLD conversion code.
16 # The Initial Developer of the Original Code is
17 # Jeff Walden <jwalden+code@mit.edu>.
18 # Portions created by the Initial Developer are Copyright (C) 2008
19 # the Initial Developer. All Rights Reserved.
23 # Alternatively, the contents of this file may be used under the terms of
24 # either the GNU General Public License Version 2 or later (the "GPL"), or
25 # the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
26 # in which case the provisions of the GPL or the LGPL are applicable instead
27 # of those above. If you wish to allow use of your version of this file only
28 # under the terms of either the GPL or the LGPL, and not to allow others to
29 # use your version of this file under the terms of the MPL, indicate your
30 # decision by deleting the provisions above and replace them with the notice
31 # and other provisions required by the GPL or the LGPL. If you do not delete
32 # the provisions above, a recipient may use your version of this file under
33 # the terms of any one of the MPL, the GPL or the LGPL.
35 # ***** END LICENSE BLOCK *****
44 Processes a file containing effective TLD data. See the following URL for a
45 description of effective TLDs and of the file format that this script
46 processes (although for the latter you're better off just reading this file's
49 http://wiki.mozilla.org/Gecko:Effective_TLD_Service
52 def getEffectiveTLDs(path
):
53 file = codecs
.open(path
, "r", "UTF-8")
56 line
= file.readline()
57 # readline() returns an empty string only at end of file; a blank line still contains its terminator
61 # comment, empty, or superfluous line for explicitness purposes
62 if line
.startswith("//") or "." not in line
:
64 line
= re
.split(r
"[ \t\n]", line
, 1)[0]
65 entry
= EffectiveTLDEntry(line
)
66 domain
= entry
.domain()
67 assert domain
not in domains
, \
68 "repeating domain %s makes no sense" % domain
72 def _normalizeHostname(domain
):
74 Normalizes the given domain, component by component. ASCII components are
75 lowercased, while non-ASCII components are processed using the ToASCII
78 def convertLabel(label
):
81 return encodings
.idna
.ToASCII(label
)
82 return ".".join(map(convertLabel
, domain
.split(".")))
85 "True if s consists entirely of ASCII characters, false otherwise."
91 class EffectiveTLDEntry
:
93 Stores an entry in an effective-TLD name file.
99 def __init__(self
, line
):
101 Creates a TLD entry from a line of data, which must have been stripped of
104 if line
.startswith("!"):
105 self
._exception
= True
107 elif line
.startswith("*."):
112 self
._domain
= _normalizeHostname(domain
)
115 "The domain this represents."
119 "True if this entry's domain does not denote an effective TLD."
120 return self
._exception
123 "True if this entry represents a class of effective TLDs."
133 argv[1] is the effective TLD file to parse.
134 A C++ array of { domain, exception, wild } entries representing the
135 eTLD file is then printed to stdout.
144 for etld
in getEffectiveTLDs(sys
.argv
[1]):
145 exception
= boolStr(etld
.exception())
146 wild
= boolStr(etld
.wild())
147 print ' { "%s", %s, %s },' % (etld
.domain(), exception
, wild
)
148 print " { nsnull, PR_FALSE, PR_FALSE }"
151 if __name__
== '__main__':