3 This script helps the RM to maintain config/chroot_local-includes/usr/share/tails/browser-localization/descriptions
5 It does so in two ways:
6 - the 'generate' subcommand will generate content for the
7 descriptions file, based on information in po/po-to-mozilla.toml
- the 'suggest' subcommand will help the RM add more lines
  to po/po-to-mozilla.toml
19 from argparse import ArgumentParser
20 from collections import defaultdict
21 from pathlib import Path
22 from urllib.parse import urljoin
23 from urllib.request import urlretrieve
29 from BeautifulSoup import BeautifulSoup
31 from bs4 import BeautifulSoup
34 LANGUAGE_RE = re.compile(r'^"Language:\s+(.*)\\n"$')
35 MAPFILE = "po/po-to-mozilla.toml"
36 TEMPDIR = Path("tmp/")
38 "config/chroot_local-includes/usr/share/tails/tbb-sha256sums.txt"
41 "config/chroot_local-includes/usr/share/tails/tbb-dist-url.txt"
def get_torbrowser_filename() -> str:
    """Return the Tor Browser tarball file name listed in ``TBB_SHASUMS``.

    The checksum file is scanned line by line.  The first line whose
    second-to-last dot-separated component is ``tar`` (as in ``*.tar.xz``)
    is treated as the tarball entry, and the text following the leading
    word and whitespace on that line is returned.

    Raises:
        Exception: if no line of the checksum file names a tarball.
    """
    with TBB_SHASUMS.open() as buf:
        for line in buf:
            entry = line.strip()
            parts = entry.split(".")
            # Blank or dot-less lines have fewer than two components; an
            # unguarded ``[-2]`` lookup would raise IndexError on them.
            if len(parts) < 2 or parts[-2] != "tar":
                continue
            match = re.match(r"^\w+\s+(.*)$", entry)
            # Keep scanning instead of crashing on ``None.group`` when a
            # line mentions ".tar." but is not "<word> <name>" shaped.
            if match:
                return match.group(1)
    raise Exception("tor browser tarball not found in tbb-sha256sums.txt")
def get_torbrowser_tarball() -> Path:
    """Return the local path of the Tor Browser tarball, fetching it if needed.

    The tarball is looked up in ``TEMPDIR``.  When it is not there yet, the
    base URL is read from ``TBB_PREFIX``, joined with the tarball file name
    and downloaded.  In every case ``sha256sum -c`` is then run against
    ``TBB_SHASUMS`` from inside ``TEMPDIR``.

    Raises:
        Exception: if the content of ``TBB_PREFIX`` is not an http(s) URL.
        subprocess.CalledProcessError: if ``sha256sum -c`` exits non-zero.
    """
    # Parse the checksum file once and reuse the name for path and URL.
    filename = get_torbrowser_filename()
    filepath = TEMPDIR / filename
    if not filepath.exists():
        TEMPDIR.mkdir(exist_ok=True)
        # read_text() closes the file; a bare open().read() leaks the handle.
        prefix = TBB_PREFIX.read_text().strip()
        if not prefix.startswith(("https://", "http://")):
            raise Exception(f"not a valid url: {prefix}")
        url = urljoin(prefix, filename)
        urlretrieve(url, filename=filepath)  # noqa: S310

    # sha256sum resolves the listed file names relative to the current
    # directory, hence the chdir.
    # NOTE(review): this needs TBB_SHASUMS to be reachable from inside
    # TEMPDIR (presumably an absolute path) -- confirm at its definition.
    with contextlib.chdir(TEMPDIR):
        subprocess.check_output(["/usr/bin/sha256sum", "-c", str(TBB_SHASUMS)])

    return filepath
def get_torbrowser_languages() -> list[str]:
    """Return the locales listed in the Tor Browser tarball.

    The list is read from ``res/multilocale.txt`` inside the
    ``tor-browser/Browser/omni.ja`` zip archive contained in the tarball.
    That file is decoded as ASCII and split on commas; surrounding
    whitespace is removed and empty entries are dropped.

    Raises:
        Exception: if ``omni.ja`` exists in the tarball but is not a
            regular file.
    """
    tarball = get_torbrowser_tarball()
    # Context managers make sure the tarball, the extracted member and the
    # zip archive are all closed, even when reading fails.
    with tarfile.open(tarball) as tar:
        omnija_raw = tar.extractfile("tor-browser/Browser/omni.ja")
        if omnija_raw is None:
            # extractfile() returns None for members that are not regular
            # files or links.
            raise Exception(
                "tor-browser/Browser/omni.ja is not a regular file in the tarball"
            )
        with omnija_raw, zipfile.ZipFile(omnija_raw) as omnija:
            contents = omnija.read("res/multilocale.txt").decode("ascii")

    # Strip first, then filter: a whitespace-only entry (for instance after
    # a trailing comma followed by a newline) must not end up as "".
    stripped = (locale.strip() for locale in contents.split(","))
    return [locale for locale in stripped if locale]
def get_language(pofile: Path) -> str:
    """
    Get language name associated with the pofile.

    Please note that this might be a language name (ie: `it` for Italian)
    or a full locale (ie: `pt_BR` for Brazilian Portuguese).

    The value is taken from the first line matching ``LANGUAGE_RE``.

    Raises:
        ValueError: if no line of the file matches ``LANGUAGE_RE``.
    """
    # Only the header line matched by LANGUAGE_RE matters, so undecodable
    # bytes elsewhere in the file are replaced rather than aborting the scan.
    # The explicit encoding keeps the result independent of the system locale.
    with pofile.open(encoding="utf-8", errors="replace") as buf:
        for line in buf:
            match = LANGUAGE_RE.match(line)
            if match:
                return match.group(1)
    raise ValueError(f"Could not extract language from file {pofile}")
def locale_to_mozilla(locale: str) -> str:
    """
    Convert a `lang_COUNTRY` locale into the `lang-COUNTRY:COUNTRY` form.

    A value that contains a colon (and no underscore) is returned unchanged.
    The country part is upper-cased in the output.

    Raises ValueError when the input mixes underscores and colons, when it
    has more than one underscore, or when either side of the underscore is
    missing.

    >>> locale_to_mozilla('ar_EG')
    'ar-EG:EG'
    >>> locale_to_mozilla('it')
    Traceback (most recent call last):
        ...
    ValueError: country not specified in 'it'
    >>> locale_to_mozilla('it_')
    Traceback (most recent call last):
        ...
    ValueError: country not specified in 'it_'
    >>> locale_to_mozilla('ar_EG:XX')
    Traceback (most recent call last):
        ...
    ValueError: The input format is invalid; you can't both have underscores and colons
    >>> locale_to_mozilla('ar:EG')
    'ar:EG'
    """
    if ":" in locale:
        if "_" in locale:
            raise ValueError(
                "The input format is invalid; you can't both have underscores and colons",
            )
        return locale
    if "_" not in locale:
        raise ValueError(f"country not specified in '{locale}'")
    lang, _, country = locale.partition("_")
    if "_" in country:
        # Tuple-unpacking a plain split() would raise an opaque
        # "too many values to unpack" here; name the real problem instead.
        raise ValueError(
            f"The input format is invalid; expected a single underscore in '{locale}'"
        )
    if not country:
        # A trailing underscore would otherwise yield the malformed "xx-:".
        raise ValueError(f"country not specified in '{locale}'")
    if not lang:
        raise ValueError(f"language not specified in '{locale}'")
    country = country.upper()
    return f"{lang}-{country}:{country}"
This class fetches a list of all possible locales.
128 self.locales = self.parse_table(
129 requests.get("https://lh.2xlibre.net/locales/", timeout=10).text,
131 self.languages = defaultdict(list)
132 for locale in self.locales:
133 self.languages[locale.split("_")[0]].append(locale)
135 def parse_table(self, body):
136 dom = BeautifulSoup(body, features="lxml")
138 for row in dom.find_all("tr"):
139 locale = row.select("td:first-child > a")[0].string
140 language = row.select("td:nth-child(2)")[0].string.strip("— ")
141 country = row.select("td:nth-child(3)")[0].string or ""
142 ret[locale] = (language, country.title())
146 class LocaleDescriptions:
149 self.languages_not_found = set()
150 with open(MAPFILE) as buf:
151 self.po_to_mozilla = toml.load(buf)
153 def get_all_available_locales(self) -> set[tuple[str, str, str]]:
156 for po in sorted(Path("po/").glob("*.po")):
157 moz_locale = get_language(po)
158 moz_locales.add(moz_locale)
159 ret.add((str(po), moz_locale, "po/"))
161 for loc in get_torbrowser_languages():
162 moz_locale = loc.replace("-", "_")
163 if moz_locale not in moz_locales:
164 moz_locales.add(moz_locale)
165 ret.add((moz_locale, moz_locale, "tbb"))
169 def get_all_mozlocales(self, warnings=True):
170 yield from self.po_to_mozilla.get("extra", {}).get("extra_languages", [])
171 for po, moz_locale, source in self.get_all_available_locales():
172 if "_" in moz_locale:
173 # See contribute/release_process/update_locale_descriptions#with-underscore
174 lang, sub = moz_locale.split("_", maxsplit=1)
175 yield f"{lang}-{sub}:{sub}"
176 elif moz_locale in self.po_to_mozilla["map"]:
177 # We've already met this, and encoded it in po-to-mozilla.toml
178 value = self.po_to_mozilla["map"][moz_locale]
179 values = [value] if isinstance(value, str) else value
180 for locale in values:
181 yield locale_to_mozilla(locale)
183 # It's probably a new language
186 f"Could not find {moz_locale} (from {po}({source})), "
187 f"please add it to {MAPFILE}",
191 self.languages_not_found.add(moz_locale)
193 def get_suggestions(self):
195 This encodes contribute/release_process/update_locale_descriptions#no-underscore
197 if not self.languages_not_found:
199 valid_locales = ValidLocales()
202 for lang in sorted(self.languages_not_found):
203 locales = valid_locales.languages[lang]
204 if len(locales) == 1:
205 # If there is a single locale for this language, then it's a no-brainer
207 details = ", ".join(valid_locales.locales[locale])
208 suggested_add += f'{lang}="{locale_to_mozilla(locale)}" # {details}\n'
# Otherwise, the RM must manually follow the process detailed in
# contribute/release_process/update_locale_descriptions.mdwn
212 others += f"{lang}: pick between\n"
213 for locale in locales:
214 details = ", ".join(valid_locales.locales[locale])
215 others += f' {lang}="{locale_to_mozilla(locale)}" # {details}\n'
219 "\n\n## You can add the following block as-is,"
220 " but please verify it first!\n"
222 text += suggested_add
228 p.set_defaults(mode="")
229 sub = p.add_subparsers()
230 generate = sub.add_parser("generate")
231 generate.set_defaults(mode="generate")
232 suggest = sub.add_parser("suggest")
233 suggest.set_defaults(mode="suggest")
234 doctest = sub.add_parser("doctest")
235 doctest.add_argument("-v", "--verbose", action="store_true", default=False)
236 doctest.set_defaults(mode="doctest")
242 parser = get_parser()
243 args = parser.parse_args()
244 helper = LocaleDescriptions()
247 print(parser.error("You need to specify a subcommand"))
248 elif args.mode == "doctest":
251 doctest.testmod(verbose=args.verbose)
254 set(helper.get_all_mozlocales(warnings=(args.mode == "generate")))
256 if args.mode == "generate":
257 for out in mozlocales:
261 elif args.mode == "suggest":
262 if not helper.n_errors:
264 suggestion = helper.get_suggestions()
265 print(suggestion, file=sys.stderr)
269 if __name__ == "__main__":