1 # This Source Code Form is subject to the terms of the Mozilla Public
2 # License, v. 2.0. If a copy of the MPL was not distributed with this
3 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 from html
.parser
import HTMLParser
9 import mozpack
.path
as mozpath
11 from fluent
.syntax
import ast
, parse
, visitor
12 from mozlint
import result
13 from mozlint
.pathutils
import expand_exclusions
16 class TextElementHTMLParser(HTMLParser
):
17 """HTML Parser for TextElement.
19 TextElements may contain embedded html tags, which can include
20 quotes in attributes. We only want to check the actual text.
25 self
.extracted_text
= []
27 def handle_data(self
, data
):
28 self
.extracted_text
.append(data
)
31 class Linter(visitor
.Visitor
):
32 """Fluent linter implementation.
34 This subclasses the Fluent AST visitor. Methods are called corresponding
35 to each type of node in the Fluent AST. It is possible to control
36 whether a node is recursed into by calling the generic_visit method on
39 See the documentation here:
40 https://www.projectfluent.org/python-fluent/fluent.syntax/stable/usage.html
44 self
, path
, config
, exclusions
, contents
, offsets_and_lines
, brand_names
=[]
49 self
.exclusions
= exclusions
50 self
.contents
= contents
51 self
.offsets_and_lines
= offsets_and_lines
54 self
.identifier_re
= re
.compile(r
"[a-z0-9-]+")
55 self
.apostrophe_re
= re
.compile(r
"\w'")
56 self
.incorrect_apostrophe_re
= re
.compile(r
"\w\u2018\w")
57 self
.single_quote_re
= re
.compile(r
"'(.+)'")
58 self
.double_quote_re
= re
.compile(r
"\".+\"")
59 self.ellipsis_re = re.compile(r"\
.\
.\
.")
61 self.brand_names = brand_names
62 self.minimum_id_length = 9
65 # The resource comment should be at the top of the page after the license.
66 "node_can_be_resource_comment
": True,
67 # Group comments must be followed by a message. Two group comments are not
69 "can_have_group_comment
": True,
70 # Comment bound to the current message
72 # The current group comment
74 # Variables in the current message
90 # For XUL key/command setup.
93 # For download filenames:
95 # Used in the Firefox prefs
97 # Used by search-textbox.js
99 # Used in toolbar customization.
101 # Used in moz-message-bar.
103 # Used in dialogs (should be moved to using fluent IDs though)
105 "buttonaccesskeyaccept
",
107 "buttonaccesskeycancel
",
109 "buttonaccesskeyextra2
",
110 # Used in app menu notifications (should be moved to use fluent IDs)
113 "secondarybuttonlabel
",
114 "secondarybuttonaccesskey
",
115 # Commonly used in Lit-based web components
119 self.known_attribute_list = [a.lower() for a in attributes]
121 # Set this to true to debug print the root node's json. This is useful for
122 # writing new lint rules, or debugging existing ones.
123 self.debug_print_json = False
125 def generic_visit(self, node):
126 node_name = type(node).__name__
127 self.state["node_can_be_resource_comment
"] = self.state[
128 "node_can_be_resource_comment
"
130 # This is the root node.
131 node_name == "Resource
"
132 # Empty space is allowed.
133 or node_name == "Span
"
134 # Comments are allowed
135 or node_name == "Comment
"
138 if self.debug_print_json:
141 print(json.dumps(node.to_json(), indent=2))
142 # Only debug print the root node.
143 self.debug_print_json = False
145 super(Linter, self).generic_visit(node)
def visit_Attribute(self, node):
    """Visit an Attribute node by recursing into its value only.

    The attribute's identifier is intentionally not visited: it comes from
    the DOM, so the identifier checks do not apply to it.
    """
    attribute_value = node.value
    super().generic_visit(attribute_value)
151 def visit_FunctionReference(self, node):
152 # We don't recurse into function references, the identifiers there are
153 # allowed to be free form.
156 def visit_Message(self, node):
157 # There must be at least one message or term between group comments.
158 self.state["can_have_group_comment
"] = True
159 self.last_message_id = node.id.name
161 super().generic_visit(node)
163 # Do this here instead as visit_Attribute doesn't have access to the
165 for attr in node.attributes:
166 if not attr.id.name.lower() in self.known_attribute_list:
167 comment = self.state["comment
"] + self.state["group_comment
"]
168 if not f".{attr
.id.name
}" in comment:
172 "Use attributes designed
for localized content directly
."
173 " If script
-based processing
is necessary
, add a comment
"
174 f" explaining why
. The linter didn
't recognize: .{attr.id.name}",
178 # Check if variables are referenced in comments
179 if self.state["variables"]:
180 comments = self.state["comment"] + self.state["group_comment"]
181 missing_references = [
182 v for v in self.state["variables"] if f"${v}" not in comments
184 if missing_references:
188 "Messages including variables should have a comment "
189 "explaining what will replace the variable. "
190 "Missing references: "
191 + ", ".join([f"${m}" for m in missing_references]),
194 # Reset current comment and variable references after reading the
196 self.state["comment"] = ""
197 self.state["variables"] = []
def visit_Term(self, node):
    """Visit a Term node, then clear the per-entry comment state.

    Before recursing, a following group comment is marked as allowed and the
    current message id is cleared. After the children have been visited, the
    stored comment and the collected variable names are reset.
    """
    # A term (like a message) counts as content between two group comments.
    self.state["can_have_group_comment"] = True
    # A term is not a message, so no message id is recorded while inside it.
    self.last_message_id = None

    super().generic_visit(node)

    # The term has been read in full: drop its comment and variable list so
    # they do not carry over to the next entry.
    self.state.update({"comment": "", "variables": []})
210 def visit_MessageReference(self, node):
211 # We don't recurse into message references
, the identifiers are either
212 # checked elsewhere or are attributes and come from DOM.
215 def visit_Identifier(self
, node
):
217 self
.path
not in self
.exclusions
["ID01"]["files"]
218 and node
.name
not in self
.exclusions
["ID01"]["messages"]
219 and not self
.identifier_re
.fullmatch(node
.name
)
224 "Identifiers may only contain lowercase characters and -",
227 len(node
.name
) < self
.minimum_id_length
228 and self
.path
not in self
.exclusions
["ID02"]["files"]
229 and node
.name
not in self
.exclusions
["ID02"]["messages"]
234 f
"Identifiers must be at least {self.minimum_id_length} characters long",
237 def visit_TextElement(self
, node
):
238 parser
= TextElementHTMLParser()
239 parser
.feed(node
.value
)
240 for text
in parser
.extracted_text
:
241 # To check for apostrophes, first remove pairs of straight quotes
242 # used as delimiters.
243 cleaned_str
= re
.sub(self
.single_quote_re
, "\1", node
.value
)
244 if self
.apostrophe_re
.search(cleaned_str
):
248 "Strings with apostrophes should use foo\u2019s instead of foo's.",
250 if self
.incorrect_apostrophe_re
.search(text
):
254 "Strings with apostrophes should use foo\u2019s instead of foo\u2018s.",
256 if self
.single_quote_re
.search(text
):
260 "Single-quoted strings should use Unicode \u2018foo\u2019 instead of 'foo'.",
262 if self
.double_quote_re
.search(text
):
266 'Double-quoted strings should use Unicode \u201cfoo\u201d instead of "foo".',
268 if self
.ellipsis_re
.search(text
):
272 "Strings with an ellipsis should use the Unicode \u2026 character"
273 " instead of three periods",
276 # If part of a message, check for brand names
278 self
.last_message_id
is not None
279 and self
.path
not in self
.exclusions
["CO01"]["files"]
280 and self
.last_message_id
not in self
.exclusions
["CO01"]["messages"]
283 for brand
in self
.brand_names
:
285 found_brands
.append(brand
)
290 "Strings should use the corresponding terms instead of"
291 f
" hard-coded brand names ({', '.join(found_brands)})",
294 def visit_ResourceComment(self
, node
):
295 # This node is a comment with: "###"
296 if not self
.state
["node_can_be_resource_comment"]:
300 "Resource comments (###) should be placed at the top of the file, just "
301 "after the license header. There should only be one resource comment "
306 lines_after
= get_newlines_count_after(node
.span
, self
.contents
)
307 lines_before
= get_newlines_count_before(node
.span
, self
.contents
)
309 if node
.span
.end
== len(self
.contents
) - 1:
310 # This file only contains a resource comment.
317 "Resource comments (###) should be followed by one empty line.",
321 if lines_before
!= 2:
325 "Resource comments (###) should have one empty line above them.",
329 def visit_SelectExpression(self
, node
):
330 # We only want to visit the variant values, the identifiers in selectors
331 # and keys are allowed to be free form.
332 for variant
in node
.variants
:
333 super().generic_visit(variant
.value
)
335 # Store the variable used for the SelectExpression, excluding functions
338 type(node
.selector
) is ast
.VariableReference
339 and node
.selector
.id.name
not in self
.state
["variables"]
341 self
.state
["variables"].append(node
.selector
.id.name
)
def visit_Comment(self, node):
    """Record the content of a standalone comment (one starting with "#").

    The text is stored in ``self.state["comment"]``, replacing whatever
    comment was stored there before.
    """
    comment_text = node.content
    self.state["comment"] = comment_text
349 def visit_GroupComment(self
, node
):
350 # This node is a comment with: "##"
352 # Store the group comment
353 self
.state
["group_comment"] = node
.content
355 if not self
.state
["can_have_group_comment"]:
359 "Group comments (##) must be followed by at least one message "
360 "or term. Make sure that a single group comment with multiple "
361 "paragraphs is not separated by whitespace, as it will be "
362 "interpreted as two different comments.",
366 self
.state
["can_have_group_comment"] = False
368 lines_after
= get_newlines_count_after(node
.span
, self
.contents
)
369 lines_before
= get_newlines_count_before(node
.span
, self
.contents
)
371 if node
.span
.end
== len(self
.contents
) - 1:
372 # The group comment is the last thing in the file.
374 if node
.content
== "":
375 # Empty comments are allowed at the end of the file.
381 "Group comments (##) should not be at the end of the file, they should "
382 "always be above a message. Only an empty group comment is allowed at "
383 "the end of a file.",
391 "Group comments (##) should be followed by one empty line.",
395 if lines_before
!= 2:
399 "Group comments (##) should have an empty line before them.",
def visit_VariableReference(self, node):
    """Record the referenced variable's name for the later comment checks.

    Variable identifiers are free form, so nothing is validated here; the
    name is only appended to ``self.state["variables"]`` if it is not already
    in that list.
    """
    variable_name = node.id.name
    known_variables = self.state["variables"]
    if variable_name in known_variables:
        # Already recorded for the current entry; keep the list duplicate-free.
        return
    known_variables.append(variable_name)
410 def add_error(self
, node
, rule
, msg
, level
=None):
411 (col
, line
) = self
.span_to_line_and_col(node
.span
)
422 self
.results
.append(result
.from_config(self
.config
, **res
))
424 def span_to_line_and_col(self
, span
):
425 i
= bisect
.bisect_left(self
.offsets_and_lines
, (span
.start
, 0))
427 col
= span
.start
- self
.offsets_and_lines
[i
- 1][0]
430 return (col
, self
.offsets_and_lines
[i
][1])
433 def get_offsets_and_lines(contents
):
434 """Return a list consisting of tuples of (offset, line).
436 The Fluent AST contains spans of start and end offsets in the file.
437 This function returns a list of offsets and line numbers so that errors
438 can be reported using line and column.
442 for m
in re
.finditer(r
"\n", contents
):
443 result
.append((m
.start(), line
))
448 def get_newlines_count_after(span
, contents
):
449 # Determine the number of newlines.
451 for i
in range(span
.end
, len(contents
)):
452 assert contents
[i
] != "\r", "This linter does not handle \\r characters."
453 if contents
[i
] != "\n":
460 def get_newlines_count_before(span
, contents
):
461 # Determine the range of newline characters.
463 for i
in range(span
.start
- 1, 0, -1):
464 assert contents
[i
] != "\r", "This linter does not handle \\r characters."
465 if contents
[i
] != "\n":
472 def get_exclusions(root
):
474 mozpath
.join(root
, "tools", "lint", "fluent-lint", "exclusions.yml")
476 exclusions
= list(yaml
.safe_load_all(f
))[0]
477 for error_type
in exclusions
:
478 exclusions
[error_type
]["files"] = set(
479 [mozpath
.join(root
, x
) for x
in exclusions
[error_type
]["files"]]
484 def get_branding_list(root
, brand_files
):
485 class MessageExtractor(visitor
.Visitor
):
488 self
.last_message_id
= None
def visit_Term(self, node):
    """Remember the id of the term being entered, then visit its children."""
    term_id = node.id.name
    # visit_TextElement checks this attribute to decide whether to collect text.
    self.last_message_id = term_id
    self.generic_visit(node)
def visit_TextElement(self, node):
    # Collect this element's text into self.brands when a term id is pending
    # (self.last_message_id is set by visit_Term), then continue the traversal.
    # NOTE(review): the nesting below follows the conventional layout of this
    # method; confirm that the reset belongs inside the `if` and that the
    # recursion is unconditional.
    if self.last_message_id:
        self.brands += [node.value]
        # Clear the pending id so later text elements are not collected
        # until another term has been entered.
        self.last_message_id = None
    self.generic_visit(node)
500 extractor
= MessageExtractor()
502 for brand_path
in brand_files
:
503 brand_file
= mozpath
.join(root
, brand_path
)
504 if os
.path
.exists(brand_file
):
505 with
open(brand_file
, encoding
="utf-8") as f
:
506 messages
= parse(f
.read())
507 extractor
.visit(messages
)
509 return list(set(extractor
.brands
))
512 def lint(paths
, config
, fix
=None, **lintargs
):
513 root
= lintargs
["root"]
514 files
= list(expand_exclusions(paths
, config
, root
))
515 exclusions
= get_exclusions(root
)
516 brand_files
= config
.get("brand-files")
517 brand_names
= get_branding_list(root
, brand_files
)
520 contents
= open(path
, "r", encoding
="utf-8").read()
526 get_offsets_and_lines(contents
),
529 linter
.visit(parse(contents
))
530 results
.extend(linter
.results
)