1 # SPDX-License-Identifier: GPL-2.0
2 # Copyright 2019 Jonathan Corbet <corbet@lwn.net>
4 # Apply kernel-specific tweaks after the initial document processing
7 from docutils
import nodes
9 from sphinx
import addnodes
10 from sphinx
.errors
import NoUri
12 from itertools
import chain
15 # Python 2 lacks re.ASCII...
19 except AttributeError:
23 # Regex nastiness. Of course.
24 # Try to identify "function()" that's not already marked up some
25 # other way. Sphinx doesn't like a lot of stuff right after a
26 # :c:func: block (i.e. ":c:func:`mmap()`s" flakes out), so the last
27 # bit tries to restrict matches to things that won't create trouble.
# Pattern for a bare "name()" mention.  group(1) is the whole "name()" text
# and group(2) is the identifier alone.
RE_function = re.compile(
    r'\b('               # word boundary, then open group 1
    r'([a-zA-Z_]\w+)'    # group 2: identifier of at least two characters
    r'\(\))',            # literal empty parentheses, close group 1
    flags=ascii_p3,
)
32 # Sphinx 2 uses the same :c:type role for struct, union, enum and typedef
34 RE_generic_type
= re
.compile(r
'\b(struct|union|enum|typedef)\s+([a-zA-Z_]\w+)',
38 # Sphinx 3 uses a different C role for each one of struct, union, enum and
# One pattern per C type keyword.  In each of them group(1) is the keyword
# itself and group(2) is the identifier that follows it.
RE_struct = re.compile(
    r'\b(struct)\s+([a-zA-Z_]\w+)',
    flags=ascii_p3,
)
RE_union = re.compile(
    r'\b(union)\s+([a-zA-Z_]\w+)',
    flags=ascii_p3,
)
RE_enum = re.compile(
    r'\b(enum)\s+([a-zA-Z_]\w+)',
    flags=ascii_p3,
)
RE_typedef = re.compile(
    r'\b(typedef)\s+([a-zA-Z_]\w+)',
    flags=ascii_p3,
)
# Detects a reference to a documentation page: a path ending in ".rst" or
# ".txt", optionally prefixed with "Documentation/".  The extension is
# required by the pattern.
#   group(1): the "Documentation/" prefix, or None when it is absent
#   group(2): the path without its extension (may start with "../" steps)
#   group(4): the extension, "rst" or "txt"
RE_doc = re.compile(
    r'(\bDocumentation/)?'     # group 1: optional prefix
    r'((\.\./)*[\w\-/]+)'      # group 2: path; group 3: last "../" step
    r'\.(rst|txt)'             # group 4: mandatory extension
)
# Matches a line consisting of a ".. c:namespace:: <name>" directive;
# group(1) is the namespace name.  The two leading dots are escaped so that
# only the literal ".." directive marker matches, rather than any two
# arbitrary characters.
RE_namespace = re.compile(r'^\s*\.\.\s*c:namespace::\s*(\S+)\s*$')
55 # Reserved C words that we should skip when cross-referencing
Skipnames = [
    'for',
    'if',
    'register',
    'sizeof',
    'struct',
    'unsigned',
]
61 # Many places in the docs refer to common system calls. It is
62 # pointless to try to cross-reference them and, as has been known
63 # to happen, somebody defining a function by these names can lead
64 # to the creation of incorrect and confusing cross references. So
65 # just don't even try with these names.
67 Skipfuncs
= [ 'open', 'close', 'read', 'write', 'fcntl', 'mmap',
68 'select', 'poll', 'fork', 'execve', 'clone', 'ioctl',
74 # Detect references to commits.
# The named group "rev" is the 12 to 40 digit hexadecimal commit id.  An
# optional parenthesised, double-quoted title after the id is included in
# the overall match.  Matching is case-insensitive and "." also matches a
# newline, so the quoted title may span lines.
RE_git = re.compile(
    r'commit\s+(?P<rev>[0-9a-f]{12,40})'    # the word "commit" and the id
    r'(?:\s+\(".*?"\))?',                   # optional ("title") suffix
    flags=re.IGNORECASE | re.DOTALL,
)
79 def markup_refs(docname
, app
, node
):
84 # Associate each regex with the function that will markup its matches
86 markup_func_sphinx2
= {RE_doc
: markup_doc_ref
,
87 RE_function
: markup_c_ref
,
88 RE_generic_type
: markup_c_ref
}
90 markup_func_sphinx3
= {RE_doc
: markup_doc_ref
,
91 RE_function
: markup_func_ref_sphinx3
,
92 RE_struct
: markup_c_ref
,
93 RE_union
: markup_c_ref
,
94 RE_enum
: markup_c_ref
,
95 RE_typedef
: markup_c_ref
,
98 if sphinx
.version_info
[0] >= 3:
99 markup_func
= markup_func_sphinx3
101 markup_func
= markup_func_sphinx2
103 match_iterators
= [regex
.finditer(t
) for regex
in markup_func
]
105 # Sort all references by the starting position in text
107 sorted_matches
= sorted(chain(*match_iterators
), key
=lambda m
: m
.start())
108 for m
in sorted_matches
:
110 # Include any text prior to match as a normal text node.
113 repl
.append(nodes
.Text(t
[done
:m
.start()]))
116 # Call the function associated with the regex that matched this text and
117 # append its return to the text
119 repl
.append(markup_func
[m
.re
](docname
, app
, m
))
123 repl
.append(nodes
.Text(t
[done
:]))
127 # Keep track of cross-reference lookups that failed so we don't have to
def failure_seen(target):
    """Return True if *target* is already present in failed_lookups."""
    return target in failed_lookups
def note_failure(target):
    """Record *target* in failed_lookups so failure_seen() reports it."""
    failed_lookups[target] = True
137 # In sphinx3 we can cross-reference to C macro and function, each one with its
138 # own C role, but both match the same regex, so we try both.
140 def markup_func_ref_sphinx3(docname
, app
, match
):
141 cdom
= app
.env
.domains
['c']
143 # Go through the dance of getting an xref out of the C domain
145 base_target
= match
.group(2)
146 target_text
= nodes
.Text(match
.group(0))
148 possible_targets
= [base_target
]
149 # Check if this document has a namespace, and if so, try
150 # cross-referencing inside it first.
152 possible_targets
.insert(0, c_namespace
+ "." + base_target
)
154 if base_target
not in Skipnames
:
155 for target
in possible_targets
:
156 if (target
not in Skipfuncs
) and not failure_seen(target
):
157 lit_text
= nodes
.literal(classes
=['xref', 'c', 'c-func'])
158 lit_text
+= target_text
159 pxref
= addnodes
.pending_xref('', refdomain
= 'c',
160 reftype
= 'function',
165 # XXX The Latex builder will throw NoUri exceptions here,
166 # work around that by ignoring them.
169 xref
= cdom
.resolve_xref(app
.env
, docname
, app
.builder
,
170 'function', target
, pxref
,
181 def markup_c_ref(docname
, app
, match
):
182 class_str
= {# Sphinx 2 only
183 RE_function
: 'c-func',
184 RE_generic_type
: 'c-type',
186 RE_struct
: 'c-struct',
189 RE_typedef
: 'c-type',
191 reftype_str
= {# Sphinx 2 only
192 RE_function
: 'function',
193 RE_generic_type
: 'type',
201 cdom
= app
.env
.domains
['c']
203 # Go through the dance of getting an xref out of the C domain
205 base_target
= match
.group(2)
206 target_text
= nodes
.Text(match
.group(0))
208 possible_targets
= [base_target
]
209 # Check if this document has a namespace, and if so, try
210 # cross-referencing inside it first.
212 possible_targets
.insert(0, c_namespace
+ "." + base_target
)
214 if base_target
not in Skipnames
:
215 for target
in possible_targets
:
216 if not (match
.re
== RE_function
and target
in Skipfuncs
):
217 lit_text
= nodes
.literal(classes
=['xref', 'c', class_str
[match
.re
]])
218 lit_text
+= target_text
219 pxref
= addnodes
.pending_xref('', refdomain
= 'c',
220 reftype
= reftype_str
[match
.re
],
221 reftarget
= target
, modname
= None,
224 # XXX The Latex builder will throw NoUri exceptions here,
225 # work around that by ignoring them.
228 xref
= cdom
.resolve_xref(app
.env
, docname
, app
.builder
,
229 reftype_str
[match
.re
], target
, pxref
,
240 # Try to replace a documentation reference of the form Documentation/... with a
241 # cross reference to that page
243 def markup_doc_ref(docname
, app
, match
):
244 stddom
= app
.env
.domains
['std']
246 # Go through the dance of getting an xref out of the std domain
248 absolute
= match
.group(1)
249 target
= match
.group(2)
251 target
= "/" + target
253 pxref
= addnodes
.pending_xref('', refdomain
= 'std', reftype
= 'doc',
254 reftarget
= target
, modname
= None,
255 classname
= None, refexplicit
= False)
257 # XXX The Latex builder will throw NoUri exceptions here,
258 # work around that by ignoring them.
261 xref
= stddom
.resolve_xref(app
.env
, docname
, app
.builder
, 'doc',
266 # Return the xref if we got it; otherwise just return the plain text.
271 return nodes
.Text(match
.group(0))
273 def get_c_namespace(app
, docname
):
274 source
= app
.env
.doc2path(docname
)
275 with
open(source
) as f
:
277 match
= RE_namespace
.search(l
)
279 return match
.group(1)
def markup_git(docname, app, match):
    """Turn a matched commit mention into a link to git.kernel.org.

    *match* must provide the named group ``rev``; the full matched text
    becomes the visible link text.  *docname* and *app* are not used here.

    Returns a docutils ``reference`` node.
    """
    # While we could probably assume that we are running in a git
    # repository, we can't know for sure, so the link is built mechanically
    # without checking that the commit exists.  (Maybe something can be
    # done in the future to warn about these references if this is
    # explicitly requested.)
    rev = match.group('rev')
    label = nodes.Text(match.group(0))
    target_uri = f'https://git.kernel.org/torvalds/c/{rev}'
    return nodes.reference('', label, refuri=target_uri)
293 def auto_markup(app
, doctree
, name
):
295 c_namespace
= get_c_namespace(app
, name
)
296 def text_but_not_a_reference(node
):
# The nodes.literal test catches ``literal text``; its purpose is to
# avoid adding cross-references to functions that have been explicitly
# marked with :c:func:.
300 if not isinstance(node
, nodes
.Text
) or isinstance(node
.parent
, nodes
.literal
):
303 child_of_reference
= False
306 if isinstance(parent
, nodes
.Referential
):
307 child_of_reference
= True
309 parent
= parent
.parent
310 return not child_of_reference
313 # This loop could eventually be improved on. Someday maybe we
314 # want a proper tree traversal with a lot of awareness of which
315 # kinds of nodes to prune. But this works well for now.
317 for para
in doctree
.traverse(nodes
.paragraph
):
318 for node
in para
.traverse(condition
=text_but_not_a_reference
):
319 node
.parent
.replace(node
, markup_refs(name
, app
, node
))
322 app
.connect('doctree-resolved', auto_markup
)
324 'parallel_read_safe': True,
325 'parallel_write_safe': True,