2 # SPDX-License-Identifier: GPL-2.0
3 # Copyright Thomas Gleixner <tglx@linutronix.de>
5 from argparse
import ArgumentParser
6 from ply
import lex
, yacc
14 class ParserException(Exception):
15 def __init__(self
, tok
, txt
):
19 class SPDXException(Exception):
20 def __init__(self
, el
, txt
):
24 class SPDXdata(object):
26 self
.license_files
= 0
27 self
.exception_files
= 0
31 # Read the spdx data from the LICENSES directory
def read_spdxdata(repo):
    """Collect license and exception identifiers from the LICENSES directory.

    Walks the "preferred", "other" and "exceptions" subtrees of
    ``repo.head.commit.tree['LICENSES']`` and parses the tag lines at the
    start of every regular file found there:

    * ``Valid-License-Identifier:`` adds a license id to ``spdx.licenses``
    * ``SPDX-Exception-Identifier:`` opens an entry in ``spdx.exceptions``
    * ``SPDX-Licenses:`` lists the licenses an exception may be combined with
    * ``License-Text:`` ends the header and bumps the matching file counter

    All identifiers are stored upper-cased.

    Raises:
        SPDXException: on a duplicate license id, on an exception that names
            a license which has not been registered, or on an exception file
            that reaches ``License-Text:`` without any ``SPDX-Licenses:``.

    Returns the populated SPDX data object.
    """
    # NOTE(review): the reviewed extract had elided the lines that create
    # and return `spdx`, reset `exception` per file, and the bare
    # continue/else/break statements. They are reconstructed here from the
    # visible statements and the caller (`spdx = read_spdxdata(repo)`);
    # confirm against the original file.

    # The subdirectories of LICENSES in the kernel source
    license_dirs = ["preferred", "other", "exceptions"]
    lictree = repo.head.commit.tree['LICENSES']

    spdx = SPDXdata()

    for d in license_dirs:
        for el in lictree[d].traverse():
            if not os.path.isfile(el.path):
                continue

            # Stays None for plain license files; set once the file
            # declares itself an exception.
            exception = None

            # Context manager so the handle is closed even when an
            # SPDXException aborts the scan.
            with open(el.path) as fd:
                for l in fd:
                    if l.startswith('Valid-License-Identifier:'):
                        lid = l.split(':')[1].strip().upper()
                        if lid in spdx.licenses:
                            raise SPDXException(el, 'Duplicate License Identifier: %s' % lid)
                        spdx.licenses.append(lid)

                    elif l.startswith('SPDX-Exception-Identifier:'):
                        exception = l.split(':')[1].strip().upper()
                        spdx.exceptions[exception] = []

                    elif l.startswith('SPDX-Licenses:'):
                        for lic in l.split(':')[1].upper().strip().replace(' ', '').replace('\t', '').split(','):
                            if lic not in spdx.licenses:
                                # Was '%(ex, lic)': 'ex' is undefined here,
                                # so the report itself raised NameError.
                                raise SPDXException(None, 'Exception %s missing license %s' % (exception, lic))
                            spdx.exceptions[exception].append(lic)

                    elif l.startswith("License-Text:"):
                        if exception:
                            if not len(spdx.exceptions[exception]):
                                # Was '%excid': undefined name, same
                                # NameError problem as above.
                                raise SPDXException(el, 'Exception %s is missing SPDX-Licenses' % exception)
                            spdx.exception_files += 1
                        else:
                            spdx.license_files += 1
                        # Header is finished; the license text itself is
                        # not scanned for tags.
                        break

    return spdx
74 class id_parser(object):
76 reserved
= [ 'AND', 'OR', 'WITH' ]
77 tokens
= [ 'LPAR', 'RPAR', 'ID', 'EXC' ] + reserved
79 precedence
= ( ('nonassoc', 'AND', 'OR'), )
83 def __init__(self
, spdx
):
87 self
.lexer
= lex
.lex(module
= self
, reflags
= re
.UNICODE
)
88 # Initialize the parser. No debug file and no parser rules stored on disk
89 # The rules are small enough to be generated on the fly
90 self
.parser
= yacc
.yacc(module
= self
, write_tables
= False, debug
= False)
91 self
.lines_checked
= 0
98 # Validate License and Exception IDs
99 def validate(self
, tok
):
100 id = tok
.value
.upper()
102 if not id in self
.spdx
.licenses
:
103 raise ParserException(tok
, 'Invalid License ID')
105 elif tok
.type == 'EXC':
106 if id not in self
.spdx
.exceptions
:
107 raise ParserException(tok
, 'Invalid Exception ID')
108 if self
.lastid
not in self
.spdx
.exceptions
[id]:
109 raise ParserException(tok
, 'Exception not valid for license %s' %self
.lastid
)
111 elif tok
.type != 'WITH':
115 def t_RPAR(self
, tok
):
117 self
.lasttok
= tok
.type
120 def t_LPAR(self
, tok
):
122 self
.lasttok
= tok
.type
128 if self
.lasttok
== 'EXC':
130 raise ParserException(tok
, 'Missing parentheses')
132 tok
.value
= tok
.value
.strip()
133 val
= tok
.value
.upper()
135 if val
in self
.reserved
:
137 elif self
.lasttok
== 'WITH':
140 self
.lasttok
= tok
.type
def t_error(self, tok):
    # Lexer error hook. Turns an unrecognised piece of input into a
    # ParserException that carries the offending token, so the caller can
    # report where the expression went wrong.
    raise ParserException(tok, 'Invalid token')
def p_error(self, p):
    # Parser error hook. Always raises ParserException; the message depends
    # on whether an offending token is available.
    # NOTE(review): the conditional between the two raises was elided in
    # the reviewed extract; the test on `p` is inferred from the None token
    # argument of the "unfinished" case - confirm against the original.
    if p:
        # A concrete token was rejected by the grammar.
        raise ParserException(p, 'Syntax error')
    # No token to blame: the input ended in the middle of an expression.
    raise ParserException(None, 'Unfinished license expression')
161 def parse(self
, expr
):
164 self
.parser
.parse(expr
, lexer
= self
.lexer
)
def parse_lines(self, fd, maxlines, fname):
    """Check the first SPDX-License-Identifier tag found in a file.

    Args:
        fd: iterable yielding the file's lines as bytes.
        maxlines: number of leading lines to inspect before giving up.
        fname: name used as prefix in error reports.

    Each line is decoded with the preferred locale encoding (undecodable
    bytes are dropped). The expression following the first tag is handed to
    self.parse(); scanning stops after that first tag. Parse failures are
    written to stdout as ``file: line:col message`` and counted in
    self.spdx_errors; nothing is raised for them.
    """
    # NOTE(review): the counter updates, the try/for scaffolding and the
    # continue/break statements were elided in the reviewed extract. They
    # are reconstructed from the visible uses of self.curline, the except
    # clause and the statistics attributes (checked, spdx_valid) - confirm
    # against the original file.
    self.checked += 1
    self.curline = 0
    try:
        for line in fd:
            line = line.decode(locale.getpreferredencoding(False), errors='ignore')
            self.curline += 1
            if self.curline > maxlines:
                break
            self.lines_checked += 1
            if line.find("SPDX-License-Identifier:") < 0:
                continue
            expr = line.split(':')[1].replace('*/', '').strip()
            self.parse(expr)
            self.spdx_valid += 1
            #
            # Should we check for more SPDX ids in the same file and
            # complain if there are any?
            #
            break

    except ParserException as pe:
        if pe.tok:
            # Column of the offending token within the source line.
            col = line.find(expr) + pe.tok.lexpos
            tok = pe.tok.value
            sys.stdout.write('%s: %d:%d %s: %s\n' % (fname, self.curline, col, pe.txt, tok))
        else:
            # No token available, so no column either. The format string
            # has three placeholders; the previous code passed a fourth,
            # unassigned 'col' argument and crashed while reporting.
            sys.stdout.write('%s: %d:0 %s\n' % (fname, self.curline, pe.txt))
        self.spdx_errors += 1
def scan_git_tree(tree):
    # Run the SPDX check over every regular file reachable from 'tree',
    # using the module-level 'parser' and the 'maxlines' command line
    # setting.
    # NOTE(review): the statements following the three filter conditions
    # were elided in the reviewed extract; they are taken to be 'continue'.
    for entry in tree.traverse():
        path = entry.path
        # Exclude stuff which would make pointless noise
        # FIXME: Put this somewhere more sensible
        noise = path.startswith("LICENSES") or path.find("license-rules.rst") >= 0
        if noise or not os.path.isfile(path):
            continue
        with open(path, 'rb') as fd:
            parser.parse_lines(fd, args.maxlines, path)
209 def scan_git_subtree(tree
, path
):
210 for p
in path
.strip('/').split('/'):
214 if __name__
== '__main__':
216 ap
= ArgumentParser(description
='SPDX expression checker')
217 ap
.add_argument('path', nargs
='*', help='Check path or file. If not given full git tree scan. For stdin use "-"')
218 ap
.add_argument('-m', '--maxlines', type=int, default
=15,
219 help='Maximum number of lines to scan in a file. Default 15')
220 ap
.add_argument('-v', '--verbose', action
='store_true', help='Verbose statistics output')
221 args
= ap
.parse_args()
223 # Sanity check path arguments
224 if '-' in args
.path
and len(args
.path
) > 1:
225 sys
.stderr
.write('stdin input "-" must be the only path argument\n')
229 # Use git to get the valid license expressions
230 repo
= git
.Repo(os
.getcwd())
233 # Initialize SPDX data
234 spdx
= read_spdxdata(repo
)
# Initialize the parser
237 parser
= id_parser(spdx
)
239 except SPDXException
as se
:
241 sys
.stderr
.write('%s: %s\n' %(se
.el
.path
, se
.txt
))
243 sys
.stderr
.write('%s\n' %se.txt
)
246 except Exception as ex
:
247 sys
.stderr
.write('FAIL: %s\n' %ex)
248 sys
.stderr
.write('%s\n' %traceback
.format_exc())
252 if len(args
.path
) and args
.path
[0] == '-':
253 stdin
= os
.fdopen(sys
.stdin
.fileno(), 'rb')
254 parser
.parse_lines(stdin
, args
.maxlines
, '-')
258 if os
.path
.isfile(p
):
259 parser
.parse_lines(open(p
, 'rb'), args
.maxlines
, p
)
260 elif os
.path
.isdir(p
):
261 scan_git_subtree(repo
.head
.reference
.commit
.tree
, p
)
263 sys
.stderr
.write('path %s does not exist\n' %p
)
267 scan_git_tree(repo
.head
.commit
.tree
)
270 sys
.stderr
.write('\n')
271 sys
.stderr
.write('License files: %12d\n' %spdx
.license_files
)
272 sys
.stderr
.write('Exception files: %12d\n' %spdx
.exception_files
)
273 sys
.stderr
.write('License IDs %12d\n' %len(spdx
.licenses
))
274 sys
.stderr
.write('Exception IDs %12d\n' %len(spdx
.exceptions
))
275 sys
.stderr
.write('\n')
276 sys
.stderr
.write('Files checked: %12d\n' %parser
.checked
)
277 sys
.stderr
.write('Lines checked: %12d\n' %parser
.lines_checked
)
278 sys
.stderr
.write('Files with SPDX: %12d\n' %parser
.spdx_valid
)
279 sys
.stderr
.write('Files with errors: %12d\n' %parser
.spdx_errors
)
283 except Exception as ex
:
284 sys
.stderr
.write('FAIL: %s\n' %ex)
285 sys
.stderr
.write('%s\n' %traceback
.format_exc())