2 # SPDX-License-Identifier: GPL-2.0
3 # Copyright Thomas Gleixner <tglx@linutronix.de>
5 from argparse
import ArgumentParser
6 from ply
import lex
, yacc
class ParserException(Exception):
    """Error raised while lexing or parsing an SPDX license expression.

    Attributes:
        tok: Token at which the problem was detected. It is None when the
            expression ended prematurely and no token is available; the
            reporting code reads ``tok.lexpos`` to compute a column.
        txt: Short human-readable description of the problem.
    """

    def __init__(self, tok, txt):
        # Handlers read these two attributes directly; nothing else is stored.
        self.tok = tok
        self.txt = txt
class SPDXException(Exception):
    """Error raised while reading the license data in the LICENSES tree.

    Attributes:
        el: Tree entry of the offending license file (its ``path`` is printed
            by the error handler), or None when no single file is to blame.
        txt: Short human-readable description of the problem.
    """

    def __init__(self, el, txt):
        # Handlers read these two attributes directly; nothing else is stored.
        self.el = el
        self.txt = txt
class SPDXdata(object):
    """Container for everything learned from the LICENSES directory.

    Attributes:
        license_files: Number of license files seen.
        exception_files: Number of exception files seen.
        licenses: List of valid license identifiers (stored upper-cased by
            the reader).
        exceptions: Mapping of exception identifier to the list of license
            identifiers that exception may be combined with.
    """

    def __init__(self):
        self.license_files = 0
        self.exception_files = 0
        # Fresh containers per instance; the reader appends to / indexes these.
        self.licenses = []
        self.exceptions = {}
32 class dirinfo(object):
38 def update(self
, fname
, basedir
, miss
):
43 bdir
= os
.path
.dirname(fname
)
44 if bdir
== basedir
.rstrip('/'):
45 self
.files
.append(fname
)
47 # Read the spdx data from the LICENSES directory
48 def read_spdxdata(repo
):
50 # The subdirectories of LICENSES in the kernel source.
51 # Note: "exceptions" must be parsed as the last directory, because every
51 # exception is checked against the license IDs collected from the others.
52 license_dirs
= [ "preferred", "dual", "deprecated", "exceptions" ]
53 lictree
= repo
.head
.commit
.tree
['LICENSES']
57 for d
in license_dirs
:
58 for el
in lictree
[d
].traverse():
59 if not os
.path
.isfile(el
.path
):
63 for l
in open(el
.path
, encoding
="utf-8").readlines():
64 if l
.startswith('Valid-License-Identifier:'):
65 lid
= l
.split(':')[1].strip().upper()
66 if lid
in spdx
.licenses
:
67 raise SPDXException(el
, 'Duplicate License Identifier: %s' %lid
)
69 spdx
.licenses
.append(lid
)
71 elif l
.startswith('SPDX-Exception-Identifier:'):
72 exception
= l
.split(':')[1].strip().upper()
73 spdx
.exceptions
[exception
] = []
75 elif l
.startswith('SPDX-Licenses:'):
76 for lic
in l
.split(':')[1].upper().strip().replace(' ', '').replace('\t', '').split(','):
77 if not lic
in spdx
.licenses
:
78 raise SPDXException(None, 'Exception %s missing license %s' %(exception
, lic
))
79 spdx
.exceptions
[exception
].append(lic
)
81 elif l
.startswith("License-Text:"):
83 if not len(spdx
.exceptions
[exception
]):
84 raise SPDXException(el
, 'Exception %s is missing SPDX-Licenses' %exception
)
85 spdx
.exception_files
+= 1
87 spdx
.license_files
+= 1
91 class id_parser(object):
93 reserved
= [ 'AND', 'OR', 'WITH' ]
94 tokens
= [ 'LPAR', 'RPAR', 'ID', 'EXC' ] + reserved
96 precedence
= ( ('nonassoc', 'AND', 'OR'), )
100 def __init__(self
, spdx
):
104 self
.lexer
= lex
.lex(module
= self
, reflags
= re
.UNICODE
)
105 # Initialize the parser. No debug file is written and no parser tables are
106 # stored on disk; the rules are small enough to be generated on the fly.
107 self
.parser
= yacc
.yacc(module
= self
, write_tables
= False, debug
= False)
108 self
.lines_checked
= 0
119 def set_dirinfo(self
, basedir
, dirdepth
):
121 self
.basedir
= basedir
122 bdir
= basedir
.lstrip('./').rstrip('/')
124 parts
= bdir
.split('/')
127 self
.dirdepth
= dirdepth
+ len(parts
)
129 # Validate License and Exception IDs
130 def validate(self
, tok
):
131 id = tok
.value
.upper()
133 if not id in self
.spdx
.licenses
:
134 raise ParserException(tok
, 'Invalid License ID')
136 elif tok
.type == 'EXC':
137 if id not in self
.spdx
.exceptions
:
138 raise ParserException(tok
, 'Invalid Exception ID')
139 if self
.lastid
not in self
.spdx
.exceptions
[id]:
140 raise ParserException(tok
, 'Exception not valid for license %s' %self
.lastid
)
142 elif tok
.type != 'WITH':
146 def t_RPAR(self
, tok
):
148 self
.lasttok
= tok
.type
151 def t_LPAR(self
, tok
):
153 self
.lasttok
= tok
.type
159 if self
.lasttok
== 'EXC':
161 raise ParserException(tok
, 'Missing parentheses')
163 tok
.value
= tok
.value
.strip()
164 val
= tok
.value
.upper()
166 if val
in self
.reserved
:
168 elif self
.lasttok
== 'WITH':
171 self
.lasttok
= tok
.type
def t_error(self, tok):
    """Lexer error hook: reject input that no token rule matches.

    Args:
        tok: Token object handed over by the lexer for the offending input.

    Raises:
        ParserException: Always, carrying ``tok`` and the text
            'Invalid token'.
    """
    raise ParserException(tok, 'Invalid token')
186 def p_error(self
, p
):
188 raise ParserException(None, 'Unfinished license expression')
190 raise ParserException(p
, 'Syntax error')
192 def parse(self
, expr
):
195 self
.parser
.parse(expr
, lexer
= self
.lexer
)
197 def parse_lines(self
, fd
, maxlines
, fname
):
203 line
= line
.decode(locale
.getpreferredencoding(False), errors
='ignore')
205 if self
.curline
> maxlines
:
207 self
.lines_checked
+= 1
208 if line
.find("SPDX-License-Identifier:") < 0:
210 expr
= line
.split(':')[1].strip()
211 # Remove trailing comment closure
212 if line
.strip().endswith('*/'):
213 expr
= expr
.rstrip('*/').strip()
214 # Remove trailing xml comment closure
215 if line
.strip().endswith('-->'):
216 expr
= expr
.rstrip('-->').strip()
217 # Special case for SH magic boot code files
218 if line
.startswith('LIST \"'):
219 expr
= expr
.rstrip('\"').strip()
223 # Should we check for more SPDX ids in the same file and
224 # complain if there are any?
229 except ParserException
as pe
:
231 col
= line
.find(expr
) + pe
.tok
.lexpos
233 sys
.stdout
.write('%s: %d:%d %s: %s\n' %(fname
, self
.curline
, col
, pe
.txt
, tok
))
235 sys
.stdout
.write('%s: %d:0 %s\n' %(fname
, self
.curline
, pe
.txt
))
236 self
.spdx_errors
+= 1
241 base
= os
.path
.dirname(fname
)
242 if self
.dirdepth
> 0:
243 parts
= base
.split('/')
246 while i
< self
.dirdepth
and i
< len(parts
) and len(parts
[i
]):
247 base
+= '/' + parts
[i
]
249 elif self
.dirdepth
== 0:
252 base
= './' + base
.rstrip('/')
255 di
= self
.spdx_dirs
.get(base
, dirinfo())
256 di
.update(fname
, base
, fail
)
257 self
.spdx_dirs
[base
] = di
259 class pattern(object):
260 def __init__(self
, line
):
262 self
.match
= self
.match_file
264 self
.match
= self
.match_dot
265 elif line
.endswith('/'):
266 self
.pattern
= line
[:-1]
267 self
.match
= self
.match_dir
268 elif line
.startswith('/'):
269 self
.pattern
= line
[1:]
270 self
.match
= self
.match_fn
def match_dot(self, fpath):
    """Return True when the last component of ``fpath`` starts with a dot.

    Only the basename is examined, so a hidden directory earlier in the
    path does not make a regular file match.
    """
    return os.path.basename(fpath).startswith('.')
def match_file(self, fpath):
    """Return True when the basename of ``fpath`` equals ``self.pattern``.

    The comparison is an exact, case-sensitive string match; no wildcard
    expansion is performed.
    """
    return os.path.basename(fpath) == self.pattern
def match_fn(self, fpath):
    """Return True when ``fpath`` matches the glob in ``self.pattern``.

    Uses ``fnmatch.fnmatchcase``, so the match is case-sensitive and is
    applied to the whole path string, not just the basename.
    """
    return fnmatch.fnmatchcase(fpath, self.pattern)
281 def match_dir(self
, fpath
):
282 if self
.match_fn(os
.path
.dirname(fpath
)):
284 return fpath
.startswith(self
.pattern
)
286 def exclude_file(fpath
):
287 for rule
in exclude_rules
:
288 if rule
.match(fpath
):
292 def scan_git_tree(tree
, basedir
, dirdepth
):
293 parser
.set_dirinfo(basedir
, dirdepth
)
294 for el
in tree
.traverse():
295 if not os
.path
.isfile(el
.path
):
297 if exclude_file(el
.path
):
300 with
open(el
.path
, 'rb') as fd
:
301 parser
.parse_lines(fd
, args
.maxlines
, el
.path
)
303 def scan_git_subtree(tree
, path
, dirdepth
):
304 for p
in path
.strip('/').split('/'):
306 scan_git_tree(tree
, path
.strip('/'), dirdepth
)
308 def read_exclude_file(fname
):
312 with
open(fname
) as fd
:
315 if line
.startswith('#'):
319 rules
.append(pattern(line
))
322 if __name__
== '__main__':
324 ap
= ArgumentParser(description
='SPDX expression checker')
325 ap
.add_argument('path', nargs
='*', help='Check path or file. If not given full git tree scan. For stdin use "-"')
326 ap
.add_argument('-d', '--dirs', action
='store_true',
327 help='Show [sub]directory statistics.')
328 ap
.add_argument('-D', '--depth', type=int, default
=-1,
329 help='Directory depth for -d statistics. Default: unlimited')
330 ap
.add_argument('-e', '--exclude',
331 help='File containing file patterns to exclude. Default: scripts/spdxexclude')
332 ap
.add_argument('-f', '--files', action
='store_true',
333 help='Show files without SPDX.')
334 ap
.add_argument('-m', '--maxlines', type=int, default
=15,
335 help='Maximum number of lines to scan in a file. Default 15')
336 ap
.add_argument('-v', '--verbose', action
='store_true', help='Verbose statistics output')
337 args
= ap
.parse_args()
339 # Sanity check path arguments
340 if '-' in args
.path
and len(args
.path
) > 1:
341 sys
.stderr
.write('stdin input "-" must be the only path argument\n')
345 # Use git to get the valid license expressions
346 repo
= git
.Repo(os
.getcwd())
349 # Initialize SPDX data
350 spdx
= read_spdxdata(repo
)
352 # Initialize the parser
353 parser
= id_parser(spdx
)
355 except SPDXException
as se
:
357 sys
.stderr
.write('%s: %s\n' %(se
.el
.path
, se
.txt
))
359 sys
.stderr
.write('%s\n' %se.txt
)
362 except Exception as ex
:
363 sys
.stderr
.write('FAIL: %s\n' %ex)
364 sys
.stderr
.write('%s\n' %traceback
.format_exc())
370 fname
= os
.path
.join(os
.path
.dirname(__file__
), 'spdxexclude')
371 exclude_rules
= read_exclude_file(fname
)
372 except Exception as ex
:
373 sys
.stderr
.write('FAIL: Reading exclude file %s: %s\n' %(fname
, ex
))
377 if len(args
.path
) and args
.path
[0] == '-':
378 stdin
= os
.fdopen(sys
.stdin
.fileno(), 'rb')
379 parser
.parse_lines(stdin
, args
.maxlines
, '-')
383 if os
.path
.isfile(p
):
384 parser
.parse_lines(open(p
, 'rb'), args
.maxlines
, p
)
385 elif os
.path
.isdir(p
):
386 scan_git_subtree(repo
.head
.reference
.commit
.tree
, p
,
389 sys
.stderr
.write('path %s does not exist\n' %p
)
393 scan_git_tree(repo
.head
.commit
.tree
, '.', args
.depth
)
395 ndirs
= len(parser
.spdx_dirs
)
398 for di
in parser
.spdx_dirs
.values():
403 sys
.stderr
.write('\n')
404 sys
.stderr
.write('License files: %12d\n' %spdx
.license_files
)
405 sys
.stderr
.write('Exception files: %12d\n' %spdx
.exception_files
)
406 sys
.stderr
.write('License IDs %12d\n' %len(spdx
.licenses
))
407 sys
.stderr
.write('Exception IDs %12d\n' %len(spdx
.exceptions
))
408 sys
.stderr
.write('\n')
409 sys
.stderr
.write('Files excluded: %12d\n' %parser
.excluded
)
410 sys
.stderr
.write('Files checked: %12d\n' %parser
.checked
)
411 sys
.stderr
.write('Lines checked: %12d\n' %parser
.lines_checked
)
413 pc
= int(100 * parser
.spdx_valid
/ parser
.checked
)
414 sys
.stderr
.write('Files with SPDX: %12d %3d%%\n' %(parser
.spdx_valid
, pc
))
415 missing
= parser
.checked
- parser
.spdx_valid
416 mpc
= int(100 * missing
/ parser
.checked
)
417 sys
.stderr
.write('Files without SPDX:%12d %3d%%\n' %(missing
, mpc
))
418 sys
.stderr
.write('Files with errors: %12d\n' %parser
.spdx_errors
)
420 sys
.stderr
.write('\n')
421 sys
.stderr
.write('Directories accounted: %8d\n' %ndirs
)
422 pc
= int(100 * dirsok
/ ndirs
)
423 sys
.stderr
.write('Directories complete: %8d %3d%%\n' %(dirsok
, pc
))
425 if ndirs
and ndirs
!= dirsok
and args
.dirs
:
427 sys
.stderr
.write('\n')
428 sys
.stderr
.write('Incomplete directories: SPDX in Files\n')
429 for f
in sorted(parser
.spdx_dirs
.keys()):
430 di
= parser
.spdx_dirs
[f
]
432 valid
= di
.total
- di
.missing
433 pc
= int(100 * valid
/ di
.total
)
434 sys
.stderr
.write(' %-80s: %5d of %5d %3d%%\n' %(f
, valid
, di
.total
, pc
))
436 if ndirs
and ndirs
!= dirsok
and args
.files
:
437 if args
.verbose
or args
.dirs
:
438 sys
.stderr
.write('\n')
439 sys
.stderr
.write('Files without SPDX:\n')
440 for f
in sorted(parser
.spdx_dirs
.keys()):
441 di
= parser
.spdx_dirs
[f
]
442 for f
in sorted(di
.files
):
443 sys
.stderr
.write(' %s\n' %f)
447 except Exception as ex
:
448 sys
.stderr
.write('FAIL: %s\n' %ex)
449 sys
.stderr
.write('%s\n' %traceback
.format_exc())