# SPDX-License-Identifier: GPL-2.0
# Copyright Thomas Gleixner <tglx@linutronix.de>
import locale
import os
import re
import sys
import traceback
from argparse import ArgumentParser

import git
from ply import lex, yacc
class ParserException(Exception):
    """Error raised while lexing or parsing a license expression.

    Attributes:
        tok: The offending lexer token, or None when no token is available
            (for example when the expression ends prematurely).
        txt: Human readable description of the problem.
    """

    def __init__(self, tok, txt):
        # Hand the text to Exception so str(exc) yields the message.
        super(ParserException, self).__init__(txt)
        self.tok = tok
        self.txt = txt
class SPDXException(Exception):
    """Error raised while reading the license data files.

    Attributes:
        el: The tree element being read when the problem was found, or None
            when the problem is not tied to one element. Reporters read
            ``el.path``.
        txt: Human readable description of the problem.
    """

    def __init__(self, el, txt):
        # Hand the text to Exception so str(exc) yields the message.
        super(SPDXException, self).__init__(txt)
        self.el = el
        self.txt = txt
class SPDXdata(object):
    """Container for the license information collected from LICENSES."""

    def __init__(self):
        # Number of license / exception files that were read completely
        self.license_files = 0
        self.exception_files = 0
        # Valid license identifiers (upper case)
        self.licenses = []
        # Maps an exception identifier (upper case) to the list of license
        # identifiers it may be combined with
        self.exceptions = {}
# Read the spdx data from the LICENSES directory
def read_spdxdata(repo):
    """Collect license and exception identifiers from the LICENSES tree.

    Args:
        repo: Repository object; ``repo.head.commit.tree['LICENSES']`` must
            yield a tree whose sub-trees can be traversed and whose elements
            carry a ``path`` attribute.

    Returns:
        An SPDXdata instance filled with the identifiers that were found.

    Raises:
        SPDXException: On a duplicate license identifier, on an exception
            that refers to an unknown license, or on an exception file that
            lists no licenses.
    """
    # The subdirectories of LICENSES in the kernel source
    # Note: exceptions needs to be parsed as last directory, because the
    # licenses an exception refers to must already be known.
    license_dirs = ["preferred", "dual", "deprecated", "exceptions"]
    lictree = repo.head.commit.tree['LICENSES']

    spdx = SPDXdata()

    for d in license_dirs:
        for el in lictree[d].traverse():
            if not os.path.isfile(el.path):
                continue

            # Identifier of the exception described by this file, if any
            exception = None
            # The context manager closes the file even on the early exits
            with open(el.path) as fd:
                for l in fd:
                    if l.startswith('Valid-License-Identifier:'):
                        lid = l.split(':')[1].strip().upper()
                        if lid in spdx.licenses:
                            raise SPDXException(el, 'Duplicate License Identifier: %s' % lid)
                        spdx.licenses.append(lid)

                    elif l.startswith('SPDX-Exception-Identifier:'):
                        exception = l.split(':')[1].strip().upper()
                        spdx.exceptions[exception] = []

                    elif l.startswith('SPDX-Licenses:'):
                        lics = l.split(':')[1].upper().strip()
                        lics = lics.replace(' ', '').replace('\t', '')
                        for lic in lics.split(','):
                            if lic not in spdx.licenses:
                                raise SPDXException(None, 'Exception %s missing license %s' % (exception, lic))
                            spdx.exceptions[exception].append(lic)

                    elif l.startswith("License-Text:"):
                        if exception:
                            if not spdx.exceptions[exception]:
                                raise SPDXException(el, 'Exception %s is missing SPDX-Licenses' % exception)
                            spdx.exception_files += 1
                        else:
                            spdx.license_files += 1
                        # Everything after this tag is the license text
                        break
    return spdx
class id_parser(object):
    """Lexer and parser for SPDX license expressions, with statistics.

    The instance serves as the PLY module for both lex and yacc. For that
    reason the ``t_*`` and ``p_*`` methods must not carry ordinary
    docstrings: PLY reads their docstrings as the token regex or the
    grammar rule.
    """

    reserved = ['AND', 'OR', 'WITH']
    tokens = ['LPAR', 'RPAR', 'ID', 'EXC'] + reserved

    precedence = (('nonassoc', 'AND', 'OR'),)

    # Characters skipped between tokens
    t_ignore = ' \t'

    def __init__(self, spdx):
        """Build lexer and parser and reset all counters.

        Args:
            spdx: Object providing ``licenses`` (container of valid license
                ids) and ``exceptions`` (exception id -> allowed licenses).
        """
        self.spdx = spdx
        self.lasttok = None
        self.lastid = None
        self.lexer = lex.lex(module=self, reflags=re.UNICODE)
        # Initialize the parser. No debug file and no parser rules stored on disk
        # The rules are small enough to be generated on the fly
        self.parser = yacc.yacc(module=self, write_tables=False, debug=False)
        self.lines_checked = 0
        self.checked = 0
        self.spdx_valid = 0
        self.spdx_errors = 0
        self.curline = 0

    # Validate License and Exception IDs
    def validate(self, tok):
        """Check a token against the known license and exception ids.

        Raises:
            ParserException: If the id is unknown, or if an exception is
                combined with a license it is not valid for.
        """
        ident = tok.value.upper()
        if tok.type == 'ID':
            if ident not in self.spdx.licenses:
                raise ParserException(tok, 'Invalid License ID')
            # Remembered so a following exception can be checked against it
            self.lastid = ident
        elif tok.type == 'EXC':
            if ident not in self.spdx.exceptions:
                raise ParserException(tok, 'Invalid Exception ID')
            if self.lastid not in self.spdx.exceptions[ident]:
                raise ParserException(tok, 'Exception not valid for license %s' % self.lastid)
            self.lastid = None
        elif tok.type != 'WITH':
            self.lastid = None

    # Lexer functions
    def t_RPAR(self, tok):
        r'\)'
        self.lasttok = tok.type
        return tok

    def t_LPAR(self, tok):
        r'\('
        self.lasttok = tok.type
        return tok

    def t_ID(self, tok):
        r'[A-Za-z.0-9\-+]+'
        # An id directly after an exception id is not a valid sequence
        if self.lasttok == 'EXC':
            raise ParserException(tok, 'Missing parentheses')

        tok.value = tok.value.strip()
        val = tok.value.upper()

        if val in self.reserved:
            tok.type = val
        elif self.lasttok == 'WITH':
            # The id following WITH names an exception, not a license
            tok.type = 'EXC'

        self.lasttok = tok.type
        self.validate(tok)
        return tok

    def t_error(self, tok):
        raise ParserException(tok, 'Invalid token')

    def p_expr(self, p):
        '''expr : ID
                | ID WITH EXC
                | expr AND expr
                | expr OR expr
                | LPAR expr RPAR'''
        pass

    def p_error(self, p):
        # p is None when the input ended in the middle of an expression
        if not p:
            raise ParserException(None, 'Unfinished license expression')
        raise ParserException(p, 'Syntax error')

    def parse(self, expr):
        """Parse one license expression; raises ParserException on error."""
        self.lasttok = None
        self.lastid = None
        self.parser.parse(expr, lexer=self.lexer)

    def parse_lines(self, fd, maxlines, fname):
        """Scan the head of a file for an SPDX tag and check its expression.

        Args:
            fd: Iterable yielding the lines of the file as bytes.
            maxlines: Number of leading lines to inspect.
            fname: Name used as prefix in error reports.

        Errors are reported on stdout and counted in ``spdx_errors``; no
        ParserException leaves this method.
        """
        self.checked += 1
        self.curline = 0
        try:
            for line in fd:
                line = line.decode(locale.getpreferredencoding(False), errors='ignore')
                self.curline += 1
                if self.curline > maxlines:
                    break
                self.lines_checked += 1
                if line.find("SPDX-License-Identifier:") < 0:
                    continue
                expr = line.split(':')[1].strip()
                # Remove trailing comment closure
                if line.strip().endswith('*/'):
                    expr = expr.rstrip('*/').strip()
                # Remove trailing xml comment closure
                if line.strip().endswith('-->'):
                    expr = expr.rstrip('-->').strip()
                # Special case for SH magic boot code files
                if line.startswith('LIST \"'):
                    expr = expr.rstrip('\"').strip()
                self.parse(expr)
                self.spdx_valid += 1
                #
                # Should we check for more SPDX ids in the same file and
                # complain if there are any?
                #
                break

        except ParserException as pe:
            if pe.tok:
                col = line.find(expr) + pe.tok.lexpos
                tok = pe.tok.value
                sys.stdout.write('%s: %d:%d %s: %s\n' % (fname, self.curline, col, pe.txt, tok))
            else:
                # No token means no column: report column 0. The argument
                # tuple must match the three conversions in the format.
                sys.stdout.write('%s: %d:0 %s\n' % (fname, self.curline, pe.txt))
            self.spdx_errors += 1
def scan_git_tree(tree):
    """Check every file below a git tree for a valid SPDX expression.

    Uses the module level ``parser`` and ``args`` objects which the script
    entry point creates before any scan starts.

    Args:
        tree: Git tree object providing ``traverse()``; the yielded
            elements carry a ``path`` attribute.
    """
    for el in tree.traverse():
        # Exclude stuff which would make pointless noise
        # FIXME: Put this somewhere more sensible
        if el.path.startswith("LICENSES"):
            continue
        if el.path.find("license-rules.rst") >= 0:
            continue
        # Only entries that exist as regular files in the working tree can
        # be opened
        if not os.path.isfile(el.path):
            continue
        with open(el.path, 'rb') as fd:
            parser.parse_lines(fd, args.maxlines, el.path)
def scan_git_subtree(tree, path):
    """Scan the part of a git tree located below a directory path.

    Args:
        tree: Git tree object to start from; indexing it with a name must
            yield the sub-tree of that name.
        path: Directory path relative to ``tree``; leading and trailing
            slashes are ignored.
    """
    # Walk down one path component at a time
    for p in path.strip('/').split('/'):
        tree = tree[p]
    scan_git_tree(tree)
# Script entry point: parse the command line, load the license data and
# check the requested files. Exit code is 1 on setup or usage errors.
if __name__ == '__main__':

    ap = ArgumentParser(description='SPDX expression checker')
    ap.add_argument('path', nargs='*', help='Check path or file. If not given full git tree scan. For stdin use "-"')
    ap.add_argument('-m', '--maxlines', type=int, default=15,
                    help='Maximum number of lines to scan in a file. Default 15')
    ap.add_argument('-v', '--verbose', action='store_true', help='Verbose statistics output')
    args = ap.parse_args()

    # Sanity check path arguments
    if '-' in args.path and len(args.path) > 1:
        sys.stderr.write('stdin input "-" must be the only path argument\n')
        sys.exit(1)

    try:
        # Use git to get the valid license expressions
        repo = git.Repo(os.getcwd())

        # Initialize SPDX data
        spdx = read_spdxdata(repo)

        # Initialize the parser
        parser = id_parser(spdx)

    except SPDXException as se:
        # Prefix the message with the file name when one is known
        if se.el:
            sys.stderr.write('%s: %s\n' % (se.el.path, se.txt))
        else:
            sys.stderr.write('%s\n' % se.txt)
        sys.exit(1)

    except Exception as ex:
        sys.stderr.write('FAIL: %s\n' % ex)
        sys.stderr.write('%s\n' % traceback.format_exc())
        sys.exit(1)

    try:
        if len(args.path) and args.path[0] == '-':
            stdin = os.fdopen(sys.stdin.fileno(), 'rb')
            parser.parse_lines(stdin, args.maxlines, '-')
        else:
            if args.path:
                for p in args.path:
                    if os.path.isfile(p):
                        # Close the file as soon as it has been checked
                        with open(p, 'rb') as fd:
                            parser.parse_lines(fd, args.maxlines, p)
                    elif os.path.isdir(p):
                        scan_git_subtree(repo.head.reference.commit.tree, p)
                    else:
                        sys.stderr.write('path %s does not exist\n' % p)
                        sys.exit(1)
            else:
                # Full git tree scan
                scan_git_tree(repo.head.commit.tree)

            if args.verbose:
                sys.stderr.write('\n')
                sys.stderr.write('License files:     %12d\n' % spdx.license_files)
                sys.stderr.write('Exception files:   %12d\n' % spdx.exception_files)
                sys.stderr.write('License IDs        %12d\n' % len(spdx.licenses))
                sys.stderr.write('Exception IDs      %12d\n' % len(spdx.exceptions))
                sys.stderr.write('\n')
                sys.stderr.write('Files checked:     %12d\n' % parser.checked)
                sys.stderr.write('Lines checked:     %12d\n' % parser.lines_checked)
                sys.stderr.write('Files with SPDX:   %12d\n' % parser.spdx_valid)
                sys.stderr.write('Files with errors: %12d\n' % parser.spdx_errors)

        sys.exit(0)

    except Exception as ex:
        # sys.exit() raises SystemExit, which is not an Exception subclass
        # and therefore passes through this handler untouched
        sys.stderr.write('FAIL: %s\n' % ex)
        sys.stderr.write('%s\n' % traceback.format_exc())
        sys.exit(1)