2 # SPDX-License-Identifier: GPL-2.0
3 # Copyright Thomas Gleixner <tglx@linutronix.de>
5 from argparse
import ArgumentParser
6 from ply
import lex
, yacc
class ParserException(Exception):
    """Error raised while lexing or parsing an SPDX license expression.

    Attributes:
        tok: The offending token, or None when the expression ended
             before it was complete.
        txt: Human readable description of the problem.
    """

    def __init__(self, tok, txt):
        # Forward the message so str(exc) yields the text itself rather
        # than the repr of the (tok, txt) argument tuple.
        super(ParserException, self).__init__(txt)
        self.tok = tok
        self.txt = txt
class SPDXException(Exception):
    """Error raised for inconsistent data in the LICENSES directory.

    Attributes:
        el:  The git tree element (its ``path`` is reported) the problem
             was found in, or None when no element is associated.
        txt: Human readable description of the problem.
    """

    def __init__(self, el, txt):
        # Forward the message so str(exc) yields the text itself rather
        # than the repr of the (el, txt) argument tuple.
        super(SPDXException, self).__init__(txt)
        self.el = el
        self.txt = txt
class SPDXdata(object):
    """Container for the license information read from LICENSES/."""

    def __init__(self):
        # Number of license / exception text files seen
        self.license_files = 0
        self.exception_files = 0
        # Valid license identifiers (upper case)
        self.licenses = []
        # Exception identifier (upper case) -> list of license identifiers
        # the exception may be combined with
        self.exceptions = {}
# Read the spdx data from the LICENSES directory
def read_spdxdata(repo):
    """Collect the valid license and exception identifiers.

    Walks the LICENSES/ subtree of the HEAD commit of *repo* and reads the
    tag lines at the top of every license file found in the work tree.

    Args:
        repo: Repository object; ``repo.head.commit.tree['LICENSES']`` must
              be indexable by subdirectory name and support ``traverse()``.

    Returns:
        An SPDXdata instance filled with the identifiers and file counters.

    Raises:
        SPDXException: On a duplicate license identifier, on an exception
            which references an unknown license, or on an exception file
            which names no licenses at all.
    """

    # The subdirectories of LICENSES in the kernel source.
    # "exceptions" has to come last: the licenses an exception refers to
    # are checked against the identifiers collected so far.
    license_dirs = [ "preferred", "other", "exceptions" ]
    lictree = repo.head.commit.tree['LICENSES']

    spdx = SPDXdata()

    for d in license_dirs:
        for el in lictree[d].traverse():
            if not os.path.isfile(el.path):
                continue

            # Set once the file turns out to describe an exception
            exception = None
            with open(el.path) as fd:
                for line in fd:
                    if line.startswith('Valid-License-Identifier:'):
                        lid = line.split(':')[1].strip().upper()
                        if lid in spdx.licenses:
                            raise SPDXException(el, 'Duplicate License Identifier: %s' %lid)
                        spdx.licenses.append(lid)

                    elif line.startswith('SPDX-Exception-Identifier:'):
                        exception = line.split(':')[1].strip().upper()
                        spdx.exceptions[exception] = []

                    elif line.startswith('SPDX-Licenses:'):
                        if exception is None:
                            raise SPDXException(el, 'SPDX-Licenses without SPDX-Exception-Identifier')
                        # Comma separated list; drop all blanks and tabs
                        lics = line.split(':')[1].upper().strip()
                        lics = lics.replace(' ', '').replace('\t', '')
                        for lic in lics.split(','):
                            if lic not in spdx.licenses:
                                raise SPDXException(None, 'Exception %s missing license %s' %(exception, lic))
                            spdx.exceptions[exception].append(lic)

                    elif line.startswith("License-Text:"):
                        if exception:
                            if not spdx.exceptions[exception]:
                                raise SPDXException(el, 'Exception %s is missing SPDX-Licenses' %exception)
                            spdx.exception_files += 1
                        else:
                            spdx.license_files += 1
                        # The tags end where the license text starts
                        break
    return spdx
# Lexer and parser for SPDX license expressions, built with ply.
#
# Note: ply takes the regular expression of a t_*() token function and
# the grammar of a p_*() rule function from the function's docstring.
# Those functions are therefore documented with comments only.
class id_parser(object):

    reserved = [ 'AND', 'OR', 'WITH' ]
    tokens = [ 'LPAR', 'RPAR', 'ID', 'EXC' ] + reserved

    precedence = ( ('nonassoc', 'AND', 'OR'), )

    t_ignore = ' \t'

    def __init__(self, spdx):
        """Build lexer and parser.

        Args:
            spdx: Object providing ``licenses`` and ``exceptions``, the
                  identifiers the tokens are validated against.
        """
        self.spdx = spdx
        self.lasttok = None
        self.lastid = None
        self.lexer = lex.lex(module=self, reflags=re.UNICODE)
        # Initialize the parser. No debug file and no parser rules stored on disk
        # The rules are small enough to be generated on the fly
        self.parser = yacc.yacc(module=self, write_tables=False, debug=False)
        # Statistics
        self.lines_checked = 0
        self.checked = 0
        self.spdx_valid = 0
        self.spdx_errors = 0
        self.curline = 0
        self.deepest = 0

    # Validate License and Exception IDs
    def validate(self, tok):
        """Check the token value against the known identifiers.

        Remembers the last license ID in self.lastid so that a following
        exception can be checked against it.

        Raises:
            ParserException: Unknown license ID, unknown exception ID, or
                an exception which is not valid for the preceding license.
        """
        ident = tok.value.upper()
        if tok.type == 'ID':
            if ident not in self.spdx.licenses:
                raise ParserException(tok, 'Invalid License ID')
            self.lastid = ident
        elif tok.type == 'EXC':
            if ident not in self.spdx.exceptions:
                raise ParserException(tok, 'Invalid Exception ID')
            if self.lastid not in self.spdx.exceptions[ident]:
                raise ParserException(tok, 'Exception not valid for license %s' %self.lastid)
            self.lastid = None
        elif tok.type != 'WITH':
            # Only WITH keeps the license ID alive for the exception check
            self.lastid = None

    # Lexer functions
    def t_RPAR(self, tok):
        r'\)'
        self.lasttok = tok.type
        return tok

    def t_LPAR(self, tok):
        r'\('
        self.lasttok = tok.type
        return tok

    # Identifiers and keywords. The token type is refined here: reserved
    # words get their own type and whatever follows WITH is an exception.
    def t_ID(self, tok):
        r'[A-Za-z.0-9\-+]+'

        # An exception must be followed by ')' or the end of the expression
        if self.lasttok == 'EXC':
            raise ParserException(tok, 'Missing parentheses')

        tok.value = tok.value.strip()
        val = tok.value.upper()

        if val in self.reserved:
            tok.type = val
        elif self.lasttok == 'WITH':
            tok.type = 'EXC'

        self.lasttok = tok.type
        self.validate(tok)
        return tok

    # Called by the lexer for input which matches no token
    def t_error(self, tok):
        raise ParserException(tok, 'Invalid token')

    # Grammar of a license expression. Validation happens in the lexer,
    # so the rule has no action.
    def p_expr(self, p):
        '''expr : ID
                | ID WITH EXC
                | expr AND expr
                | expr OR expr
                | LPAR expr RPAR'''
        pass

    # Called by the parser on a syntax error; p is None at end of input
    def p_error(self, p):
        if not p:
            raise ParserException(None, 'Unfinished license expression')
        raise ParserException(p, 'Syntax error')

    def parse(self, expr):
        """Parse one license expression; raises ParserException on error."""
        self.lasttok = None
        self.lastid = None
        self.parser.parse(expr, lexer=self.lexer)

    def parse_lines(self, fd, maxlines, fname):
        """Check the first SPDX-License-Identifier line of a file.

        Args:
            fd:       Iterable of byte strings, e.g. a file opened in 'rb' mode.
            maxlines: Number of lines to inspect before giving up.
            fname:    Name used in the error report.

        Errors are reported on stdout as '<fname>: <line>:<column> ...' and
        counted in self.spdx_errors; valid expressions are counted in
        self.spdx_valid.
        """
        self.checked += 1
        self.curline = 0
        try:
            for line in fd:
                line = line.decode(locale.getpreferredencoding(False), errors='ignore')
                self.curline += 1
                if self.curline > maxlines:
                    break
                self.lines_checked += 1
                if "SPDX-License-Identifier:" not in line:
                    continue
                expr = line.split(':')[1].strip()
                # Remove trailing comment closure
                if line.strip().endswith('*/'):
                    expr = expr.rstrip('*/').strip()
                # Special case for SH magic boot code files
                if line.startswith('LIST \"'):
                    expr = expr.rstrip('\"').strip()
                self.parse(expr)
                self.spdx_valid += 1
                #
                # Should we check for more SPDX ids in the same file and
                # complain if there are any?
                #
                break

        except ParserException as pe:
            if pe.tok:
                col = line.find(expr) + pe.tok.lexpos
                tok = pe.tok.value
                sys.stdout.write('%s: %d:%d %s: %s\n' %(fname, self.curline, col, pe.txt, tok))
            else:
                # No token, hence no column to report
                sys.stdout.write('%s: %d:0 %s\n' %(fname, self.curline, pe.txt))
            self.spdx_errors += 1
def scan_git_tree(tree):
    """Check every file of a git tree which exists in the work tree.

    Uses the module level ``parser`` and ``args`` objects set up by the
    script entry point.

    Args:
        tree: Git tree object; ``traverse()`` yields elements with a ``path``.
    """
    for el in tree.traverse():
        # Exclude stuff which would make pointless noise
        # FIXME: Put this somewhere more sensible
        if el.path.startswith("LICENSES"):
            continue
        if "license-rules.rst" in el.path:
            continue
        # Skip subtrees and entries which are not regular files on disk
        if not os.path.isfile(el.path):
            continue
        with open(el.path, 'rb') as fd:
            parser.parse_lines(fd, args.maxlines, el.path)
def scan_git_subtree(tree, path):
    """Scan only the part of *tree* below the directory *path*.

    Args:
        tree: Git tree object of the top level directory.
        path: Directory relative to the top of the tree; leading and
              trailing slashes are ignored.
    """
    # Descend one path component at a time
    for p in path.strip('/').split('/'):
        tree = tree[p]
    scan_git_tree(tree)
# Script entry point: parse the command line, read the license data from
# the git repository in the current directory and check the requested
# files. Exit code is 0 on success and 1 on any failure.
if __name__ == '__main__':

    ap = ArgumentParser(description='SPDX expression checker')
    ap.add_argument('path', nargs='*', help='Check path or file. If not given full git tree scan. For stdin use "-"')
    ap.add_argument('-m', '--maxlines', type=int, default=15,
                    help='Maximum number of lines to scan in a file. Default 15')
    ap.add_argument('-v', '--verbose', action='store_true', help='Verbose statistics output')
    args = ap.parse_args()

    # Sanity check path arguments
    if '-' in args.path and len(args.path) > 1:
        sys.stderr.write('stdin input "-" must be the only path argument\n')
        sys.exit(1)

    try:
        # Use git to get the valid license expressions
        repo = git.Repo(os.getcwd())
        # The files are read from the work tree, so a bare repository
        # cannot be checked
        if repo.bare:
            raise RuntimeError('bare git repository, no work tree to check')

        # Initialize SPDX data
        spdx = read_spdxdata(repo)

        # Initialize the parser
        parser = id_parser(spdx)

    except SPDXException as se:
        if se.el:
            sys.stderr.write('%s: %s\n' %(se.el.path, se.txt))
        else:
            sys.stderr.write('%s\n' %se.txt)
        sys.exit(1)

    except Exception as ex:
        sys.stderr.write('FAIL: %s\n' %ex)
        sys.stderr.write('%s\n' %traceback.format_exc())
        sys.exit(1)

    try:
        if len(args.path) and args.path[0] == '-':
            # Reopen stdin in binary mode; parse_lines() decodes the lines
            stdin = os.fdopen(sys.stdin.fileno(), 'rb')
            parser.parse_lines(stdin, args.maxlines, '-')
        else:
            if args.path:
                for p in args.path:
                    if os.path.isfile(p):
                        with open(p, 'rb') as fd:
                            parser.parse_lines(fd, args.maxlines, p)
                    elif os.path.isdir(p):
                        # head.commit also works with a detached HEAD and
                        # matches the full tree scan below
                        scan_git_subtree(repo.head.commit.tree, p)
                    else:
                        sys.stderr.write('path %s does not exist\n' %p)
                        sys.exit(1)
            else:
                # Full git tree scan
                scan_git_tree(repo.head.commit.tree)

            if args.verbose:
                sys.stderr.write('\n')
                sys.stderr.write('License files: %12d\n' %spdx.license_files)
                sys.stderr.write('Exception files: %12d\n' %spdx.exception_files)
                sys.stderr.write('License IDs %12d\n' %len(spdx.licenses))
                sys.stderr.write('Exception IDs %12d\n' %len(spdx.exceptions))
                sys.stderr.write('\n')
                sys.stderr.write('Files checked: %12d\n' %parser.checked)
                sys.stderr.write('Lines checked: %12d\n' %parser.lines_checked)
                sys.stderr.write('Files with SPDX: %12d\n' %parser.spdx_valid)
                sys.stderr.write('Files with errors: %12d\n' %parser.spdx_errors)

        sys.exit(0)

    except Exception as ex:
        sys.stderr.write('FAIL: %s\n' %ex)
        sys.stderr.write('%s\n' %traceback.format_exc())
        sys.exit(1)