about summary refs log tree commit diff stats
path: root/scripts/spdxcheck.py
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/spdxcheck.py')
-rwxr-xr-x scripts/spdxcheck.py 187
1 files changed, 171 insertions, 16 deletions
diff --git a/scripts/spdxcheck.py b/scripts/spdxcheck.py
index 04ddfc65c353..18cb9f5b3d3d 100755
--- a/scripts/spdxcheck.py
+++ b/scripts/spdxcheck.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0
# Copyright Thomas Gleixner <tglx@linutronix.de>
@@ -6,6 +6,7 @@ from argparse import ArgumentParser
from ply import lex, yacc
import locale
import traceback
+import fnmatch
import sys
import git
import re
@@ -28,6 +29,21 @@ class SPDXdata(object):
self.licenses = [ ]
self.exceptions = { }
+class dirinfo(object):
+    """Per-directory tally of scanned files and of files lacking SPDX."""
+
+    def __init__(self):
+        # Number of files accounted to this directory
+        self.total = 0
+        # Sum of the 'miss' values passed to update()
+        self.missing = 0
+        # './' prefixed paths of failing files located directly in the
+        # directory
+        self.files = []
+
+    def update(self, fname, basedir, miss):
+        """Account one scanned file to this directory.
+
+        fname:   path of the scanned file
+        basedir: directory this object accounts for; a trailing '/' is
+                 ignored when comparing
+        miss:    added to the missing counter; a truthy value marks the
+                 file as failing
+        """
+        self.missing += miss
+        self.total += 1
+        if not miss:
+            return
+        # Files in deeper subdirectories are counted above, but only the
+        # files which live directly in basedir are recorded by name.
+        path = './' + fname
+        if os.path.dirname(path) == basedir.rstrip('/'):
+            self.files.append(path)
+
# Read the spdx data from the LICENSES directory
def read_spdxdata(repo):
@@ -44,7 +60,7 @@ def read_spdxdata(repo):
continue
exception = None
- for l in open(el.path).readlines():
+ for l in open(el.path, encoding="utf-8").readlines():
if l.startswith('Valid-License-Identifier:'):
lid = l.split(':')[1].strip().upper()
if lid in spdx.licenses:
@@ -91,11 +107,25 @@ class id_parser(object):
self.parser = yacc.yacc(module = self, write_tables = False, debug = False)
self.lines_checked = 0
self.checked = 0
+ self.excluded = 0
self.spdx_valid = 0
self.spdx_errors = 0
+ self.spdx_dirs = {}
+ self.dirdepth = -1
+ self.basedir = '.'
self.curline = 0
self.deepest = 0
+    def set_dirinfo(self, basedir, dirdepth):
+        """Record the scan base directory and the absolute depth limit.
+
+        A negative dirdepth leaves basedir and dirdepth untouched.
+        Otherwise dirdepth is taken relative to basedir, so the number of
+        path components of basedir is added to it.
+        """
+        if dirdepth < 0:
+            return
+        self.basedir = basedir
+        # Drop leading '.' and '/' characters and trailing '/' before
+        # counting the components
+        bdir = basedir.lstrip('./').rstrip('/')
+        ncomponents = len(bdir.split('/')) if bdir else 0
+        self.dirdepth = dirdepth + ncomponents
+
# Validate License and Exception IDs
def validate(self, tok):
id = tok.value.upper()
@@ -167,8 +197,10 @@ class id_parser(object):
def parse_lines(self, fd, maxlines, fname):
self.checked += 1
self.curline = 0
+ fail = 1
try:
for line in fd:
+ line = line.decode(locale.getpreferredencoding(False), errors='ignore')
self.curline += 1
if self.curline > maxlines:
break
@@ -179,6 +211,9 @@ class id_parser(object):
# Remove trailing comment closure
if line.strip().endswith('*/'):
expr = expr.rstrip('*/').strip()
+ # Remove trailing xml comment closure
+ if line.strip().endswith('-->'):
+ expr = expr.rstrip('-->').strip()
# Special case for SH magic boot code files
if line.startswith('LIST \"'):
expr = expr.rstrip('\"').strip()
@@ -188,6 +223,7 @@ class id_parser(object):
# Should we check for more SPDX ids in the same file and
# complain if there are any?
#
+ fail = 0
break
except ParserException as pe:
@@ -196,31 +232,105 @@ class id_parser(object):
tok = pe.tok.value
sys.stdout.write('%s: %d:%d %s: %s\n' %(fname, self.curline, col, pe.txt, tok))
else:
- sys.stdout.write('%s: %d:0 %s\n' %(fname, self.curline, col, pe.txt))
+ sys.stdout.write('%s: %d:0 %s\n' %(fname, self.curline, pe.txt))
self.spdx_errors += 1
-def scan_git_tree(tree):
+ if fname == '-':
+ return
+
+ base = os.path.dirname(fname)
+ if self.dirdepth > 0:
+ parts = base.split('/')
+ i = 0
+ base = '.'
+ while i < self.dirdepth and i < len(parts) and len(parts[i]):
+ base += '/' + parts[i]
+ i += 1
+ elif self.dirdepth == 0:
+ base = self.basedir
+ else:
+ base = './' + base.rstrip('/')
+ base += '/'
+
+ di = self.spdx_dirs.get(base, dirinfo())
+ di.update(fname, base, fail)
+ self.spdx_dirs[base] = di
+
+class pattern(object):
+    """One rule of the exclude file.
+
+    The rule text selects the matcher which is bound to self.match:
+
+      '.*'        matches every file whose basename starts with a dot
+      'name/'     directory rule: matches files in a directory whose path
+                  matches the glob 'name' and everything below the
+                  literal directory 'name'
+      '/name'     matches the file path against the glob 'name'
+      'name'      matches files whose basename equals 'name'
+    """
+
+    def __init__(self, line):
+        self.pattern = line
+        self.match = self.match_file
+        if line == '.*':
+            self.match = self.match_dot
+        elif line.endswith('/'):
+            # Directory rule: keep the path without the trailing slash
+            self.pattern = line[:-1]
+            self.match = self.match_dir
+        elif line.startswith('/'):
+            # Path rule: drop the leading slash, file paths have none
+            self.pattern = line[1:]
+            self.match = self.match_fn
+
+    def match_dot(self, fpath):
+        """Return True if the basename of fpath starts with a dot."""
+        return os.path.basename(fpath).startswith('.')
+
+    def match_file(self, fpath):
+        """Return True if the basename of fpath equals the pattern."""
+        return os.path.basename(fpath) == self.pattern
+
+    def match_fn(self, fpath):
+        """Return True if fpath matches the glob pattern, case sensitive."""
+        return fnmatch.fnmatchcase(fpath, self.pattern)
+
+    def match_dir(self, fpath):
+        """Return True if fpath is located in or below the directory."""
+        if self.match_fn(os.path.dirname(fpath)):
+            return True
+        # Compare including the separator. A bare prefix comparison would
+        # make 'arch/x86/' match 'arch/x86_64/...' as well.
+        return fpath.startswith(self.pattern + '/')
+
+def exclude_file(fpath):
+    """Return True if any rule in the global exclude_rules matches fpath."""
+    return any(rule.match(fpath) for rule in exclude_rules)
+
+# Scan the files of a git tree with the global parser. basedir and dirdepth
+# are handed to parser.set_dirinfo() before the scan starts.
+def scan_git_tree(tree, basedir, dirdepth):
+ parser.set_dirinfo(basedir, dirdepth)
for el in tree.traverse():
- # Exclude stuff which would make pointless noise
- # FIXME: Put this somewhere more sensible
- if el.path.startswith("LICENSES"):
- continue
- if el.path.find("license-rules.rst") >= 0:
- continue
# Skip tree entries which are not regular files in the work tree
if not os.path.isfile(el.path):
continue
+ # Files matching an exclude rule are counted, but not parsed
+ if exclude_file(el.path):
+ parser.excluded += 1
+ continue
with open(el.path, 'rb') as fd:
parser.parse_lines(fd, args.maxlines, el.path)
+# Descend from tree along the components of path and scan the resulting
+# subtree. The path without surrounding slashes is used as base directory.
-def scan_git_subtree(tree, path):
+def scan_git_subtree(tree, path, dirdepth):
for p in path.strip('/').split('/'):
tree = tree[p]
- scan_git_tree(tree)
+ scan_git_tree(tree, path.strip('/'), dirdepth)
+
+def read_exclude_file(fname):
+    """Read the exclude rules from fname.
+
+    Blank lines and lines starting with '#' are ignored. Every other line
+    is stripped of surrounding whitespace and turned into a pattern object.
+
+    Returns the list of pattern objects in file order. The list is empty
+    when fname is empty or None.
+    """
+    rules = []
+    if not fname:
+        return rules
+    # Decode as UTF-8 independent of the locale, like read_spdxdata() does
+    with open(fname, encoding="utf-8") as fd:
+        for line in fd:
+            line = line.strip()
+            if not line or line.startswith('#'):
+                continue
+            rules.append(pattern(line))
+    return rules
if __name__ == '__main__':
ap = ArgumentParser(description='SPDX expression checker')
ap.add_argument('path', nargs='*', help='Check path or file. If not given full git tree scan. For stdin use "-"')
+ ap.add_argument('-d', '--dirs', action='store_true',
+ help='Show [sub]directory statistics.')
+ ap.add_argument('-D', '--depth', type=int, default=-1,
+ help='Directory depth for -d statistics. Default: unlimited')
+ ap.add_argument('-e', '--exclude',
+ help='File containing file patterns to exclude. Default: scripts/spdxexclude')
+ ap.add_argument('-f', '--files', action='store_true',
+ help='Show files without SPDX.')
ap.add_argument('-m', '--maxlines', type=int, default=15,
help='Maximum number of lines to scan in a file. Default 15')
ap.add_argument('-v', '--verbose', action='store_true', help='Verbose statistics output')
@@ -239,7 +349,7 @@ if __name__ == '__main__':
# Initialize SPDX data
spdx = read_spdxdata(repo)
- # Initilize the parser
+ # Initialize the parser
parser = id_parser(spdx)
except SPDXException as se:
@@ -255,6 +365,15 @@ if __name__ == '__main__':
sys.exit(1)
try:
+ fname = args.exclude
+ if not fname:
+ fname = os.path.join(os.path.dirname(__file__), 'spdxexclude')
+ exclude_rules = read_exclude_file(fname)
+ except Exception as ex:
+ sys.stderr.write('FAIL: Reading exclude file %s: %s\n' %(fname, ex))
+ sys.exit(1)
+
+ try:
if len(args.path) and args.path[0] == '-':
stdin = os.fdopen(sys.stdin.fileno(), 'rb')
parser.parse_lines(stdin, args.maxlines, '-')
@@ -264,13 +383,21 @@ if __name__ == '__main__':
if os.path.isfile(p):
parser.parse_lines(open(p, 'rb'), args.maxlines, p)
elif os.path.isdir(p):
- scan_git_subtree(repo.head.reference.commit.tree, p)
+ scan_git_subtree(repo.head.reference.commit.tree, p,
+ args.depth)
else:
sys.stderr.write('path %s does not exist\n' %p)
sys.exit(1)
else:
# Full git tree scan
- scan_git_tree(repo.head.commit.tree)
+ scan_git_tree(repo.head.commit.tree, '.', args.depth)
+
+ ndirs = len(parser.spdx_dirs)
+ dirsok = 0
+ if ndirs:
+ for di in parser.spdx_dirs.values():
+ if not di.missing:
+ dirsok += 1
if args.verbose:
sys.stderr.write('\n')
@@ -279,10 +406,38 @@ if __name__ == '__main__':
sys.stderr.write('License IDs %12d\n' %len(spdx.licenses))
sys.stderr.write('Exception IDs %12d\n' %len(spdx.exceptions))
sys.stderr.write('\n')
+ sys.stderr.write('Files excluded: %12d\n' %parser.excluded)
sys.stderr.write('Files checked: %12d\n' %parser.checked)
sys.stderr.write('Lines checked: %12d\n' %parser.lines_checked)
- sys.stderr.write('Files with SPDX: %12d\n' %parser.spdx_valid)
+ if parser.checked:
+ pc = int(100 * parser.spdx_valid / parser.checked)
+ sys.stderr.write('Files with SPDX: %12d %3d%%\n' %(parser.spdx_valid, pc))
sys.stderr.write('Files with errors: %12d\n' %parser.spdx_errors)
+ if ndirs:
+ sys.stderr.write('\n')
+ sys.stderr.write('Directories accounted: %8d\n' %ndirs)
+ pc = int(100 * dirsok / ndirs)
+ sys.stderr.write('Directories complete: %8d %3d%%\n' %(dirsok, pc))
+
+ if ndirs and ndirs != dirsok and args.dirs:
+ if args.verbose:
+ sys.stderr.write('\n')
+ sys.stderr.write('Incomplete directories: SPDX in Files\n')
+ for f in sorted(parser.spdx_dirs.keys()):
+ di = parser.spdx_dirs[f]
+ if di.missing:
+ valid = di.total - di.missing
+ pc = int(100 * valid / di.total)
+ sys.stderr.write(' %-80s: %5d of %5d %3d%%\n' %(f, valid, di.total, pc))
+
+ if ndirs and ndirs != dirsok and args.files:
+ if args.verbose or args.dirs:
+ sys.stderr.write('\n')
+ sys.stderr.write('Files without SPDX:\n')
+ for f in sorted(parser.spdx_dirs.keys()):
+ di = parser.spdx_dirs[f]
+ for f in sorted(di.files):
+ sys.stderr.write(' %s\n' %f)
sys.exit(0)