#!/usr/bin/env python
# Run with directory arguments from any directory, with no special setup required.
"""Collect the distinct copyright headers found in a source tree.

Each command-line argument is a file or a directory (default: the current
directory). Directories are walked recursively, skipping ``.git`` and any
file rejected by is_interesting(). Every comment block that contains the
word "Copyright" is stripped of its comment decoration, de-duplicated, and
written to stdout in sorted order, each followed by a dashed rule.

The code is written to run unchanged under both Python 2 and Python 3.
"""

import ftplib
import hashlib
import os
import re
import shutil
import string
import subprocess
import sys
import tarfile
import tempfile

VERBOSE = False

# File extensions (lower-case) that never carry a notice we care about.
_UNINTERESTING_EXTENSIONS = frozenset([
    ".bp",
    ".map",
    ".md",
    ".mk",
    ".py",
    ".pyc",
    ".swp",
    ".txt",
])

# Lower-case path endings for whole files that are skipped.
_UNINTERESTING_SUFFIXES = ("/notice", "/readme", "/pylintrc")

# Substrings that mark a file as machine generated.
_AUTO_GENERATED_MARKERS = (
    "Generated by gensyscalls.py",
    "generated by genserv.py",
    "This header was automatically generated from a Linux kernel header",
)

# Substrings that end a copyright header (mostly BSD revision-control tags).
_HEADER_TERMINATORS = (
    "\t@(#)",
    "\tfrom: @(#)",
    "From: @(#)",
    "from OpenBSD:",
    "\tcitrus Id: ",
    "\t$Citrus: ",
    "\t$OpenBSD: ",
    " $FreeBSD: ",
    "\t$NetBSD: ",
    "$FreeBSD$",
    "$Citrus$",
    # OpenBSD likes to say where stuff originally came from:
    "Original version ID:",
)

# Decoration-only lines that are dropped from the tail of a header.
_TRAILING_CRUFT = (
    " *",
    " * ====================================================",
)

# Comment lines that are "blank" once the decoration has been removed.
_BLANK_COMMENT_LINES = ("#", " *", "**", "-")

# Leading " * " of a C block-comment continuation line.
_LEADING_STAR = re.compile(r"^ \* ")

_SEPARATOR = b"-------------------------------------------------------------------"

# Every distinct copyright text seen so far, across all files processed.
copyrights = set()


def warn(s):
    """Write a warning message to stderr."""
    sys.stderr.write("warning: %s\n" % s)


def warn_verbose(s):
    """Write a warning message to stderr, but only when VERBOSE is set."""
    if VERBOSE:
        warn(s)


def is_interesting(path):
    """Return True if the file at `path` should be scanned for copyrights.

    The check is case-insensitive and looks only at the path text: files
    with one of the uninteresting extensions, and files named notice,
    readme or pylintrc inside some directory, are rejected.
    """
    path = path.lower()
    if os.path.splitext(path)[1] in _UNINTERESTING_EXTENSIONS:
        return False
    return not path.endswith(_UNINTERESTING_SUFFIXES)


def is_auto_generated(content):
    """Return True if `content` carries a known "generated file" marker."""
    return any(marker in content for marker in _AUTO_GENERATED_MARKERS)


def extract_copyright_at(lines, i):
    """Extract the copyright header around lines[i] into `copyrights`.

    Args:
        lines: the file's content as a list of lines without newlines.
        i: index of a line containing "Copyright".

    Returns:
        The index of the line that terminated the header, which is where
        the caller should resume scanning. This equals `i` when the
        "Copyright" line itself holds the terminator (for example a
        one-line ``/* ... */`` comment); nothing is recorded in that case
        and the caller must advance on its own.
    """
    first = i
    is_hash_comment = lines[i].startswith("#")

    # Do we need to back up to find the start of the copyright header?
    start = i
    if not is_hash_comment:
        while start > 0 and "/*" not in lines[start - 1]:
            start -= 1

    # Read comment lines until we hit something that terminates a
    # copyright header.
    while i < len(lines):
        line = lines[i]
        if "*/" in line:
            break
        if is_hash_comment and not line:
            break
        if any(marker in line for marker in _HEADER_TERMINATORS):
            break
        i += 1

    if i == first:
        # The header ended on the "Copyright" line itself, so lines[start:i]
        # would hold only unrelated text from above the comment. Record
        # nothing rather than emit that text as a notice.
        warn("ignoring single-line copyright: %s" % lines[first].strip())
        return i

    # Trim trailing cruft.
    end = i
    while end > 0 and lines[end - 1] in _TRAILING_CRUFT:
        end -= 1

    # Remove C/assembler comment formatting, pulling out just the text.
    clean_lines = []
    for line in lines[start:end]:
        line = line.replace("\t", " ")
        line = line.replace("/* ", "")
        line = _LEADING_STAR.sub("", line)
        line = line.replace("** ", "")
        line = line.replace("# ", "")
        if "SPDX-License-Identifier:" in line:
            continue
        if line.startswith("++Copyright++"):
            continue
        line = line.replace("--Copyright--", "")
        line = line.rstrip()
        # These come last and take care of "blank" comment lines.
        if line in _BLANK_COMMENT_LINES:
            line = ""
        clean_lines.append(line)

    # Trim blank lines from head and tail. The emptiness checks matter:
    # every line may have been skipped or blanked above.
    while clean_lines and clean_lines[0] == "":
        del clean_lines[0]
    while clean_lines and clean_lines[-1] == "":
        del clean_lines[-1]

    if clean_lines:
        copyrights.add("\n".join(clean_lines))

    return i


def do_file(path):
    """Scan one file and record every copyright header it contains.

    The file is decoded as UTF-8, falling back to ISO-8859-1 (with a
    warning) when that fails. Files of four lines or fewer, auto-generated
    files, and files without the word "Copyright" are skipped; the last
    case warns unless the file declares itself public domain.
    """
    # Read bytes once and decode explicitly; the handle is closed by `with`.
    with open(path, "rb") as the_file:
        raw = the_file.read()
    try:
        content = raw.decode("utf-8")
    except UnicodeDecodeError:
        warn("bad UTF-8 in %s" % path)
        content = raw.decode("iso-8859-1")

    lines = content.split("\n")

    if len(lines) <= 4:
        warn_verbose("ignoring short file %s" % path)
        return

    if is_auto_generated(content):
        warn_verbose("ignoring auto-generated file %s" % path)
        return

    if "Copyright" not in content:
        if "public domain" in content.lower():
            warn_verbose("ignoring public domain file %s" % path)
            return
        warn('no copyright notice found in "%s" (%d lines)' % (path, len(lines)))
        return

    # Manually iterate because extract_copyright_at tells us how many lines to skip.
    i = 0
    while i < len(lines):
        if "Copyright" in lines[i] and "@(#) Copyright" not in lines[i]:
            # extract_copyright_at may return `i` unchanged; always move
            # forward by at least one line so the scan cannot loop forever.
            i = max(extract_copyright_at(lines, i), i + 1)
        else:
            i += 1


def do_dir(path):
    """Recursively scan every interesting file below the directory `path`.

    ``.git`` directories are not entered. Sub-directories and files are
    visited in sorted order so that warnings come out in a stable order.
    """
    for directory, sub_directories, filenames in os.walk(path):
        # os.walk only honours changes made to this list in place.
        if ".git" in sub_directories:
            sub_directories.remove(".git")
        sub_directories.sort()

        for filename in sorted(filenames):
            file_path = os.path.join(directory, filename)
            if is_interesting(file_path):
                do_file(file_path)


def main(args=None):
    """Scan the given files/directories and print the collected copyrights.

    Args:
        args: paths to scan; defaults to the command-line arguments, and to
            the current directory when there are none.

    Returns:
        The process exit status (always 0).
    """
    if args is None:
        args = sys.argv[1:]
    if not args:
        args = ["."]

    for arg in args:
        if os.path.isdir(arg):
            do_dir(arg)
        else:
            do_file(arg)

    # Emit UTF-8 bytes regardless of locale: Python 3 exposes the binary
    # stream as sys.stdout.buffer, Python 2's sys.stdout takes bytes directly.
    out = getattr(sys.stdout, "buffer", sys.stdout)
    for notice in sorted(copyrights):
        out.write(notice.encode("utf-8") + b"\n\n" + _SEPARATOR + b"\n\n")
    out.flush()
    return 0


if __name__ == "__main__":
    sys.exit(main())