1#!/usr/bin/env python
2# Run with directory arguments from any directory, with no special setup required.
3
4import ftplib
5import hashlib
6import os
7import re
8import shutil
9import string
10import subprocess
11import sys
12import tarfile
13import tempfile
14
# When True, warn_verbose() forwards its messages to warn(); otherwise they
# are dropped.
VERBOSE = False


def warn(s):
    """Write `s` to stderr as a single line prefixed with "warning: "."""
    message = "warning: %s\n" % s
    sys.stderr.write(message)


def warn_verbose(s):
    """Emit `s` through warn(), but only when VERBOSE is enabled."""
    if not VERBOSE:
        return
    warn(s)
23
def is_interesting(path):
    """Return False for paths on the skip list, True for everything else.

    The comparison is case-insensitive. A path is skipped when its extension
    is one of the listed ones, or when it ends with "/notice", "/readme" or
    "/pylintrc".
    """
    lowered = path.lower()

    skipped_extensions = frozenset([
        ".bp",
        ".map",
        ".md",
        ".mk",
        ".py",
        ".pyc",
        ".swp",
        ".txt",
    ])
    extension = os.path.splitext(lowered)[1]
    if extension in skipped_extensions:
        return False

    # The leading slash means a bare "NOTICE" with no directory part still
    # counts as interesting.
    skipped_names = ("/notice", "/readme", "/pylintrc")
    return not lowered.endswith(skipped_names)
41
def is_auto_generated(content):
    """Return True if `content` contains one of the known generator banners."""
    banners = (
        "Generated by gensyscalls.py",
        "generated by genserv.py",
        "This header was automatically generated from a Linux kernel header",
    )
    for banner in banners:
        if banner in content:
            return True
    return False
48
# Every distinct copyright header seen so far, stored as cleaned-up text.
copyrights = set()

# Substrings that end a copyright header. Most are BSD-style revision tags
# that sit in the same comment block right after the license text.
_HEADER_END_MARKERS = (
    "\t@(#)",
    "\tfrom: @(#)",
    "From: @(#)",
    "from OpenBSD:",
    "\tcitrus Id: ",
    "\t$Citrus: ",
    "\t$OpenBSD: ",
    " $FreeBSD: ",
    "\t$NetBSD: ",
    "$FreeBSD$",
    "$Citrus$",
    # OpenBSD likes to say where stuff originally came from:
    "Original version ID:",
)

# Text-free comment lines that are dropped from the end of a header.
_TRAILING_CRUFT = (
    " *",
    " * ====================================================",
)


def _ends_copyright_header(line, is_hash_comment):
    """Return True if `line` terminates the copyright header being read."""
    if "*/" in line:
        return True
    # In '#'-commented files a completely empty line ends the header.
    if is_hash_comment and len(line) == 0:
        return True
    return any(marker in line for marker in _HEADER_END_MARKERS)


def _clean_header_lines(header):
    """Strip comment decoration from `header`, returning just the text lines.

    Blank lines at the head and tail are removed; the result may be empty.
    """
    cleaned = []
    for line in header:
        line = line.replace("\t", "    ")
        line = line.replace("/* ", "")
        line = re.sub(r"^ \* ", "", line)
        line = line.replace("** ", "")
        line = line.replace("# ", "")
        if "SPDX-License-Identifier:" in line:
            continue
        if line.startswith("++Copyright++"):
            continue
        line = line.replace("--Copyright--", "")
        line = line.rstrip()
        # These come last and take care of "blank" comment lines.
        if line in ("#", " *", "**", "-"):
            line = ""
        cleaned.append(line)

    # Guard on emptiness: everything may have been filtered out above.
    while cleaned and cleaned[0] == "":
        del cleaned[0]
    while cleaned and cleaned[-1] == "":
        del cleaned[-1]
    return cleaned


def extract_copyright_at(lines, i):
    """Record the copyright header surrounding lines[i] in `copyrights`.

    Args:
        lines: the file's content, split into lines without newlines.
        i: index of a line that mentions a copyright.

    Returns:
        The index at which the caller should resume scanning. This is the
        line that terminated the header, and is always greater than the `i`
        passed in so that a caller looping on the result makes progress.
    """
    first = i
    is_hash_comment = lines[first].startswith("#")

    # Do we need to back up to find the start of the copyright header?
    # Not for '#' comments, and not when the comment opens on this very line.
    start = first
    if not is_hash_comment and "/*" not in lines[first]:
        while start > 0 and "/*" not in lines[start - 1]:
            start -= 1

    # Read comment lines until we hit something that terminates a
    # copyright header.
    while i < len(lines) and not _ends_copyright_header(lines[i], is_hash_comment):
        i += 1

    # A comment that closes on the triggering line (for example a one-line
    # "/* Copyright ... */") still carries the notice, so keep that line.
    closes_on_first = i == first and "*/" in lines[first]
    end = i + 1 if closes_on_first else i

    # Trim trailing cruft.
    while end > 0 and lines[end - 1] in _TRAILING_CRUFT:
        end -= 1

    header = lines[start:end]
    if closes_on_first and header:
        # Drop the comment terminator and anything after it.
        header[-1] = header[-1].partition("*/")[0]

    text = _clean_header_lines(header)
    if text:
        copyrights.add("\n".join(text))

    return max(i, first + 1)
119
120
def do_file(path):
    """Scan the file at `path` and record every copyright header it contains.

    Files of four lines or fewer, auto-generated files and public-domain
    files are skipped (reported only in verbose mode). A file with no
    "Copyright" text and no public-domain statement produces a warning.
    """
    # Read the bytes once and close the handle promptly; decoding from bytes
    # behaves the same on Python 2 and Python 3.
    with open(path, "rb") as the_file:
        raw = the_file.read()

    try:
        content = raw.decode("utf-8")
    except UnicodeDecodeError:
        warn("bad UTF-8 in %s" % path)
        # ISO-8859-1 maps every byte to a character, so this cannot fail.
        content = raw.decode("iso-8859-1")

    lines = content.split("\n")

    if len(lines) <= 4:
        warn_verbose("ignoring short file %s" % path)
        return

    if is_auto_generated(content):
        warn_verbose("ignoring auto-generated file %s" % path)
        return

    if "Copyright" not in content:
        if "public domain" in content.lower():
            warn_verbose("ignoring public domain file %s" % path)
            return
        warn('no copyright notice found in "%s" (%d lines)' % (path, len(lines)))
        return

    # Manually iterate because extract_copyright_at tells us how many lines to skip.
    i = 0
    while i < len(lines):
        if "Copyright" in lines[i] and "@(#) Copyright" not in lines[i]:
            # Always move forward at least one line so the scan cannot stall
            # on a line the extractor did not consume.
            i = max(extract_copyright_at(lines, i), i + 1)
        else:
            i += 1
153
154
def do_dir(path):
    """Recursively process every interesting file beneath the directory `path`.

    ".git" directories are not entered. Directories and files are visited in
    sorted order.
    """
    for directory, sub_directories, filenames in os.walk(path):
        # os.walk only honours changes made to this list in place, so prune
        # and sort the list object it handed us rather than rebinding it.
        if ".git" in sub_directories:
            sub_directories.remove(".git")
        sub_directories.sort()

        for filename in sorted(filenames):
            file_path = os.path.join(directory, filename)
            if is_interesting(file_path):
                do_file(file_path)
165
166
# Scan every path named on the command line; default to the current directory.
args = sys.argv[1:] or ["."]

for arg in args:
    if os.path.isdir(arg):
        do_dir(arg)
    else:
        do_file(arg)

# Write the UTF-8 bytes directly. Python 3's text-mode stdout exposes its
# byte stream as `.buffer`; Python 2's stdout accepts bytes itself.
output = getattr(sys.stdout, "buffer", sys.stdout)
separator = b"-------------------------------------------------------------------"

# Each notice is followed by a blank line, the separator, and another blank
# line.
for notice in sorted(copyrights):
    output.write(notice.encode("utf-8") + b"\n")
    output.write(b"\n")
    output.write(separator + b"\n")
    output.write(b"\n")

sys.exit(0)
184