#!/usr/bin/env python3
#
# Copyright (C) 2012 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Usage: generate-notice-files --text-output [plain text output file] \
               --html-output [html output file] \
               --xml-output [xml output file] \
               -t [file title] -s [directory of notices] \
               [-i included subdir]... [-e excluded subdir]...

Generate the Android notice files: a text file and, optionally, HTML and
XML files.

-h to display this usage message and exit.
"""
from collections import defaultdict
import argparse
import hashlib
import itertools
import os
import os.path
import re
import sys

MD5_BLOCKSIZE = 1024 * 1024
HTML_ESCAPE_TABLE = {
    "&": "&amp;",
    '"': "&quot;",
    "'": "&apos;",
    ">": "&gt;",
    "<": "&lt;",
    }


def hexify(s):
    """Return the lowercase hexadecimal representation of the byte string S."""
    # bytearray() yields integers for every byte-like input, so no ord() call
    # is needed.
    return "".join("%02x" % byte for byte in bytearray(s))


def md5sum(filename):
    """Calculate an MD5 of the file given by FILENAME,
    and return hex digest as a string.
    Output should be compatible with md5sum command"""

    digest = hashlib.md5()
    with open(filename, "rb") as f:
        # Read in MD5_BLOCKSIZE chunks so large files are never held in
        # memory as a whole.
        for block in iter(lambda: f.read(MD5_BLOCKSIZE), b""):
            digest.update(block)
    return digest.hexdigest()


def html_escape(text):
    """Produce entities within text."""
    return "".join(HTML_ESCAPE_TABLE.get(c, c) for c in text)


HTML_OUTPUT_CSS = """
<style type="text/css">
body { padding: 0; font-family: sans-serif; }
.same-license { background-color: #eeeeee; border-top: 20px solid white; padding: 10px; }
.label { font-weight: bold; }
.file-list { margin-left: 1em; color: blue; }
</style>
"""


def _src_dir_strip_re(input_dir):
    """Return a regex matching INPUT_DIR + "/<path>.txt", capturing "/<path>"."""
    # INPUT_DIR is a literal path, not a pattern, so it has to be escaped.
    return re.compile(re.escape(input_dir) + r"(/.*)\.txt")


def _read_notice(filename):
    """Return the contents of the notice file FILENAME as text."""
    # surrogateescape together with newline="" lets bytes that are not valid
    # UTF-8, and the original line endings, survive the read/write round trip.
    with open(filename, encoding="utf-8", errors="surrogateescape",
              newline="") as f:
        return f.read()


def _open_output(filename):
    """Open FILENAME for writing text; counterpart of _read_notice()."""
    return open(filename, "w", encoding="utf-8", errors="surrogateescape",
                newline="")


def _is_under(root, input_dir, subdirs):
    """Return True if ROOT is INPUT_DIR/<subdir>, or below it, for any of SUBDIRS."""
    for subdir in subdirs:
        base = input_dir + '/' + subdir
        if root == base or root.startswith(base + '/'):
            return True
    return False


def combine_notice_files_html(file_hash, input_dir, output_filename):
    """Combine notice files in FILE_HASH and output a HTML version to OUTPUT_FILENAME.

    FILE_HASH is a list of lists of file names; the files in one inner list
    are treated as sharing one notice, and only the first of them is read.
    INPUT_DIR and the ".txt" suffix are stripped from the names that are
    written out.
    """

    strip_re = _src_dir_strip_re(input_dir)

    # Set up a filename to row id table (anchors inside tables don't work in
    # most browsers, but href's to table row ids do). All files of one group
    # share the id of that group's table row.
    id_table = {}
    for row_id, group in enumerate(file_hash):
        for filename in group:
            id_table[filename] = row_id

    # Flatten the list of lists into a single list of filenames
    sorted_filenames = sorted(itertools.chain.from_iterable(file_hash))

    with _open_output(output_filename) as output_file:
        # Output the header pieces
        print("<html><head>", file=output_file)
        print(HTML_OUTPUT_CSS, file=output_file)
        print('</head><body topmargin="0" leftmargin="0" rightmargin="0" bottommargin="0">', file=output_file)

        # Output our table of contents
        print('<div class="toc">', file=output_file)
        print("<ul>", file=output_file)
        for filename in sorted_filenames:
            stripped_filename = strip_re.sub(r"\1", filename)
            print('<li><a href="#id%d">%s</a></li>'
                  % (id_table[filename], html_escape(stripped_filename)),
                  file=output_file)
        print("</ul>", file=output_file)
        print("</div><!-- table of contents -->", file=output_file)

        # Output the individual notice file lists
        print('<table cellpadding="0" cellspacing="0" border="0">', file=output_file)
        for group in file_hash:
            print('<tr id="id%d"><td class="same-license">' % id_table[group[0]], file=output_file)
            print('<div class="label">Notices for file(s):</div>', file=output_file)
            print('<div class="file-list">', file=output_file)
            for filename in group:
                print("%s <br/>" % html_escape(strip_re.sub(r"\1", filename)), file=output_file)
            print("</div><!-- file-list -->", file=output_file)
            print(file=output_file)
            print('<pre class="license-text">', file=output_file)
            print(html_escape(_read_notice(group[0])), file=output_file)
            print("</pre><!-- license-text -->", file=output_file)
            print("</td></tr><!-- same-license -->", file=output_file)
            print(file=output_file)
            print(file=output_file)
            print(file=output_file)

        # Finish off the file output
        print("</table>", file=output_file)
        print("</body></html>", file=output_file)


def combine_notice_files_text(file_hash, input_dir, output_filename, file_title):
    """Combine notice files in FILE_HASH and output a text version to OUTPUT_FILENAME.

    FILE_TITLE is written as the first line. FILE_HASH has the same shape as
    for combine_notice_files_html(): a list of lists of file names, of which
    the first file of every inner list is read.
    """

    strip_re = _src_dir_strip_re(input_dir)
    with _open_output(output_filename) as output_file:
        print(file_title, file=output_file)
        for group in file_hash:
            print("============================================================", file=output_file)
            print("Notices for file(s):", file=output_file)
            for filename in group:
                print(strip_re.sub(r"\1", filename), file=output_file)
            print("------------------------------------------------------------", file=output_file)
            print(_read_notice(group[0]), file=output_file)


def combine_notice_files_xml(files_with_same_hash, input_dir, output_filename):
    """Combine notice files in FILES_WITH_SAME_HASH and output a XML version to OUTPUT_FILENAME.

    FILES_WITH_SAME_HASH maps a content key (main() passes the MD5 of the
    file) to the list of file names having that content. Each key is used as
    the contentId attribute linking <file-name> to <file-content> elements.
    """

    strip_re = _src_dir_strip_re(input_dir)

    # Set up a filename to content id table
    id_table = {}
    for file_key, filenames in files_with_same_hash.items():
        for filename in filenames:
            id_table[filename] = file_key

    sorted_filenames = sorted(id_table)

    with _open_output(output_filename) as output_file:
        # Output the header pieces
        print('<?xml version="1.0" encoding="utf-8"?>', file=output_file)
        print("<licenses>", file=output_file)

        # Print out one <file-name> element per notice file
        for filename in sorted_filenames:
            stripped_filename = strip_re.sub(r"\1", filename)
            print('<file-name contentId="%s">%s</file-name>'
                  % (id_table[filename], html_escape(stripped_filename)),
                  file=output_file)

        print(file=output_file)
        print(file=output_file)

        # Output each distinct notice once, read from the first (in sorted
        # order) file that carries it. The text is entity-escaped even though
        # it sits in a CDATA section; this also keeps "]]>" out of the section.
        processed_file_keys = set()
        for filename in sorted_filenames:
            file_key = id_table[filename]
            if file_key in processed_file_keys:
                continue
            processed_file_keys.add(file_key)

            print('<file-content contentId="%s"><![CDATA[%s]]></file-content>'
                  % (file_key, html_escape(_read_notice(filename))),
                  file=output_file)
            print(file=output_file)

        # Finish off the file output
        print("</licenses>", file=output_file)


def get_args(argv=None):
    """Parse the command line and return the argparse namespace.

    ARGV is the list of arguments without the program name; when it is None,
    argparse falls back to sys.argv[1:].
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--text-output', required=True,
        help='The text output file path.')
    parser.add_argument(
        '--html-output',
        help='The html output file path.')
    parser.add_argument(
        '--xml-output',
        help='The xml output file path.')
    parser.add_argument(
        '-t', '--title', required=True,
        help='The file title.')
    parser.add_argument(
        '-s', '--source-dir', required=True,
        help='The directory containing notices.')
    parser.add_argument(
        '-i', '--included-subdirs', action='append',
        help='The sub directories which should be included.')
    parser.add_argument(
        '-e', '--excluded-subdirs', action='append',
        help='The sub directories which should be excluded.')
    return parser.parse_args(argv)


def main(argv):
    """Generate the notice files requested on the command line ARGV.

    ARGV has the shape of sys.argv, i.e. ARGV[0] is the program name. An
    empty or None ARGV makes argparse read sys.argv instead.
    """
    args = get_args(argv[1:] if argv else None)

    txt_output_file = args.text_output
    html_output_file = args.html_output
    xml_output_file = args.xml_output
    file_title = args.title
    # With action='append' an option that was not given is None.
    included_subdirs = args.included_subdirs or []
    excluded_subdirs = args.excluded_subdirs or []

    # Find all the notice files and md5 them
    input_dir = os.path.normpath(args.source_dir)
    files_with_same_hash = defaultdict(list)
    for root, _dirs, files in os.walk(input_dir):
        # Whether a directory is selected depends on ROOT only, so decide it
        # once per directory. The exclusion list is consulted only when no
        # inclusion list was given.
        if included_subdirs:
            if not _is_under(root, input_dir, included_subdirs):
                continue
        elif excluded_subdirs:
            if _is_under(root, input_dir, excluded_subdirs):
                continue
        for name in files:
            if name.endswith(".txt"):
                filename = os.path.join(root, name)
                files_with_same_hash[md5sum(filename)].append(filename)

    # Sort both the groups and the files inside each group, so the output
    # does not depend on the order in which os.walk() returns entries.
    filesets = [sorted(files_with_same_hash[md5])
                for md5 in sorted(files_with_same_hash)]

    combine_notice_files_text(filesets, input_dir, txt_output_file, file_title)

    if html_output_file is not None:
        combine_notice_files_html(filesets, input_dir, html_output_file)

    if xml_output_file is not None:
        combine_notice_files_xml(files_with_same_hash, input_dir, xml_output_file)


if __name__ == "__main__":
    main(sys.argv)