A simple Python script to find all source code files that do not have a copyright notice.
This can be useful if you are working on a large codebase and there is a requirement that *all* source code files have a copyright notice.
The script recursively searches a directory and reports every matching source code file that is missing the copyright notice.
1: """
2: findSourcecodeNoCopyright.py
3: Author: Sean Ryan
4: Version: 1.0
5:
6: Script to recursively find sourcecode files, that do not have any copyright information.
7: This is useful if you are working on a large codebase, and there is a requirement that *all* sourcecode files, have a copyright notice.
8:
9: Dependencies: Python 2.7 (3)
10:
11: Usage: findSourcecodeNoCopyright.py <source directory> <semi-colon separated list of extensions> [options]
12:
13: The options are:
14: [-h help]
15: [-i ignore file extensions]
16: [-s skip directories]
17: [-w show Warnings only]
18:
19: Example: search for .NET source code files, in the c:\\sourcecode directory and all child directories, that do not have a copyright notice:
20: findSourcecodeNoCopyright.py c:\\sourcecode cs;vbs
21:
22: Example: search ALL files, in the c:\\sourcecode directory and all child directories, that do not have a copyright notice:
23: findSourcecodeNoCopyright.py c:\\sourcecode *
24:
25: Example: search for .NET source code files, in the c:\\sourcecode directory and all child directories, that do not have a copyright notice.
26: Ignore files with extension designer.cs or Test.cs (case-insensitive).
27: Skip directories named obj or debug.
28: findSourcecodeNoCopyright.py c:\\sourcecode cs;vbs -idesigner.cs;Test.cs -sobj;debug
29: """
30: ###############################################################
31:
32: from optparse import OptionParser
33: import getopt
34: import sys
35: import re
36: import os
37: import shutil
38: import datetime
39: import time
40: from os.path import exists, join
41:
42: #pathsep ; on windows , on unix
43: from os import pathsep
44:
45: #from string import split
46:
47: ###############################################################
48: # Define some defaults:
# Default configuration values.
sourceDirPath = ''  # location to search for files
sizeInBytes = 0

# Verbosity levels, in increasing order of chattiness.
# LOG_WARNINGS_ONLY means: only output if the verbosity is LOG_WARNINGS.
LOG_WARNINGS = 0
LOG_WARNINGS_ONLY = 1
LOG_VERBOSE = 2
logVerbosity = LOG_VERBOSE

# Extensions to search for, extensions to ignore, directory names to skip.
extensions_list = set()
extensions_to_ignore_list = set()
directories_to_ignore_list = set()
59:
60: ###############################################################
61: #ask_ok() - prompts the user to continue
def ask_ok(prompt, retries=3, complaint='Yes or no, please!'):
    """Ask the user a yes/no question and return the answer as a bool.

    When the module-level flag ``yesAllPrompts`` is true, the prompt is
    echoed followed by " (Y)" and True is returned without reading input.

    prompt    -- text shown to the user before each reply is read
    retries   -- how many invalid replies are tolerated; one more raises
    complaint -- text printed after each tolerated invalid reply

    Returns True for y/ye/yes and False for n/no/nop/nope.  Replies are
    compared case-insensitively with surrounding whitespace removed, so
    the upper-case "Y" / "N" advertised by the caller's prompt is accepted.
    Raises IOError('refusenik user') once the retries are used up.
    """
    if yesAllPrompts:
        print(prompt + " (Y)")
        return True

    # raw_input() only exists on Python 2; Python 3 renamed it to input().
    try:
        read_reply = raw_input
    except NameError:
        read_reply = input

    while True:
        ok = read_reply(prompt).strip().lower()
        if ok in ('y', 'ye', 'yes'):
            return True
        if ok in ('n', 'no', 'nop', 'nope'):
            return False
        retries = retries - 1
        if retries < 0:
            raise IOError('refusenik user')
        print(complaint)
77:
78: ###############################################################
79: #usage() - prints out the usage text, from the top of this file :-)
def usage():
    """Print the module docstring, which serves as the usage text."""
    help_text = __doc__
    print(help_text)
82:
# Numeric day, month, year, then hour:minute.
dateTimeFormat = '%d %m %Y %H:%M'
# Parse one sample timestamp with that format; the result is discarded.
datetime.datetime.strptime('01 12 2006 12:32', dateTimeFormat)
85:
86: ###############################################################
87: #optparse - parse the args
# The second positional argument is the extension list, so the usage line
# names it as such.
parser = OptionParser(
    usage='%prog <source directory> '
          '<semi-colon separated list of extensions> [options]')
parser.add_option('-i', '--ignore', dest='ignoreExtensions', default="",
                  help='ignore file extensions')
parser.add_option('-s', '--skip', dest='skipDirectories', default="",
                  help='skip directories')
parser.add_option('-w', '--warnings', dest='warnings', action='store_const',
                  const=LOG_WARNINGS, default=LOG_VERBOSE,
                  help='show only warnings (default: show all output)')
parser.add_option('-y', '--yes', dest='yes_all', action='store_const',
                  const=True, default=False,
                  help='automatically say Yes to all prompts '
                       '(default: prompt user)')

(options, args) = parser.parse_args()
# Exactly two positional arguments are required: directory and extensions.
if len(args) != 2:
    usage()
    sys.exit(2)

###############################################################
# copy the args to our variables
logVerbosity = options.warnings
sourceDirPath = args[0]
extensions = args[1]
yesAllPrompts = options.yes_all

# "".split(';') yields [''], so empty pieces are dropped: an option that was
# not given (or a stray ';') must not turn into an empty extension or an
# empty directory name.
extensions_list = [ext for ext in extensions.split(';') if ext]
extensions_to_ignore_list = [
    ext for ext in options.ignoreExtensions.split(';') if ext]
directories_to_ignore_list = [
    name for name in options.skipDirectories.split(';') if name]
114:
115: ###############################################################
116: #print out summary of the configuration, and prompt user to continue:
# Show the effective configuration so the user can check it before the
# search starts.
print("Configuration:")
print("--------------")

print("sourceDirPath: " + sourceDirPath + "\n")
print("extensions: ")
for ext in extensions_list:
    print(" " + ext)

print("extensions to ignore: ")
for ext in extensions_to_ignore_list:
    print(" " + ext)

print("directories to ignore: ")
# Named dir_name rather than dir so the builtin dir() is not shadowed.
for dir_name in directories_to_ignore_list:
    print(" " + dir_name)

print("")

if logVerbosity == LOG_WARNINGS:
    print("Output will show warnings only\n")
elif logVerbosity == LOG_VERBOSE:
    print("Output is verbose\n")
else:
    # logVerbosity is an int, so it is converted before concatenation;
    # joining it to the message directly would raise TypeError.
    print("Invalid verbosity level: " + str(logVerbosity))
    sys.exit(1)

print("We will recursively search for all matching files, that have no copyright notice.")

print("")

# Give the user the chance to back out before the search starts.
if ask_ok("Do you wish to continue ? (Y/N)"):
    print("ok")
else:
    print("Exiting")
    sys.exit()

print("")

print("Searching for files ...\n")

# Warning counter reported in the final summary.
numWarnings = 0
159:
160: ###############################################################
161: #printOut()
162: #this function prints out, according to user's options for verbosity
def printOut(txt, verb = LOG_VERBOSE, bNewLine = True):
    """Write txt to stdout if the configured verbosity allows it.

    txt      -- the text to write
    verb     -- the level of this message
    bNewLine -- when true, a newline is appended to txt

    A LOG_WARNINGS_ONLY message is written only when logVerbosity is
    exactly LOG_WARNINGS; any other message is written when logVerbosity
    is at least verb.
    """
    message = txt + "\n" if bNewLine else txt
    if verb == LOG_WARNINGS_ONLY:
        # special case: not part of the ordered comparison below
        should_write = (logVerbosity == LOG_WARNINGS)
    else:
        should_write = (logVerbosity >= verb)
    if should_write:
        sys.stdout.write(message)
172:
173: ###############################################################
174: #IsFileExtensionOk() - does this filename match the list of extensions given by user
def IsFileExtensionOk(filename):
    """Return True if filename should be checked for a copyright notice.

    The name must end with "." plus one of the entries in the module-level
    extensions_list (or that list must contain '*'), and must not end with
    "." plus any entry in extensions_to_ignore_list.  All comparisons are
    case-insensitive.

    Empty entries in either list are skipped: an empty entry would
    otherwise match every file name that ends in ".".
    """
    lowered_name = filename.lower()

    def has_suffix(ext):
        # Empty entries never match.
        return bool(ext) and lowered_name.endswith("." + ext.lower())

    wanted = any(ext == '*' or has_suffix(ext) for ext in extensions_list)
    if not wanted:
        return False

    return not any(has_suffix(ext) for ext in extensions_to_ignore_list)
198:
199: ###############################################################
200: #IsFileSizeOk() - does this file have the same size given by user
def IsFileSizeOk(filePath):
    """Return True when the file at filePath is exactly sizeInBytes bytes."""
    actual_size = os.path.getsize(filePath)
    return actual_size == sizeInBytes
208:
209: ###############################################################
210: #DoesFileContainCopyright - does the file at given path, contain a copyright notice
def DoesFileContainCopyright(filename):
    """Return True if any line of the file contains the word "copyright".

    The match is case-insensitive.  The file is read as text in the
    platform's default encoding; bytes that cannot be decoded are replaced
    rather than raising, so binary or oddly-encoded files do not abort the
    scan.  The file is closed before returning.

    Errors opening or reading the file (IOError / OSError) propagate to
    the caller.
    """
    # Local import: io.open accepts errors= on both Python 2 and Python 3.
    import io

    needle = "copyright"
    with io.open(filename, 'r', errors='replace') as source:
        for line in source:
            if needle in line.lower():
                return True
    return False
218:
def IsDirectoryOk(dirpath):
    """Return False if the last component of dirpath is a directory to skip.

    The last path component is compared (case-sensitively) against the
    module-level directories_to_ignore_list.  Both '\\' and '/' are treated
    as separators so Windows and Unix paths are handled alike, and trailing
    separators are ignored.  A path with no final component (empty, or
    separators only) is never skipped.
    """
    normalised = dirpath.replace('\\', '/').rstrip('/')
    dirname = normalised.rsplit('/', 1)[-1]
    if not dirname:
        return True
    return dirname not in directories_to_ignore_list
227:
228: ###############################################################
229: #search_files - recursively search the given directory, and populate the map with files that match our list of extensions
def search_files_by_ext(dir):
    """Recursively report files under dir that lack a copyright notice.

    Every regular file in dir that passes IsFileExtensionOk() and for which
    DoesFileContainCopyright() is false is reported through printOut() at
    LOG_WARNINGS level.  Every other directory entry is treated as a
    sub-directory and searched recursively unless IsDirectoryOk() rejects
    it.

    dir -- path of the directory to search

    Returns the number of files reported in dir and below.  A directory
    that cannot be listed is reported through printOut() and counts as 0.
    """
    printOut("Searching dir: " + dir)

    # OSError instead of WindowsError: WindowsError only exists on Windows
    # (where it is an OSError), so naming it elsewhere raises NameError as
    # soon as a directory is unreadable.
    try:
        entries = os.listdir(dir)
    except OSError:
        printOut("Error occurred accessing directory " + dir)
        return 0

    missing_count = 0
    subdirs = []
    for name in entries:
        path = os.path.join(dir, name)
        if os.path.isfile(path):
            if IsFileExtensionOk(name) and not DoesFileContainCopyright(path):
                printOut("File found: " + path, LOG_WARNINGS)
                missing_count += 1
        else:
            # Collected now, searched after all files in dir are handled.
            subdirs.append(path)

    for subdir in subdirs:
        if not IsDirectoryOk(subdir):
            continue
        try:
            missing_count += search_files_by_ext(subdir)
        except OSError:
            printOut("Error occurred accessing directory " + subdir)
    return missing_count
260:
261: ###############################################################
262: #search for source files, that match the extensions given by user
# Run the search from the directory given by the user.
printOut("Matching files:\n-----------------")
iNumFilesFound = search_files_by_ext(sourceDirPath)

###############################################################
# print summary of results
found_summary = ("Found " + str(iNumFilesFound)
                 + " matching files that do not have any copyright notice.")
warnings_summary = str(numWarnings) + " warnings"
print("")
print(found_summary)
print(warnings_summary)
272:
Comments
Post a Comment