Find all source code files that do not have a copyright notice - Python script

A simple Python script to find all source code files that do not have a copyright notice.

This can be useful if you are working on a large codebase and there is a requirement that *all* source code files have a copyright notice.

This script will find all source code files that are missing the copyright notice.

1:  """  
2:   findSourcecodeNoCopyright.py  
3:   Author: Sean Ryan  
4:   Version: 1.0  
5:    
6:   Script to recursively find sourcecode files, that do not have any copyright information.  
7:   This is useful if you are working on a large codebase, and there is a requirement that *all* sourcecode files, have a copyright notice.  
8:    
9:  Dependencies: Python 2.7 (3)  
10:     
11:  Usage: findSourcecodeNoCopyright.py <source directory> <semi-colon separated list of extensions> [options]  
12:    
13:  The options are:  
14:  [-h help]  
15:  [-i ignore file extensions]  
16:  [-s skip directories]  
17:  [-w show Warnings only]  
18:    
19:  Example: search for .NET source code files, in the c:\\sourcecode directory and all child directories, that do not have a copyright notice:  
20:  findSourcecodeNoCopyright.py c:\\sourcecode cs;vbs  
21:    
22:  Example: search ALL files, in the c:\\sourcecode directory and all child directories, that do not have a copyright notice:  
23:  findSourcecodeNoCopyright.py c:\\sourcecode *  
24:    
25:  Example: search for .NET source code files, in the c:\\sourcecode directory and all child directories, that do not have a copyright notice.  
26:  Ignore files with extension designer.cs or Test.cs (case in-sensitive).  
27:  Skip directories named obj or debug.  
28:  findSourcecodeNoCopyright.py c:\\sourcecode cs;vbs -idesigner.cs;Test.cs -sobj;debug  
29:  """  
30:  ###############################################################  
31:    
32:  from optparse import OptionParser  
33:  import getopt  
34:  import sys  
35:  import re  
36:  import os  
37:  import shutil  
38:  import datetime  
39:  import time  
40:  from os.path import exists, join  
41:    
42:  #pathsep ; on windows , on unix  
43:  from os import pathsep  
44:    
45:  #from string import split  
46:    
47:  ###############################################################  
48:  # Define some defaults:  
# Default configuration values.
sourceDirPath = ''  # root directory to search for files
sizeInBytes = 0     # size that IsFileSizeOk() compares files against

# Verbosity levels, numbered 0 to 2.
# LOG_WARNINGS_ONLY marks output that is shown only if the verbosity is LOG_WARNINGS.
LOG_WARNINGS = 0
LOG_WARNINGS_ONLY = 1
LOG_VERBOSE = 2
logVerbosity = LOG_VERBOSE

# Extension and directory filters all start out empty.
extensions_list = set()
extensions_to_ignore_list = set()
directories_to_ignore_list = set()
59:    
60:  ###############################################################  
61:  #ask_ok() - prompts the user to continue  
def ask_ok(prompt, retries=3, complaint='Yes or no, please!'):
    """Ask the user a yes/no question and return the answer as a bool.

    If the module-level flag yesAllPrompts is true, the question is printed
    with " (Y)" appended and True is returned without waiting for input.

    prompt    -- text shown to the user.
    retries   -- number of invalid answers tolerated before giving up.
    complaint -- text printed after each invalid answer.

    Returns True for y/ye/yes and False for n/no/nop/nope. Answers are
    compared case-insensitively and with surrounding whitespace removed,
    so the "(Y/N)" shown in prompts is accepted as typed.

    Raises IOError('refusenik user') once more than `retries` invalid
    answers have been given.
    """
    global yesAllPrompts
    if yesAllPrompts:
        print (prompt + " (Y)")
        return True

    # raw_input() only exists on Python 2; Python 3 renamed it to input().
    try:
        readAnswer = raw_input
    except NameError:
        readAnswer = input

    while True:
        ok = readAnswer(prompt).strip().lower()
        if ok in ('y', 'ye', 'yes'):
            return True
        if ok in ('n', 'no', 'nop', 'nope'):
            return False
        retries = retries - 1
        if retries < 0:
            raise IOError('refusenik user')
        print (complaint)
77:    
78:  ###############################################################  
79:  #usage() - prints out the usage text, from the top of this file :-)  
def usage():
    """Print the usage text, which is this module's own docstring."""
    helpText = __doc__
    print (helpText)
82:    
# NOTE(review): the parsed value is discarded and dateTimeFormat is not used
# anywhere else in the visible source - presumably a leftover from another
# script. Confirm before removing.
dateTimeFormat = '%d %m %Y %H:%M'
datetime.datetime.strptime('01 12 2006 12:32', dateTimeFormat)

###############################################################
#optparse - parse the args
# The usage line names the two positional arguments this script reads below:
# the source directory and the semi-colon separated list of extensions.
parser = OptionParser(usage='%prog <source directory> <semi-colon separated list of extensions> [options]')
parser.add_option('-i', '--ignore', dest='ignoreExtensions', default="",
                  help='ignore file extensions')
parser.add_option('-s', '--skip', dest='skipDirectories', default="",
                  help='skip directories')
parser.add_option('-w', '--warnings', dest='warnings', action='store_const',
                  const=LOG_WARNINGS, default=LOG_VERBOSE,
                  help='show only warnings (default: show all output)')
parser.add_option('-y', '--yes', dest='yes_all', action='store_const',
                  const=True, default=False,
                  help='automatically say Yes to all prompts (default: prompt user)')

(options, args) = parser.parse_args()
if len(args) != 2:
    usage()
    sys.exit(2)

###############################################################
#copy the args to our variables
logVerbosity = options.warnings
sourceDirPath = args[0]
extensions = args[1]
yesAllPrompts = options.yes_all

# "".split(';') returns [''], so empty entries are dropped. Otherwise an
# omitted -i or -s option would leave a blank item in the list.
extensions_list = [entry for entry in extensions.split(';') if entry]
extensions_to_ignore_list = [entry for entry in options.ignoreExtensions.split(';') if entry]
directories_to_ignore_list = [entry for entry in options.skipDirectories.split(';') if entry]
114:    
115:  ###############################################################  
116:  #print out summary of the configuration, and prompt user to continue:  
# Show the effective configuration so the user can check it before the scan.
print ("Configuration:")
print ("--------------")

print ("sourceDirPath: " + sourceDirPath + "\n")
print ("extensions: ")
for ext in extensions_list:
    print (" " + ext)

print ("extensions to ignore: ")
for ext in extensions_to_ignore_list:
    print (" " + ext)

print ("directories to ignore: ")
# Named dirName rather than dir, so the builtin dir() is not shadowed at module level.
for dirName in directories_to_ignore_list:
    print (" " + dirName)

print ("")

if logVerbosity == LOG_WARNINGS:
    print ("Output will show warnings only\n")
elif logVerbosity == LOG_VERBOSE:
    print ("Output is verbose\n")
else:
    # logVerbosity is an int, so it must be converted before concatenation.
    print ("Invalid verbosity level: " + str(logVerbosity))
    sys.exit(1)

print ("We will recursively search for all matching files, that have no copyright notice.")

print ("")

# Give the user the chance to back out before the scan starts.
if not ask_ok("Do you wish to continue ? (Y/N)"):
    print ("Exiting")
    sys.exit()
print ("ok")

print ("")

print ("Searching for files ...\n")

# Warning counter printed in the final summary. No code in the visible source
# increments it, so the summary reports this initial value.
numWarnings = 0
159:    
160:  ###############################################################  
161:  #printOut()  
162:  #this function prints out, according to user's options for verbosity  
def printOut(txt, verb = LOG_VERBOSE, bNewLine = True):
    """Write txt to stdout if the current verbosity setting allows it.

    txt      -- the text to write.
    verb     -- the level of this message. LOG_WARNINGS_ONLY messages are
                written only when logVerbosity is exactly LOG_WARNINGS; any
                other level is written when logVerbosity >= verb.
    bNewLine -- when true, a newline is appended to txt.
    """
    message = txt + "\n" if bNewLine else txt

    if verb == LOG_WARNINGS_ONLY:
        # special case: this level is not part of the ordered comparison
        shouldWrite = (logVerbosity == LOG_WARNINGS)
    else:
        shouldWrite = (logVerbosity >= verb)

    if shouldWrite:
        sys.stdout.write(message)
172:    
173:  ###############################################################  
174:  #IsFileExtensionOk() - does this filename match the list of extensions given by user  
def IsFileExtensionOk(filename):
    """Return True if filename should be checked, judging by its extension.

    The name must end with "." plus one of the entries in the module-level
    extensions_list (or that list must contain '*', which accepts every
    file), and must not end with "." plus any entry in
    extensions_to_ignore_list. All comparisons are case-insensitive.

    Empty entries in either list are skipped. Otherwise a blank entry would
    match every file whose name ends in ".", and a default ignore list of
    [''] would wrongly reject such files.
    """
    lowerName = filename.lower()

    wanted = [ext.lower() for ext in extensions_list if ext]
    if '*' not in wanted:
        # str.endswith accepts a tuple; an empty tuple never matches.
        if not lowerName.endswith(tuple("." + ext for ext in wanted)):
            return False

    ignored = tuple("." + ext.lower() for ext in extensions_to_ignore_list if ext)
    return not lowerName.endswith(ignored)
198:      
199:  ###############################################################  
200:  #IsFileSizeOk() - does this file have the same size given by user  
def IsFileSizeOk(filePath):
    """Return True if the file at filePath is exactly sizeInBytes bytes long.

    sizeInBytes is the module-level setting; the file size comes from
    os.path.getsize().
    """
    actualSize = os.path.getsize(filePath)
    return sizeInBytes == actualSize
208:    
209:  ###############################################################  
210:  #DoesFileContainCopyright - does the file at given path, contain a copyright notice  
def DoesFileContainCopyright(filename):
    """Return True if any line of the file contains the word "copyright".

    The match is case-insensitive. The file is read as bytes, so content
    that is not valid text (possible when '*' selects every file) cannot
    raise a decoding error. The handle is closed on every exit path.

    Errors from open() or from reading the file propagate to the caller.
    """
    marker = b"copyright"
    with open(filename, 'rb') as sourceFile:
        for line in sourceFile:
            if marker in line.lower():
                return True
    return False
218:    
def IsDirectoryOk(dirpath):
    """Return False if the last component of dirpath is a directory to skip.

    The last path component is compared (case-sensitively) against the
    module-level directories_to_ignore_list. Both '\\' and '/' are treated
    as separators, so Windows and Unix style paths both work, and trailing
    separators are ignored.
    """
    normalised = dirpath.replace('\\', '/').rstrip('/')
    dirname = normalised.split('/')[-1]
    return dirname not in directories_to_ignore_list
227:    
228:  ###############################################################  
229:  #search_files - recursively search the given directory, and populate the map with files that match our list of extensions  
def search_files_by_ext(dir):
    """Recursively count files under dir that lack a copyright notice.

    Each file accepted by IsFileExtensionOk() is checked with
    DoesFileContainCopyright(); files without a notice are reported through
    printOut() at LOG_WARNINGS level and counted. Sub-directories accepted
    by IsDirectoryOk() are then searched the same way.

    dir -- path of the directory to search.

    Returns the number of files found without a copyright notice in dir and
    below. A directory that cannot be listed, or a file that cannot be read,
    is reported and skipped rather than aborting the scan.
    """
    printOut("Searching dir: " + dir)

    # OSError is caught rather than WindowsError: WindowsError is only defined
    # on Windows (where it is an OSError), so naming it elsewhere is a NameError.
    try:
        entries = os.listdir(dir)
    except OSError:
        printOut("Error occurred accessing directory " + dir)
        return 0

    numFound = 0
    subdirs = []
    for filename in entries:
        filePath = os.path.join(dir, filename)
        if os.path.isfile(filePath):
            if not IsFileExtensionOk(filename):
                continue
            try:
                hasNotice = DoesFileContainCopyright(filePath)
            except (IOError, OSError):
                # IOError and OSError are distinct on Python 2.
                printOut("Error occurred accessing file " + filePath)
                continue
            if not hasNotice:
                printOut ("File found: " + filePath, LOG_WARNINGS)
                numFound += 1
        elif os.path.isdir(filePath):
            # Only real directories are queued; other non-file entries are skipped.
            subdirs.append(filePath)

    for subdir in subdirs:
        if IsDirectoryOk(subdir):
            try:
                numFound += search_files_by_ext(subdir)
            except OSError:
                printOut("Error occurred accessing directory " + subdir)
    return numFound
260:    
261:  ###############################################################  
262:  #search for source files, that match the extensions given by user  
# Run the search from the root directory given by the user.
printOut ("Matching files:" + "\n" + "-----------------")
iNumFilesFound = search_files_by_ext(sourceDirPath)

###############################################################
#print summary of results
print ("")
foundSummary = "Found " + str(iNumFilesFound) + " matching files that do not have any copyright notice."
print (foundSummary)
warningSummary = str(numWarnings) + " warnings"
print (warningSummary)
272:    

Comments