scripts/refactor_count.py

   1 import os, re, sys
   2
   3 alphanum = "0123456789abcdefghijklmnopqrstuvwzyxABCDEFGHIJKLMNOPQRSTUVWXYZ_"
   4 cpp_keywords = ["auto", "const", "double", "float", "int", "short", "struct", "unsigned", # C
   5 "break", "continue", "else", "for", "long", "signed", "switch", "void",
   6 "case", "default", "enum", "goto", "register", "sizeof", "typedef", "volatile",
   7 "char", "do", "extern", "if", "return", "static", "union", "while",
   8
   9 "asm", "dynamic_cast", "namespace", "reinterpret_cast", "try", # C++
  10 "bool", "explicit", "new", "static_cast", "typeid",
  11 "catch", "false", "operator", "template", "typename",
  12 "class", "friend", "private", "this", "using",
  13 "const_cast", "inline", "public", "throw", "virtual",
  14 "delete", "mutable", "protected", "true", "wchar_t"]
  15
  16 allowed_words = []
  17
  18 #allowed_words += ["bitmap_left", "advance", "glyph"] # ft2
  19
  20
  21 allowed_words += ["qsort"] # stdio / stdlib
  22 allowed_words += ["size_t", "cosf", "sinf", "asinf", "acosf", "atanf", "powf", "fabs", "rand", "powf", "fmod", "sqrtf"] # math.h
  23 allowed_words += ["time_t", "time", "strftime", "localtime"] # time.h
  24 allowed_words += [ # system.h
  25         "int64",
  26         "dbg_assert", "dbg_msg", "dbg_break", "dbg_logger_stdout", "dbg_logger_debugger", "dbg_logger_file",
  27         "mem_alloc", "mem_zero", "mem_free", "mem_copy", "mem_move", "mem_comp", "mem_stats", "total_allocations", "allocated",
  28         "thread_create", "thread_sleep", "lock_wait", "lock_create", "lock_release", "lock_destroy", "swap_endian",
  29         "io_open", "io_read", "io_read", "io_write", "io_flush", "io_close", "io_seek", "io_skip", "io_tell", "io_length",
  30         "str_comp", "str_length", "str_quickhash", "str_format", "str_copy", "str_comp_nocase", "str_sanitize", "str_append",
  31         "str_comp_num", "str_find_nocase", "str_sanitize_strong", "str_uppercase", "str_toint", "str_tofloat",
  32         "str_utf8_encode", "str_utf8_rewind", "str_utf8_forward", "str_utf8_decode", "str_sanitize_cc", "str_skip_whitespaces",
  33         "fs_makedir", "fs_listdir", "fs_storage_path", "fs_is_dir",
  34         "net_init", "net_addr_comp", "net_host_lookup", "net_addr_str", "type", "port", "net_addr_from_str",
  35         "net_udp_create", "net_udp_send", "net_udp_recv", "net_udp_close", "net_socket_read_wait",
  36         "net_stats", "sent_bytes", "recv_bytes", "recv_packets", "sent_packets",
  37         "time_get", "time_freq", "time_timestamp"]
  38
  39 allowed_words += ["vec2", "vec3", "vec4", "round", "clamp", "length", "dot", "normalize", "frandom", "mix", "distance", "min",
  40         "closest_point_on_line", "max", "absolute"] # math.hpp
  41 allowed_words += [  # tl
  42         "array", "sorted_array", "string",
  43         "all", "sort", "add", "remove_index", "remove", "delete_all", "set_size",
  44         "base_ptr", "size", "swap", "empty", "front", "pop_front", "find_binary", "find_linear", "clear", "range", "end", "cstr",
  45         "partition_linear", "partition_binary"]
  46 allowed_words += ["fx2f", "f2fx"] # fixed point math
  47
  48 def CheckIdentifier(ident):
  49         return False
  50
  51 class Checker:
  52         def CheckStart(self, checker, filename):
  53                 pass
  54         def CheckLine(self, checker, line):
  55                 pass
  56         def CheckEnd(self, checker):
  57                 pass
  58
  59 class FilenameExtentionChecker(Checker):
  60         def __init__(self):
  61                 self.allowed = [".cpp", ".h"]
  62         def CheckStart(self, checker, filename):
  63                 ext = os.path.splitext(filename)[1]
  64                 if not ext in self.allowed:
  65                         checker.Error("file extention '%s' is not allowed" % ext)
  66
  67 class IncludeChecker(Checker):
  68         def __init__(self):
  69                 self.disallowed_headers = ["stdio.h", "stdlib.h", "string.h", "memory.h"]
  70         def CheckLine(self, checker, line):
  71                 if "#include" in line:
  72                         include_file = ""
  73                         if '<' in line:
  74                                 include_file = line.split('<')[1].split(">")[0]
  75                                 #if not "/" in include_file:
  76                                 #       checker.Error("%s is not allowed" % include_file)
  77                         elif '"' in line:
  78                                 include_file = line.split('"')[1]
  79
  80                         #print include_file
  81                         if include_file in self.disallowed_headers:
  82                                 checker.Error("%s is not allowed" % include_file)
  83
  84 class HeaderGuardChecker(Checker):
  85         def CheckStart(self, checker, filename):
  86                 self.check = ".h" in filename
  87                 self.guard = "#ifndef " + filename[4:].replace("/", "_").replace(".hpp", "").replace(".h", "").upper() + "_H"
  88         def CheckLine(self, checker, line):
  89                 if self.check:
  90                         #if "#" in line:
  91                         self.check = False
  92                         #if not self.check:
  93                         if line.strip() ==  self.guard:
  94                                 pass
  95                         else:
  96                                 checker.Error("malformed or missing header guard. Should be '%s'" % self.guard)
  97
  98 class CommentChecker(Checker):
  99         def CheckLine(self, checker, line):
 100                 if line.strip()[-2:] == "*/" and "/*" in line:
 101                         checker.Error("single line multiline comment")
 102
 103 class FileChecker:
 104         def __init__(self):
 105                 self.checkers = []
 106                 self.checkers += [FilenameExtentionChecker()]
 107                 self.checkers += [HeaderGuardChecker()]
 108                 self.checkers += [IncludeChecker()]
 109                 self.checkers += [CommentChecker()]
 110
 111         def Error(self, errormessage):
 112                 self.current_errors += [(self.current_line, errormessage)]
 113
 114         def CheckLine(self, line):
 115                 for c in self.checkers:
 116                         c.CheckLine(self, line)
 117                 return True
 118
 119         def CheckFile(self, filename):
 120                 self.current_file = filename
 121                 self.current_line = 0
 122                 self.current_errors = []
 123                 for c in self.checkers:
 124                         c.CheckStart(self, filename)
 125
 126                 for line in file(filename).readlines():
 127                         self.current_line += 1
 128                         if "ignore_check" in line:
 129                                 continue
 130                         self.CheckLine(line)
 131
 132                 for c in self.checkers:
 133                         c.CheckEnd(self)
 134
 135         def GetErrors(self):
 136                 return self.current_errors
 137
 138 def cstrip(lines):
 139         d = ""
 140         for l in lines:
 141                 if "ignore_convention" in l:
 142                         continue
 143                 l = re.sub("^[\t ]*#.*", "", l)
 144                 l = re.sub("//.*", "", l)
 145                 l = re.sub('\".*?\"', '"String"', l) # remove strings
 146                 d += l.strip() + " "
 147         d = re.sub('\/\*.*?\*\/', "", d) # remove /* */ comments
 148         d = d.replace("\t", " ") # tab to space
 149         d = re.sub("  *", " ", d) # remove double spaces
 150         #d = re.sub("", "", d) # remove /* */ comments
 151
 152         d = d.strip()
 153
 154         # this eats up cases like 'n {'
 155         i = 1
 156         while i < len(d)-2:
 157                 if d[i] == ' ':
 158                         if not (d[i-1] in alphanum and d[i+1] in alphanum):
 159                                 d = d[:i] + d[i+1:]
 160                 i += 1
 161         return d
 162
 163 #def stripstrings(data):
 164 #       return re.sub('\".*?\"', 'STRING', data)
 165
 166 def get_identifiers(data):
 167         idents = {}
 168         data = " "+data+" "
 169         regexp = re.compile("[^a-zA-Z0-9_][a-zA-Z_][a-zA-Z0-9_]+[^a-zA-Z0-9_]")
 170         start = 0
 171         while 1:
 172                 m = regexp.search(data, start)
 173
 174                 if m == None:
 175                         break
 176                 start = m.end()-1
 177                 name = data[m.start()+1:m.end()-1]
 178                 if name in idents:
 179                         idents[name] += 1
 180                 else:
 181                         idents[name] = 1
 182         return idents
 183
 184 grand_total = 0
 185 grand_offenders = 0
 186
 187 gen_html = 1
 188
 189 if gen_html:
 190         print "<head>"
 191         print '<link href="/style.css" rel="stylesheet" type="text/css" />'
 192         print "</head>"
 193         print "<body>"
 194
 195
 196
 197         print '<div id="outer">'
 198
 199         print '<div id="top_left"><div id="top_right"><div id="top_mid">'
 200         print '<a href="/"><img src="/images/twlogo.png" alt="teeworlds logo" /></a>'
 201         print '</div></div></div>'
 202
 203         print '<div id="menu_left"><div id="menu_right"><div id="menu_mid">'
 204         print '</div></div></div>'
 205
 206         print '<div id="tlc"><div id="trc"><div id="tb">&nbsp;</div></div></div>'
 207         print '<div id="lb"><div id="rb"><div id="mid">'
 208         print '<div id="container">'
 209
 210         print '<p class="topic_text">'
 211         print '<h1>Code Refactoring Progress</h1>'
 212         print '''This is generated by a script that find identifiers in the code
 213         that doesn't conform to the code standard. Right now it only shows headers
 214         because they need to be fixed before we can do the rest of the source.
 215         This is a ROUGH estimate of the progress'''
 216         print '</p>'
 217
 218         print '<p class="topic_text">'
 219         print '<table>'
 220         #print "<tr><td><b>%</b></td><td><b>#</b></td><td><b>File</b></td><td><b>Offenders</b></td></tr>"
 221
 222 line_order = 1
 223 total_files = 0
 224 complete_files = 0
 225 total_errors = 0
 226
 227 for (root,dirs,files) in os.walk("src"):
 228         for filename in files:
 229                 filename = os.path.join(root, filename)
 230                 if "/." in filename or "/external/" in filename or "/base/" in filename or "/generated/" in filename:
 231                         continue
 232                 if "src/osxlaunch/client.h" in filename: # ignore this file, ObjC file
 233                         continue
 234                 if "e_config_variables.h" in filename: # ignore config files
 235                         continue
 236                 if "src/game/variables.hpp" in filename: # ignore config files
 237                         continue
 238
 239                 if not (".hpp" in filename or ".h" in filename or ".cpp" in filename):
 240                         continue
 241
 242                 #total_files += 1
 243
 244                 #if not "src/engine/client/ec_client.cpp" in filename:
 245                 #       continue
 246
 247                 f = FileChecker()
 248                 f.CheckFile(filename)
 249                 num_errors = len(f.GetErrors())
 250                 total_errors += num_errors
 251
 252                 if num_errors:
 253                         print '<tr style="background: #e0e0e0"><td colspan="2">%s, %d errors</td></tr>' % (filename, num_errors),
 254                         for line, msg in f.GetErrors():
 255                                 print '<tr"><td>%d</td><td>%s</td></tr>' % (line, msg)
 256                         #print '<table>'
 257                         #GetErrors()
 258
 259
 260
 261
 262                 if 0:
 263                         text = cstrip(file(filename).readlines()) # remove all preprocessor stuff and comments
 264                         #text = stripstrings(text) # remove strings (does not solve all cases however)
 265                         #print text
 266
 267                         idents = get_identifiers(text)
 268                         offenders = 0
 269                         total = 0
 270                         offender_list = {}
 271                         for name in idents:
 272                                 #print name
 273                                 if len(name) <= 2: # skip things that are too small
 274                                         continue
 275                                 if name in cpp_keywords: # skip keywords
 276                                         continue
 277                                 if name in allowed_words: # skip allowed keywords
 278                                         continue
 279
 280                                 total += idents[name]
 281                                 if name != name.lower(): # strip names that are not only lower case
 282                                         continue
 283                                 offender_list[name] = idents[name]
 284                                 if not gen_html:
 285                                         print "[%d] %s"%(idents[name], name)
 286                                 offenders += idents[name]
 287
 288                         grand_total += total
 289                         grand_offenders += offenders
 290
 291                         if total == 0:
 292                                 total = 1
 293
 294                         line_order = -line_order
 295
 296
 297                         done = int((1-(offenders / float(total))) * 100)
 298                         if done == 100:
 299                                 complete_files += 1
 300
 301                         if done != 100 and gen_html:
 302                                 color = "#ffa0a0"
 303                                 if done > 20:
 304                                         color = "#ffd080"
 305                                 if done > 50:
 306                                         color = "#ffff80"
 307                                 if done > 75:
 308                                         color = "#e0ff80"
 309                                 if done == 100:
 310                                         color = "#80ff80"
 311
 312                                 line_color = "#f0efd5"
 313                                 if line_order > 0:
 314                                         line_color = "#ffffff"
 315
 316                                 offender_string = ""
 317                                 count = 0
 318                                 for name in offender_list:
 319                                         count += 1
 320                                         offender_string += "[%d]%s " % (offender_list[name], name)
 321
 322                                         if count%5 == 0:
 323                                                 offender_string += "<br/>"
 324
 325                                 print '<tr style="background: %s">' % line_color,
 326                                 print '<td style="text-align: right; background: %s"><b>%d%%</b></td><td style="text-align: center">%d</td><td>%s</td>' % (color, done, offenders, filename),
 327                                 print '<td style="text-align: right">%s</td>' % offender_string
 328                                 print "</tr>"
 329                         count = 0
 330
 331 if gen_html:
 332         print "</table>"
 333
 334         print "<h1>%d errors</h1>" % total_errors
 335
 336
 337         if 0:
 338                 print "<h1>%.1f%% Identifiers done</h1>" % ((1-(grand_offenders / float(grand_total))) * 100)
 339                 print "%d left of %d" % (grand_offenders, grand_total)
 340                 print "<h1>%.1f%% Files done</h1>" % ((complete_files / float(total_files)) * 100)
 341                 print "%d left of %d" % (total_files-complete_files, total_files)
 342
 343         print "</p>"
 344         print "<div style='clear:both;'></div>"
 345         print '</div>'
 346         print '</div></div></div>'
 347
 348         print '<div id="blc"><div id="brc"><div id="bb">&nbsp;</div></div></div>'
 349         print '</div>'
 350
 351         print "</body>"