.github/unused-strings.py

   1 """
   2 Script to scan the OpenTTD source-tree for STR_ entries that are defined but
   3 no longer used.
   4
   5 This is not completely trivial, as OpenTTD references a lot of strings in
   6 relation to another string. The most obvious example of this is a list. OpenTTD
   7 only references the first entry in the list, and does "+ <var>" to get to the
   8 correct string.
   9
  10 There are other ways OpenTTD does use relative values. This script tries to
  11 account for all of them, to give the best approximation we have for "this
  12 string is unused".
  13 """
  14
  15 import glob
  16 import os
  17 import re
  18 import subprocess
  19 import sys
  20
  21 from enum import Enum
  22
# Symbolic names that may be used instead of a number in a
# "###length <count>" marker of the language file, mapped to the number of
# list entries they stand for.
LENGTH_NAME_LOOKUP = {
    "VEHICLE_TYPES": 4,
}
  26
  27
class SkipType(Enum):
    """State of the language file reader with respect to the "###" markers."""

    # No marker is pending; this is the state the reader starts in.
    NONE = 1
    # A "###length" marker was read; the next string is the first entry of a list.
    LENGTH = 2
    # A "###external" marker was read; the next string is the first entry of a
    # list of strings that are used outside the source.
    EXTERNAL = 3
    # A "###setting-zero-is-special" marker was read.
    ZERO_IS_SPECIAL = 4
    # The last entry of a list was read; an empty line (or the
    # "###next-name-looks-similar" marker) is expected next.
    EXPECT_NEWLINE = 5
  34
  35
  36 def read_language_file(filename, strings_found, errors):
  37     strings_defined = []
  38
  39     skip = SkipType.NONE
  40     length = 0
  41     common_prefix = ""
  42     last_tiny_string = ""
  43
  44     with open(filename) as fp:
  45         for line in fp.readlines():
  46             if not line.strip():
  47                 if skip == SkipType.EXPECT_NEWLINE:
  48                     skip = SkipType.NONE
  49                 continue
  50
  51             line = line.strip()
  52
  53             if skip == SkipType.EXPECT_NEWLINE:
  54                 # The only thing allowed after a list, is this next marker, or a newline.
  55                 if line == "###next-name-looks-similar":
  56                     # "###next-name-looks-similar"
  57                     # Indicates the common prefix of the last list has a very
  58                     # similar name to the next entry, but isn't part of the
  59                     # list. So do not emit a warning about them looking very
  60                     # similar.
  61
  62                     if length != 0:
  63                         errors.append(f"ERROR: list around {name} is shorted than indicated by ###length")
  64
  65                     common_prefix = ""
  66                 else:
  67                     errors.append(f"ERROR: expected a newline after a list, but didn't find any around {name}. Did you add an entry to the list without increasing the length?")
  68
  69                 skip = SkipType.NONE
  70
  71             if line[0] == "#":
  72                 if line.startswith("###length "):
  73                     # "###length <count>"
  74                     # Indicates the next few entries are part of a list. Only
  75                     # the first entry is possibly referenced, and the rest are
  76                     # indirectly.
  77
  78                     if length != 0:
  79                         errors.append(f"ERROR: list around {name} is shorted than indicated by ###length")
  80
  81                     length = line.split(" ")[1].strip()
  82
  83                     if length.isnumeric():
  84                         length = int(length)
  85                     else:
  86                         length = LENGTH_NAME_LOOKUP[length]
  87
  88                     skip = SkipType.LENGTH
  89                 elif line.startswith("###external "):
  90                     # "###external <count>"
  91                     # Indicates the next few entries are used outside the
  92                     # source and will not be referenced.
  93
  94                     if length != 0:
  95                         errors.append(f"ERROR: list around {name} is shorted than indicated by ###length")
  96
  97                     length = line.split(" ")[1].strip()
  98                     length = int(length)
  99
 100                     skip = SkipType.EXTERNAL
 101                 elif line.startswith("###setting-zero-is-special"):
 102                     # "###setting-zero-is-special"
 103                     # Indicates the next entry is part of the "zero is special"
 104                     # flag of settings. These entries are not referenced
 105                     # directly in the code.
 106
 107                     if length != 0:
 108                         errors.append(f"ERROR: list around {name} is shorted than indicated by ###length")
 109
 110                     skip = SkipType.ZERO_IS_SPECIAL
 111
 112                 continue
 113
 114             name = line.split(":")[0].strip()
 115             strings_defined.append(name)
 116
 117             # If a string ends on _TINY or _SMALL, it can be the {TINY} variant.
 118             # Check for this by some fuzzy matching.
 119             if name.endswith(("_SMALL", "_TINY")):
 120                 last_tiny_string = name
 121             elif last_tiny_string:
 122                 matching_name = "_".join(last_tiny_string.split("_")[:-1])
 123                 if name == matching_name:
 124                     strings_found.add(last_tiny_string)
 125             else:
 126                 last_tiny_string = ""
 127
 128             if skip == SkipType.EXTERNAL:
 129                 strings_found.add(name)
 130                 skip = SkipType.LENGTH
 131
 132             if skip == SkipType.LENGTH:
 133                 skip = SkipType.NONE
 134                 length -= 1
 135                 common_prefix = name
 136             elif skip == SkipType.ZERO_IS_SPECIAL:
 137                 strings_found.add(name)
 138             elif length > 0:
 139                 strings_found.add(name)
 140                 length -= 1
 141
 142                 # Find the common prefix of these strings
 143                 for i in range(len(common_prefix)):
 144                     if common_prefix[0 : i + 1] != name[0 : i + 1]:
 145                         common_prefix = common_prefix[0:i]
 146                         break
 147
 148                 if length == 0:
 149                     skip = SkipType.EXPECT_NEWLINE
 150
 151                     if len(common_prefix) < 6:
 152                         errors.append(f"ERROR: common prefix of block including {name} was reduced to {common_prefix}. This means the names in the list are not consistent.")
 153             elif common_prefix:
 154                 if name.startswith(common_prefix):
 155                     errors.append(f"ERROR: {name} looks a lot like block above with prefix {common_prefix}. This mostly means that the list length was too short. Use '###next-name-looks-similar' if it is not.")
 156                 common_prefix = ""
 157
 158     return strings_defined
 159
 160
 161 def scan_source_files(path, strings_found):
 162     for new_path in glob.glob(f"{path}/*"):
 163         if os.path.isdir(new_path):
 164             scan_source_files(new_path, strings_found)
 165             continue
 166
 167         if not new_path.endswith((".c", ".h", ".cpp", ".hpp", ".ini")):
 168             continue
 169
 170         # Most files we can just open, but some use magic, that requires the
 171         # G++ preprocessor before we can make sense out of it.
 172         if new_path == "src/table/cargo_const.h":
 173             p = subprocess.run(["g++", "-E", new_path], stdout=subprocess.PIPE)
 174             output = p.stdout.decode()
 175         else:
 176             with open(new_path) as fp:
 177                 output = fp.read()
 178
 179         # Find all the string references.
 180         matches = re.findall(r"[^A-Z_](STR_[A-Z0-9_]*)", output)
 181         strings_found.update(matches)
 182
 183
 184 def main():
 185     strings_found = set()
 186     errors = []
 187
 188     scan_source_files("src", strings_found)
 189     strings_defined = read_language_file("src/lang/english.txt", strings_found, errors)
 190
 191     # STR_LAST_STRINGID is special, and not really a string.
 192     strings_found.remove("STR_LAST_STRINGID")
 193     # These are mentioned in comments, not really a string.
 194     strings_found.remove("STR_XXX")
 195     strings_found.remove("STR_NEWS")
 196     strings_found.remove("STR_CONTENT_TYPE_")
 197
 198     # This string is added for completion, but never used.
 199     strings_defined.remove("STR_JUST_DATE_SHORT")
 200
 201     strings_defined = sorted(strings_defined)
 202     strings_found = sorted(list(strings_found))
 203
 204     for string in strings_found:
 205         if string not in strings_defined:
 206             errors.append(f"ERROR: {string} found but never defined.")
 207
 208     for string in strings_defined:
 209         if string not in strings_found:
 210             errors.append(f"ERROR: {string} is (possibly) no longer needed.")
 211
 212     if errors:
 213         print("\n".join(errors))
 214         sys.exit(1)
 215
 216     print("OK")
 217
 218
# Only run the check when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()