Allow offsets in blobs larger than 2GB on 32 bit Chromium builds.
[chromium-blink-merge.git] / tools / resources / find_unused_resources.py
blobd6e52c866ed2baad931aea02dc3e7bede04a978b
1 #!/usr/bin/env python
2 # Copyright 2013 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
6 """This script searches for unused art assets listed in a .grd file.
8 It uses git grep to look for references to the IDR resource id or the base
9 filename. If neither is found, the file is reported unused.
11 Requires a git checkout. Must be run from your checkout's "src" root.
13 Example:
14 cd /work/chrome/src
15 tools/resources/find_unused_resources.py ash/resources/ash_resources.grd
16 """
18 __author__ = 'jamescook@chromium.org (James Cook)'
21 import os
22 import re
23 import subprocess
24 import sys
def GetBaseResourceId(resource_id):
    """Strips image-grid position suffixes from a resource ID.

    Macros like IMAGE_GRID or IMAGE_BORDER append a position suffix to a
    shared base name. For example, IDR_FOO_LEFT and IDR_FOO_RIGHT both reduce
    to IDR_FOO.

    Every suffix is tried once, in the order listed below, against the current
    value, and each one that matches is removed. More than one suffix can
    therefore be stripped from a single ID: IDR_FOO_BOTTOM_LEFT first loses
    _LEFT and then _BOTTOM.

    Args:
      resource_id: String resource ID.

    Returns:
      A string with the base part of the resource ID.
    """
    # State suffixes such as _HOVER, _PRESSED or _HOT are deliberately not
    # listed: they are never used in macros.
    position_suffixes = (
        '_TOP_LEFT', '_TOP', '_TOP_RIGHT',
        '_LEFT', '_CENTER', '_RIGHT',
        '_BOTTOM_LEFT', '_BOTTOM', '_BOTTOM_RIGHT',
        '_TL', '_T', '_TR',
        '_L', '_M', '_R',
        '_BL', '_B', '_BR',
    )
    base_id = resource_id
    for tail in position_suffixes:
        if not base_id.endswith(tail):
            continue
        base_id = base_id[:len(base_id) - len(tail)]
    return base_id
def FindFilesWithContents(string_a, string_b):
    """Returns list of paths of files that contain |string_a| or |string_b|.

    Runs "git grep" from the current working directory. --name-only makes it
    print only the file paths, --fixed-strings makes it treat the patterns as
    literal text, and multiple -e patterns are ORed together by default.

    Args:
      string_a: A string to search for (not a regular expression).
      string_b: As above.

    Returns:
      A list of file paths as strings. The list is empty when no file
      contains either string.

    Raises:
      subprocess.CalledProcessError: If git grep exits with any status other
        than 0 (matches found) or 1 (no matches).
    """
    command = ['git', 'grep', '--name-only', '--fixed-strings',
               '-e', string_a, '-e', string_b]
    try:
        # universal_newlines=True yields text (not bytes) on both Python 2
        # and Python 3, so the output can be split on '\n' below.
        output = subprocess.check_output(command, universal_newlines=True)
    except subprocess.CalledProcessError as error:
        # Like grep, git grep exits with status 1 when nothing matched. That
        # is a valid "no files" answer, not a failure; anything else is real.
        if error.returncode != 1:
            raise
        return []
    # The output has one path per line and ends in a newline; dropping empty
    # pieces removes the trailing empty entry that split() produces.
    return [path for path in output.split('\n') if path]
def GetUnusedResources(grd_filepath):
    """Returns a list of resources that are unused in the code.

    Prints status lines to the console because this function is quite slow:
    it runs one git grep per distinct (base resource id, file name) pair.

    A resource is reported as unused when its base resource id or its file
    name is found in exactly one file, which is taken to be the .grd file
    itself.

    Args:
      grd_filepath: Path to a .grd file listing resources.

    Returns:
      A list of pairs of [resource_id, filepath] for the unused resources.
    """
    # Read the whole file up front (and close it promptly): the pattern below
    # is matched over the full contents because there may be newlines between
    # the name and file attributes.
    with open(grd_filepath, 'r') as grd_file:
        grd_data = grd_file.read()
    print('Checking:')
    # Match the resource id and file path out of substrings like:
    #   ...name="IDR_FOO_123" file="common/foo.png"...
    # by matching between the quotation marks.
    pattern = re.compile(
        r"""name="([^"]*)"  # Match resource ID between quotes.
            \s*             # Run of whitespace, including newlines.
            file="([^"]*)"  # Match file path between quotes.""",
        re.VERBOSE)
    unused_resources = []
    searched = set()
    for result in pattern.finditer(grd_data):
        # Extract the IDR resource id and file path.
        resource_id = result.group(1)
        filepath = result.group(2)
        filename = os.path.basename(filepath)
        base_resource_id = GetBaseResourceId(resource_id)

        # Do not bother repeating searches.
        key = (base_resource_id, filename)
        if key in searched:
            continue
        searched.add(key)

        # Print progress as we go along.
        print(resource_id)

        # Ensure the resource isn't used anywhere by checking both for the
        # resource id (which should appear in C++ code) and the raw filename
        # (in case the file is referenced in a script, test HTML file, etc.).
        matching_files = FindFilesWithContents(base_resource_id, filename)

        # Each file is matched once in the resource file itself. If there are
        # no other matching files, it is unused.
        if len(matching_files) == 1:
            # Give the user some happy news.
            print('Unused!')
            unused_resources.append([resource_id, filepath])

    return unused_resources
def GetScaleDirectories(resources_path):
    """Returns a list of paths to per-scale-factor resource directories.

    Assumes the directory names end in '_percent', for example,
    ash/resources/default_200_percent or
    chrome/app/theme/resources/touch_140_percent

    Args:
      resources_path: The base path of interest. An empty string (which is
        what os.path.dirname() returns for a bare file name) is treated as
        the current working directory.

    Returns:
      A sorted list of directory paths, each formed by joining
      |resources_path| with the name of a matching sub-directory. The paths
      are therefore relative whenever |resources_path| is relative.
    """
    # os.listdir('') raises, so list the current directory in that case.
    # os.path.join('', name) already yields a path relative to it.
    entries = os.listdir(resources_path or os.curdir)
    candidates = [os.path.join(resources_path, entry) for entry in entries]
    # Keep only real directories whose name ends in '_percent'.
    return sorted(path for path in candidates
                  if path.endswith('_percent') and os.path.isdir(path))
def main():
    """Command-line entry point: reports the unused resources of a .grd file.

    Expects exactly one command-line argument, the path to the .grd file, and
    must be run from a git checkout's "src" directory. Prints the unused
    resource ids followed by 'git rm' command lines for the matching files in
    every scale-factor directory next to the .grd file.

    Exits with status 1 on a usage or environment error, and with status 0
    when every resource is used.
    """
    # Every print below uses the single-argument call form, which produces
    # the same output on Python 2 and Python 3.

    # The script requires exactly one parameter, the .grd file path.
    if len(sys.argv) != 2:
        print('Usage: tools/resources/find_unused_resources.py <path/to/grd>')
        sys.exit(1)
    grd_filepath = sys.argv[1]

    # Try to ensure we are in a source checkout.
    current_dir = os.getcwd()
    if os.path.basename(current_dir) != 'src':
        print('Script must be run in your "src" directory.')
        sys.exit(1)

    # We require a git checkout to use git grep.
    git_dir = current_dir + '/.git'
    if not os.path.exists(git_dir):
        print('You must use a git checkout for this script to run.')
        print(git_dir + ' not found.')
        sys.exit(1)

    # Look up the scale-factor directories.
    resources_path = os.path.dirname(grd_filepath)
    scale_directories = GetScaleDirectories(resources_path)
    if not scale_directories:
        print('No scale directories (like "default_100_percent") found.')
        sys.exit(1)

    # |unused_resources| stores pairs of [resource_id, filepath] for resource
    # ids that are not referenced in the code.
    unused_resources = GetUnusedResources(grd_filepath)
    if not unused_resources:
        print('All resources are used.')
        sys.exit(0)

    # Dump our output for the user.
    print('')
    print('Unused resource ids:')
    for resource_id, _ in unused_resources:
        print(resource_id)
    # Print a list of 'git rm' command lines to remove unused assets.
    print('')
    print('Unused files:')
    for _, filepath in unused_resources:
        for directory in scale_directories:
            print('git rm ' + os.path.join(directory, filepath))
# Run the tool only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()