# Copyright (c) 2011 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
5 """Component for automatically creating masks of changing areas of a website.
7 Works by repeated invokation of a browser and scraping of the resulting page.
8 Areas that differ will be added to the auto-generated mask. The mask generator
9 considers the mask complete when further scrapes fail to produce any differences
10 in the mask.
11 """

import os        # Functions for walking the directory tree
import tempfile  # Get a temporary directory to hold intermediates
import time      # Used for sleep() and naming masks by time

import command_line
import drivers
from PIL import Image
from PIL import ImageChops
import scrapers


def CreateCommand(cmdline):
  """Inserts the command and arguments into a command line for parsing."""
  cmd = cmdline.AddCommand(
      ["maskmaker"],
      "Automatically generates a mask from a list of URLs",
      ValidateMaskmaker,
      ExecuteMaskmaker)

  cmd.AddArgument(
      ["-bp", "--browserpath"], "Full path to browser's executable",
      type="readfile", metaname="PATH")
  cmd.AddArgument(
      ["-b", "--browser"], "Which browser to use", type="string",
      default="chrome")
  cmd.AddArgument(
      ["-bv", "--browserver"], "Version of the browser", metaname="VERSION")
  cmd.AddArgument(
      ["-o", "--outdir"], "Directory to store generated masks", metaname="DIR",
      required=True)
  cmd.AddArgument(
      ["-u", "--url"], "URL to compare")
  cmd.AddArgument(
      ["-l", "--list"], "List of URLs to compare", type="readfile")
  cmd.AddMutualExclusion(["--url", "--list"])
  cmd.AddArgument(
      ["-s", "--startline"], "First line of URL list", type="int")
  cmd.AddArgument(
      ["-e", "--endline"], "Last line of URL list (exclusive)", type="int")
  cmd.AddArgument(
      ["-c", "--count"], "Number of lines of URL file to use", type="int")
  cmd.AddDependency("--startline", "--list")
  cmd.AddRequiredGroup(["--url", "--list"])
  cmd.AddDependency("--endline", "--list")
  cmd.AddDependency("--count", "--list")
  cmd.AddMutualExclusion(["--count", "--endline"])
  cmd.AddDependency("--count", "--startline")
  cmd.AddArgument(
      ["-t", "--timeout"], "Amount of time (seconds) to wait for browser to "
      "finish loading",
      type="int", default=60)
  cmd.AddArgument(
      ["-w", "--wait"],
      "Amount of time (in seconds) to wait between successive scrapes",
      type="int", default=60)
  cmd.AddArgument(
      ["-sc", "--scrapes"],
      "Number of successive scrapes which must result in no change to a mask "
      "before mask creation is considered complete", type="int", default=10)
  cmd.AddArgument(
      ["-sz", "--size"], "Browser window size", default=(800, 600),
      type="coords")
  cmd.AddArgument(["-sd", "--scrapedir"], "Directory to store scrapes")
  cmd.AddArgument(
      ["-gu", "--giveup"],
      "Number of times to scrape before giving up", type="int", default=50)
  cmd.AddArgument(
      ["-th", "--threshold"],
      "Percentage of different pixels (0-100) above which the scrape will be "
      "discarded and the mask not updated.", type="int", default=100)
  cmd.AddArgument(
      ["-er", "--errors"],
      "Number of times a scrape can fail before giving up on the URL.",
      type="int", default=1)


def ValidateMaskmaker(command):
  """Validates the arguments to maskmaker. Raises ParseError on failure."""
  executables = [".exe", ".com", ".bat"]
  if command["--browserpath"]:
    if os.path.splitext(command["--browserpath"])[1].lower() not in executables:
      raise command_line.ParseError("Browser filename must be an executable")


def ExecuteMaskmaker(command):
  """Performs automatic mask generation."""

  # Get the list of URLs to generate masks for
  class MaskmakerURL(object):
    """Helper class for holding information about a URL passed to maskmaker."""
    __slots__ = ['url', 'consecutive_successes', 'errors']

    def __init__(self, url):
      self.url = url
      self.consecutive_successes = 0
      self.errors = 0
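
  # A URL's mask is considered complete once its consecutive_successes count
  # reaches --scrapes, and the URL is abandoned once errors reaches --errors.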
  if command["--url"]:
    url_list = [MaskmakerURL(command["--url"])]
  else:
    startline = command["--startline"]
    if command["--count"]:
      endline = startline + command["--count"]
    else:
      endline = command["--endline"]
    url_list = [MaskmakerURL(url.strip()) for url in
                open(command["--list"], "r").readlines()[startline:endline]]

  complete_list = []
  error_list = []

  outdir = command["--outdir"]
  scrapes = command["--scrapes"]
  errors = command["--errors"]
  size = command["--size"]
  scrape_pass = 0

  scrapedir = command["--scrapedir"]
  if not scrapedir:
    scrapedir = tempfile.gettempdir()

  # Get the scraper
  scraper = scrapers.GetScraper((command["--browser"], command["--browserver"]))
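  # (GetScraper selects the scraper implementation matching the requested
  # (browser, version) pair.)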

  # Repeatedly iterate through the list of URLs until either every URL has
  # a successful mask or too many errors, or we've exceeded the giveup limit
  while url_list and scrape_pass < command["--giveup"]:
    # Scrape each URL
    for url in url_list:
      print "Processing %r..." % url.url
      mask_filename = drivers.windowing.URLtoFilename(url.url, outdir, ".bmp")

      # Load the existing mask. This is in a loop so we can try to recover
      # from error conditions
      while True:
        try:
          mask = Image.open(mask_filename)
          if mask.size != size:
            print "  %r already exists and is the wrong size! (%r vs %r)" % (
                mask_filename, mask.size, size)
            mask_filename = "%s_%r%s" % (
                mask_filename[:-4], size, mask_filename[-4:])
            print "  Trying again as %r..." % mask_filename
            continue
          break
        except IOError:
          print "  %r does not exist, creating" % mask_filename
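          # Mode "1" is a 1-bit-per-pixel image; initializing every pixel to
          # 1 (white) starts with a mask that lets every pixel through.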
          mask = Image.new("1", size, 1)
          mask.save(mask_filename)

      # Find the stored scrape path
      mask_scrape_dir = os.path.join(
          scrapedir, os.path.splitext(os.path.basename(mask_filename))[0])
      drivers.windowing.PreparePath(mask_scrape_dir)

      # Find the baseline image
      mask_scrapes = os.listdir(mask_scrape_dir)
      mask_scrapes.sort()
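      # Scrape filenames are "%y%m%d-%H%M%S.bmp" timestamps (see below), so
      # the lexicographic sort is chronological and [0] is the oldest scrape.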

      if not mask_scrapes:
        print "  No baseline image found, mask will not be updated"
        baseline = None
      else:
        baseline = Image.open(os.path.join(mask_scrape_dir, mask_scrapes[0]))

      mask_scrape_filename = os.path.join(mask_scrape_dir,
                                          time.strftime("%y%m%d-%H%M%S.bmp"))

      # Do the scrape
      result = scraper.Scrape(
          [url.url], mask_scrape_dir, size, (0, 0),
          command["--timeout"], path=command["--browserpath"],
          filename=mask_scrape_filename)

      if result:
        # Return value other than None means an error
        print "  Scrape failed with error '%r'" % result
        url.errors += 1
        if url.errors >= errors:
          print "  ** Exceeded maximum error count for this URL, giving up"
        continue

      # Load the new scrape
      scrape = Image.open(mask_scrape_filename)

      # Calculate the difference between the new scrape and the baseline,
      # subject to the current mask
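      # (Converting the 1-bit mask to the scrape's mode gives 0 for masked
      # pixels and 255 for active ones, so ImageChops.multiply() zeroes out
      # any difference that falls in an already-masked region.)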
      if baseline:
        diff = ImageChops.multiply(ImageChops.difference(scrape, baseline),
                                   mask.convert(scrape.mode))

        # If the difference is none, there's nothing to update
        if max(diff.getextrema()) == (0, 0):
          print "  Scrape identical to baseline, no change in mask"
          url.consecutive_successes += 1
          if url.consecutive_successes >= scrapes:
            print "  ** No change for %r scrapes, done!" % scrapes
        else:
          # Convert the difference to black and white, then change all
          # black pixels (where the scrape and the baseline were identical)
          # to white, and all others (where the scrape and the baseline
          # differed) to black.
          #
          # Since the command below is a little unclear, here's how it works:
          #   1. convert("L") converts the RGB image to grayscale.
          #   2. point() maps grayscale values (or the individual channels
          #      of an RGB image) to different ones. Because it operates on
          #      individual channels, the grayscale conversion from step 1
          #      is necessary.
          #   3. The "1" second parameter to point() outputs the result as
          #      a monochrome bitmap. If the original RGB image were converted
          #      directly to monochrome, PIL would dither it.
          diff = diff.convert("L").point([255]+[0]*255, "1")
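          # (The 256-entry lookup table [255, 0, 0, ..., 0] sends grayscale
          # value 0, i.e. "no difference", to white and every nonzero value,
          # i.e. "some difference survived the mask", to black.)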

          # count the number of different pixels
          diff_pixels = diff.getcolors()[0][0]
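          # (getcolors() returns (count, value) pairs in ascending pixel-value
          # order, so [0][0] is the count of black, 0-valued pixels: the ones
          # that differed.)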

          # Is the difference too large to trust?
          diff_pixel_percent = diff_pixels * 100.0 / (mask.size[0]*mask.size[1])
          if diff_pixel_percent > command["--threshold"]:
            print ("  Scrape differed from baseline by %.2f percent, ignoring"
                   % diff_pixel_percent)
          else:
            print "  Scrape differed in %d pixels, updating mask" % diff_pixels
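            # Multiplying two 1-bit images ANDs them: pixels that are black
            # (0) in diff are cleared in the mask and stay cleared from then
            # on.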
            mask = ImageChops.multiply(mask, diff)
            mask.save(mask_filename)

            # reset the number of consecutive "good" scrapes
            url.consecutive_successes = 0

    # Remove URLs whose mask is deemed done
    complete_list.extend(
        [url for url in url_list if url.consecutive_successes >= scrapes])
    error_list.extend(
        [url for url in url_list if url.errors >= errors])
    url_list = [
        url for url in url_list if
        url.consecutive_successes < scrapes and
        url.errors < errors]

    scrape_pass += 1
    print "**Done with scrape pass %d\n" % scrape_pass

    if scrape_pass >= command["--giveup"]:
      print "**Exceeded giveup threshold. Giving up."
    else:
      print "Waiting %d seconds..." % command["--wait"]
      time.sleep(command["--wait"])

  print
  print "*** MASKMAKER COMPLETE ***"
  print "Summary report:"
  print "  %d masks successfully generated" % len(complete_list)
  for url in complete_list:
    print "    ", url.url
  print "  %d masks failed with too many errors" % len(error_list)
  for url in error_list:
    print "    ", url.url
  if scrape_pass >= command["--giveup"]:
    print ("  %d masks were not completed before "
           "reaching the giveup threshold" % len(url_list))
    for url in url_list:
      print "    ", url.url