# Copyright (c) 2011 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Component for automatically creating masks of changing areas of a website.

Works by repeated invocation of a browser and scraping of the resulting page.
Areas that differ will be added to the auto-generated mask. The mask generator
considers the mask complete when further scrapes fail to produce any
differences in the mask.
"""
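
# A typical command line might look like the following (hypothetical driver
# script, browser, and paths; the flag names are the ones registered in
# CreateCommand below):
#
#   python site_compare.py maskmaker --browser chrome \
#       --url http://example.com --outdir masks --scrapes 10 --wait 60
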
import os        # Functions for walking the directory tree
import tempfile  # Get a temporary directory to hold intermediates
import time      # Used for sleep() and naming masks by time

import command_line
import drivers
from PIL import Image
from PIL import ImageChops
import scrapers


def CreateCommand(cmdline):
  """Inserts the command and arguments into a command line for parsing."""
  cmd = cmdline.AddCommand(
    ["maskmaker"],
    "Automatically generates a mask from a list of URLs",
    ValidateMaskmaker,
    ExecuteMaskmaker)

  cmd.AddArgument(
    ["-bp", "--browserpath"], "Full path to browser's executable",
    type="readfile", metaname="PATH")
  cmd.AddArgument(
    ["-b", "--browser"], "Which browser to use", type="string",
    default="chrome")
  cmd.AddArgument(
    ["-bv", "--browserver"], "Version of the browser", metaname="VERSION")
  cmd.AddArgument(
    ["-o", "--outdir"], "Directory to store generated masks", metaname="DIR",
    required=True)
  cmd.AddArgument(
    ["-u", "--url"], "URL to compare")
  cmd.AddArgument(
    ["-l", "--list"], "List of URLs to compare", type="readfile")
  cmd.AddMutualExclusion(["--url", "--list"])
  cmd.AddArgument(
    ["-s", "--startline"], "First line of URL list", type="int")
  cmd.AddArgument(
    ["-e", "--endline"], "Last line of URL list (exclusive)", type="int")
  cmd.AddArgument(
    ["-c", "--count"], "Number of lines of URL file to use", type="int")
  cmd.AddDependency("--startline", "--list")
  cmd.AddRequiredGroup(["--url", "--list"])
  cmd.AddDependency("--endline", "--list")
  cmd.AddDependency("--count", "--list")
  cmd.AddMutualExclusion(["--count", "--endline"])
  cmd.AddDependency("--count", "--startline")
  cmd.AddArgument(
    ["-t", "--timeout"],
    "Amount of time (seconds) to wait for browser to finish loading",
    type="int", default=60)
  cmd.AddArgument(
    ["-w", "--wait"],
    "Amount of time (in seconds) to wait between successive scrapes",
    type="int", default=60)
  cmd.AddArgument(
    ["-sc", "--scrapes"],
    "Number of successive scrapes which must result in no change to a mask "
    "before mask creation is considered complete", type="int", default=10)
  cmd.AddArgument(
    ["-sz", "--size"], "Browser window size", default=(800, 600), type="coords")
  cmd.AddArgument(["-sd", "--scrapedir"], "Directory to store scrapes")
  cmd.AddArgument(
    ["-gu", "--giveup"],
    "Number of times to scrape before giving up", type="int", default=50)
  cmd.AddArgument(
    ["-th", "--threshhold"],
    "Percentage of different pixels (0-100) above which the scrape will be "
    "discarded and the mask not updated.", type="int", default=100)
  cmd.AddArgument(
    ["-er", "--errors"],
    "Number of times a scrape can fail before giving up on the URL.",
    type="int", default=1)


def ValidateMaskmaker(command):
  """Validate the arguments to maskmaker. Raises ParseError if failed."""
  executables = [".exe", ".com", ".bat"]
  if command["--browserpath"]:
    if os.path.splitext(command["--browserpath"])[1].lower() not in executables:
      raise command_line.ParseError("Browser filename must be an executable")


def ExecuteMaskmaker(command):
  """Performs automatic mask generation."""

  # Get the list of URLs to generate masks for
  class MaskmakerURL(object):
    """Helper class for holding information about a URL passed to maskmaker."""
    __slots__ = ['url', 'consecutive_successes', 'errors']

    def __init__(self, url):
      self.url = url
      self.consecutive_successes = 0
      self.errors = 0
  if command["--url"]:
    url_list = [MaskmakerURL(command["--url"])]
  else:
    startline = command["--startline"]
    if command["--count"]:
      endline = startline + command["--count"]
    else:
      endline = command["--endline"]
    url_list = [MaskmakerURL(url.strip()) for url in
                open(command["--list"], "r").readlines()[startline:endline]]

  complete_list = []
  error_list = []

  outdir = command["--outdir"]
  scrapes = command["--scrapes"]
  errors = command["--errors"]
  size = command["--size"]

  scrape_pass = 0

  scrapedir = command["--scrapedir"]
  if not scrapedir:
    scrapedir = tempfile.gettempdir()

  scraper = scrapers.GetScraper((command["--browser"], command["--browserver"]))

  # Repeatedly iterate through the list of URLs until either every URL has
  # a successful mask or too many errors, or we've exceeded the giveup limit
  while url_list and scrape_pass < command["--giveup"]:
    # Scrape each URL
    for url in url_list:
      print "Processing %r..." % url.url
      mask_filename = drivers.windowing.URLtoFilename(url.url, outdir, ".bmp")

      # Load the existing mask. This is in a loop so we can try to recover
      # from error conditions
      while True:
        try:
          mask = Image.open(mask_filename)
          if mask.size != size:
            print "  %r already exists and is the wrong size! (%r vs %r)" % (
              mask_filename, mask.size, size)
            mask_filename = "%s_%r%s" % (
              mask_filename[:-4], size, mask_filename[-4:])
            print "  Trying again as %r..." % mask_filename
            continue
          break
        except IOError:
          print "  %r does not exist, creating" % mask_filename
          mask = Image.new("1", size, 1)  # 1-bit image, all white (unmasked)
          mask.save(mask_filename)

      # Find the stored scrape path
      mask_scrape_dir = os.path.join(
        scrapedir, os.path.splitext(os.path.basename(mask_filename))[0])
      drivers.windowing.PreparePath(mask_scrape_dir)

      # Find the baseline image
      mask_scrapes = os.listdir(mask_scrape_dir)
      mask_scrapes.sort()

      if not mask_scrapes:
        print "  No baseline image found, mask will not be updated"
        baseline = None
      else:
        baseline = Image.open(os.path.join(mask_scrape_dir, mask_scrapes[0]))

      mask_scrape_filename = os.path.join(mask_scrape_dir,
                                          time.strftime("%y%m%d-%H%M%S.bmp"))

      result = scraper.Scrape(
        [url.url], mask_scrape_dir, size, (0, 0),
        command["--timeout"], path=command["--browserpath"],
        filename=mask_scrape_filename)

      if result:
        # Return value other than None means an error
        print "  Scrape failed with error '%r'" % result
        url.errors += 1
        if url.errors >= errors:
          print "  ** Exceeded maximum error count for this URL, giving up"
        continue

      # Load the new scrape
      scrape = Image.open(mask_scrape_filename)

      # Calculate the difference between the new scrape and the baseline,
      # subject to the current mask
      if baseline:
        diff = ImageChops.multiply(ImageChops.difference(scrape, baseline),
                                   mask.convert(scrape.mode))
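        # (Masked-out areas are black in the mask, so the multiply zeroes any
        # difference there; only unmasked pixels can register a change.)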

        # If the difference is none, there's nothing to update.
        # (getextrema() returns a (min, max) pair for each band; all pairs
        # being (0, 0) means the masked difference is completely black.)
        if max(diff.getextrema()) == (0, 0):
          print "  Scrape identical to baseline, no change in mask"
          url.consecutive_successes += 1
          if url.consecutive_successes >= scrapes:
            print "  ** No change for %r scrapes, done!" % scrapes
        else:
          # Convert the difference to black and white, then change all
          # black pixels (where the scrape and the baseline were identical)
          # to white, and all others (where the scrape and the baseline
          # differed) to black.
          #
          # Since the below command is a little unclear, here's how it works.
          #   1. convert("L") converts the RGB image to grayscale
          #   2. point() maps grayscale values (or the individual channels
          #      of an RGB image) to different ones. Because it operates on
          #      individual channels, the grayscale conversion from step 1
          #      is required.
          #   3. The "1" second parameter to point() outputs the result as
          #      a monochrome bitmap. If the original RGB image were converted
          #      directly to monochrome, PIL would dither it.
          diff = diff.convert("L").point([255]+[0]*255, "1")
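          # For example: a pixel where scrape and baseline agree has grayscale
          # value 0, which the [255]+[0]*255 table maps to white; any nonzero
          # difference maps to black. Multiplying this bitmap into the mask
          # below therefore punches the changed pixels out of the mask.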

          # Count the number of different pixels
          diff_pixels = diff.getcolors()[0][0]

          diff_pixel_percent = diff_pixels * 100.0 / (mask.size[0]*mask.size[1])
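          # (For example, at the default 800x600 window a scrape differing in
          # 4,800 of the 480,000 mask pixels scores 1.0 percent.)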
          if diff_pixel_percent > command["--threshhold"]:
            print ("  Scrape differed from baseline by %.2f percent, ignoring"
                   % diff_pixel_percent)
          else:
            print "  Scrape differed in %d pixels, updating mask" % diff_pixels
            mask = ImageChops.multiply(mask, diff)
            mask.save(mask_filename)

            # Reset the number of consecutive "good" scrapes
            url.consecutive_successes = 0

    # Remove URLs whose mask is deemed done, and those with too many errors
    complete_list.extend(
      [url for url in url_list if url.consecutive_successes >= scrapes])
    error_list.extend(
      [url for url in url_list if url.errors >= errors])
    url_list = [
      url for url in url_list if
      url.consecutive_successes < scrapes and
      url.errors < errors]

    scrape_pass += 1
    print "**Done with scrape pass %d\n" % scrape_pass

    if scrape_pass >= command["--giveup"]:
      print "**Exceeded giveup threshold. Giving up."
    else:
      print "Waiting %d seconds..." % command["--wait"]
      time.sleep(command["--wait"])

  print "*** MASKMAKER COMPLETE ***"
  print "Summary report:"
  print "  %d masks successfully generated" % len(complete_list)
  for url in complete_list:
    print "    ", url.url
  print "  %d masks failed with too many errors" % len(error_list)
  for url in error_list:
    print "    ", url.url
  if scrape_pass >= command["--giveup"]:
    print ("  %d masks were not completed before "
           "reaching the giveup threshold" % len(url_list))
    for url in url_list:
      print "    ", url.url