3 """GUI interface to webchecker.
5 This works as a Grail applet too! E.g.
7 <APPLET CODE=wcgui.py NAME=CheckerWindow></APPLET>
9 Checkpoints are not (yet??? ever???) supported.
13 Enter a root to check in the text entry box. To enter more than one root,
14 enter them one at a time and press <Return> for each one.
16 Command buttons Run, Stop and "Check one" govern the checking process in
17 the obvious way. Run and "Check one" also enter the root from the text
18 entry box if one is present. There's also a check box (enabled by default)
19 to decide whether actually to follow external links (since this can slow
20 the checking down considerably). Finally there's a Quit button.
22 A series of checkbuttons determines whether the corresponding output panel
23 is shown. List panels are also automatically shown or hidden when their
24 status changes between empty and non-empty. There are six panels:
26 Log -- raw output from the checker (-v, -q affect this)
27 To check -- links discovered but not yet checked
28 Checked -- links that have been checked
29 Bad links -- links that failed upon checking
30 Errors -- pages containing at least one bad link (titled "Pages w/ bad links")
31 Details -- details about one URL; double-clicking a URL in any of
32 the above list panels (not in Log) shows its details
35 Use your window manager's Close command to quit.
39 -m bytes -- skip HTML pages larger than this size (default %(MAXPAGE)d)
40 -q -- quiet operation (also suppresses external links report)
41 -v -- verbose operation; repeating -v will increase verbosity
42 -t root -- specify root dir which should be treated as internal (can repeat)
43 -a -- don't check name anchors
45 Command line arguments:
47 rooturl -- URL to start checking
50 XXX The command line options (-m, -q, -v) should be GUI accessible.
52 XXX The roots should be visible as a list (?).
54 XXX The multipanel user interface is clumsy.
# On the classic Mac platform ('mac'), replace a few webchecker module
# settings: a different default root, a 50000-byte page limit and a
# verbosity of 4.
if sys.platform == 'mac':
    # Updating the module namespace is equivalent to assigning the three
    # attributes one by one.
    vars(webchecker).update(
        DEFROOT="http://grail.cnri.reston.va.us/",
        MAXPAGE=50000,
        verbose=4,
    )
76 opts
, args
= getopt
.getopt(sys
.argv
[1:], 't:m:qva')
77 except getopt
.error
, msg
:
78 sys
.stdout
= sys
.stderr
80 print __doc__
%vars(webchecker
)
82 webchecker
.verbose
= webchecker
.VERBOSE
83 webchecker
.nonames
= webchecker
.NONAMES
84 webchecker
.maxpage
= webchecker
.MAXPAGE
88 webchecker
.maxpage
= int(a
)
90 webchecker
.verbose
= 0
92 webchecker
.verbose
= webchecker
.verbose
+ 1
96 webchecker
.nonames
= not webchecker
.nonames
97 root
= Tk(className
='Webchecker')
98 root
.protocol("WM_DELETE_WINDOW", root
.quit
)
99 c
= CheckerWindow(root
)
100 c
.setflags(verbose
=webchecker
.verbose
, maxpage
=webchecker
.maxpage
,
101 nonames
=webchecker
.nonames
)
103 for arg
in args
[:-1]:
105 c
.suggestroot(args
[-1])
106 # Usually conditioned on whether external links
107 # will be checked, but since that's not a command
108 # line option, just toss them in.
109 for url_root
in extra_roots
:
110 # Make sure it's terminated by a slash,
111 # so that addroot doesn't discard the last
112 # directory component.
113 if url_root
[-1] != "/":
114 url_root
= url_root
+ "/"
115 c
.addroot(url_root
, add_to_do
= 0)
119 class CheckerWindow(webchecker
.Checker
):
# Build the checker window inside `parent`: a "Root URL:" entry row, a row
# of command buttons, two status labels and a MultiPanel holding the output
# panels.  Afterwards the webchecker.Checker base class is initialised and
# `root` (default webchecker.DEFROOT) is handed to suggestroot().
# NOTE(review): the embedded line numbering has gaps (123, 132, 135, 139,
# 142, 156, 170, 172), so some original lines -- including the tails of the
# "Stop" and "Check one" Button(...) calls -- are not visible in this view.
121 def __init__(self
, parent
, root
=webchecker
.DEFROOT
):
122 self
.__parent
= parent
# Top row: a label plus the root-URL entry; <Return> in the entry calls
# enterroot(), and the entry receives the keyboard focus.
124 self
.__topcontrols
= Frame(parent
)
125 self
.__topcontrols
.pack(side
=TOP
, fill
=X
)
126 self
.__label
= Label(self
.__topcontrols
, text
="Root URL:")
127 self
.__label
.pack(side
=LEFT
)
128 self
.__rootentry
= Entry(self
.__topcontrols
, width
=60)
129 self
.__rootentry
.pack(side
=LEFT
)
130 self
.__rootentry
.bind('<Return>', self
.enterroot
)
131 self
.__rootentry
.focus_set()
# Second row: the command buttons and the "Check nonlocal links" checkbutton.
133 self
.__controls
= Frame(parent
)
134 self
.__controls
.pack(side
=TOP
, fill
=X
)
136 self
.__start
= Button(self
.__controls
, text
="Run", command
=self
.start
)
137 self
.__start
.pack(side
=LEFT
)
138 self
.__stop
= Button(self
.__controls
, text
="Stop", command
=self
.stop
,
140 self
.__stop
.pack(side
=LEFT
)
141 self
.__step
= Button(self
.__controls
, text
="Check one",
143 self
.__step
.pack(side
=LEFT
)
144 self
.__cv
= BooleanVar(parent
)
145 self
.__cv
.set(self
.checkext
)
146 self
.__checkext
= Checkbutton(self
.__controls
, variable
=self
.__cv
,
147 command
=self
.update_checkext
,
148 text
="Check nonlocal links",)
149 self
.__checkext
.pack(side
=LEFT
)
150 self
.__reset
= Button(self
.__controls
, text
="Start over", command
=self
.reset
)
151 self
.__reset
.pack(side
=LEFT
)
152 if __name__
== '__main__': # No Quit button under Grail!
153 self
.__quit
= Button(self
.__controls
, text
="Quit",
154 command
=self
.__parent
.quit
)
155 self
.__quit
.pack(side
=RIGHT
)
# Two left-anchored status lines below the controls.
157 self
.__status
= Label(parent
, text
="Status: initial", anchor
=W
)
158 self
.__status
.pack(side
=TOP
, fill
=X
)
159 self
.__checking
= Label(parent
, text
="Idle", anchor
=W
)
160 self
.__checking
.pack(side
=TOP
, fill
=X
)
# Output panels.  Note that sys.stdout is rebound to the "Log" panel here;
# the four list panels all use self.showinfo as their callback.
161 self
.__mp
= mp
= MultiPanel(parent
)
162 sys
.stdout
= self
.__log
= LogPanel(mp
, "Log")
163 self
.__todo
= ListPanel(mp
, "To check", self
, self
.showinfo
)
164 self
.__done
= ListPanel(mp
, "Checked", self
, self
.showinfo
)
165 self
.__bad
= ListPanel(mp
, "Bad links", self
, self
.showinfo
)
166 self
.__errors
= ListPanel(mp
, "Pages w/ bad links", self
, self
.showinfo
)
167 self
.__details
= LogPanel(mp
, "Details")
# The base class is initialised only after the widgets exist.
# NOTE(review): presumably because Checker.__init__ triggers methods that
# touch the panels -- confirm against webchecker.Checker.
168 self
.root_seed
= None
169 webchecker
.Checker
.__init
__(self
)
171 root
= str(root
).strip()
173 self
.suggestroot(root
)
177 webchecker
.Checker
.reset(self
)
178 for p
in self
.__todo
, self
.__done
, self
.__bad
, self
.__errors
:
181 self
.suggestroot(self
.root_seed
)
def suggestroot(self, root):
    """Offer ``root`` as the root URL shown in the entry box.

    The entry is emptied, filled with ``root`` and its whole content is
    selected; ``root`` is then remembered in ``self.root_seed``.
    """
    entry = self.__rootentry
    entry.delete(0, END)
    entry.insert(END, root)
    entry.select_range(0, END)
    self.root_seed = root
# Handler bound to <Return> in the root entry (event defaults to None, so it
# can also be called without an event).  Reads the text from the entry,
# shows "Adding root ..." in the status label and resets it to "Idle"
# afterwards, then looks the root up in the "To check" list to select it and
# scroll to it, and finally empties the entry.
# NOTE(review): embedded lines 191-192, 195, 197 and 200-201 are not visible
# here, so the statement that actually registers the root, the `try:` that
# pairs with the `except` below, and the except body are missing from view.
189 def enterroot(self
, event
=None):
190 root
= self
.__rootentry
.get()
193 self
.__checking
.config(text
="Adding root "+root
)
# update_idletasks() is called right after the label text changes.
194 self
.__checking
.update_idletasks()
196 self
.__checking
.config(text
="Idle")
198 i
= self
.__todo
.items
.index(root
)
199 except (ValueError, IndexError):
202 self
.__todo
.list.select_clear(0, END
)
203 self
.__todo
.list.select_set(i
)
204 self
.__todo
.list.yview(i
)
205 self
.__rootentry
.delete(0, END
)
208 self
.__start
.config(state
=DISABLED
, relief
=SUNKEN
)
209 self
.__stop
.config(state
=NORMAL
)
210 self
.__step
.config(state
=DISABLED
)
216 self
.__stop
.config(state
=DISABLED
, relief
=SUNKEN
)
220 self
.__start
.config(state
=DISABLED
)
221 self
.__step
.config(state
=DISABLED
, relief
=SUNKEN
)
228 self
.__parent
.after_idle(self
.dosomething
)
230 self
.__checking
.config(text
="Idle")
231 self
.__start
.config(state
=NORMAL
, relief
=RAISED
)
232 self
.__stop
.config(state
=DISABLED
, relief
=RAISED
)
233 self
.__step
.config(state
=NORMAL
, relief
=RAISED
)
# Perform one step of checking work; elsewhere in this file it is scheduled
# through self.__parent.after_idle(self.dosomething).  Returns immediately
# when self.__busy is set.  Otherwise an index `i` into the "To check" list
# is chosen (the current selection is consulted), that entry is selected and
# scrolled into view, "Checking <url>" is shown in the status label and the
# Tk window is updated.
# NOTE(review): embedded lines 239-240, 242-245 and 251 onward are not
# visible, so how `i` is derived from `l` and what happens after the update
# cannot be confirmed from this view.
237 def dosomething(self
):
238 if self
.__busy
: return
241 l
= self
.__todo
.selectedindices()
246 self
.__todo
.list.select_set(i
)
247 self
.__todo
.list.yview(i
)
248 url
= self
.__todo
.items
[i
]
249 self
.__checking
.config(text
="Checking "+self
.format_url(url
))
250 self
.__parent
.update()
# Write everything known about `url` to the panel object `d` and show the
# "Details" panel.  This method is the callback handed to the four list
# panels.  `url` is indexed as url[0] / url[1], so it is a pair.
# Output, in order: the formatted URL; the recorded error if the URL is in
# self.bad; a note if it is one of self.roots; its status according to
# self.done / self.todo; the bad links recorded in self.errors for url[0]
# (only when url[1] is empty); and finally one "Origin:" entry per item of
# `o`.
# NOTE(review): `d` and `o` are assigned on embedded lines that are not
# visible here (258-259, 267, 270-271, 273), as are lines 280 and 287 onward.
257 def showinfo(self
, url
):
260 d
.put("URL: %s\n" % self
.format_url(url
))
261 if self
.bad
.has_key(url
):
262 d
.put("Error: %s\n" % str(self
.bad
[url
]))
263 if url
in self
.roots
:
264 d
.put("Note: This is a root URL\n")
265 if self
.done
.has_key(url
):
266 d
.put("Status: checked\n")
268 elif self
.todo
.has_key(url
):
269 d
.put("Status: to check\n")
272 d
.put("Status: unknown (!)\n")
# self.errors is keyed by url[0] alone; each entry unpacks into
# (link, rawlink, msg).
274 if (not url
[1]) and self
.errors
.has_key(url
[0]):
275 d
.put("Bad links from this page:\n")
276 for triple
in self
.errors
[url
[0]]:
277 link
, rawlink
, msg
= triple
278 d
.put(" HREF %s" % self
.format_url(link
))
# The raw link text is added only when it differs from the formatted URL.
279 if self
.format_url(link
) != rawlink
: d
.put(" (%s)" %rawlink
)
281 d
.put(" error %s\n" % str(msg
))
282 self
.__mp
.showpanel("Details")
283 for source
, rawlink
in o
:
284 d
.put("Origin: %s" % source
)
285 if rawlink
!= self
.format_url(url
):
286 d
.put(" (%s)" % rawlink
)
# Extends webchecker.Checker.setbad: delegates to the base class with the
# same (url, msg) arguments, then adds `url` to the "Bad links" list panel.
# NOTE(review): the embedded numbering skips a line after the last statement
# below, so a trailing statement may be missing from this view.
290 def setbad(self
, url
, msg
):
291 webchecker
.Checker
.setbad(self
, url
, msg
)
292 self
.__bad
.insert(url
)
# Extends webchecker.Checker.setgood: delegates to the base class, then
# removes `url` from the "Bad links" list panel.
# NOTE(review): the embedded numbering skips a line after the last statement
# below, so a trailing statement may be missing from this view.
295 def setgood(self
, url
):
296 webchecker
.Checker
.setgood(self
, url
)
297 self
.__bad
.remove(url
)
# Extends webchecker.Checker.newlink: delegates to the base class, then
# mirrors the result in the GUI.  If `url` is now a key of self.done it is
# added to the "Checked" panel; otherwise, if it is a key of self.todo, it is
# added to the "To check" panel.
# NOTE(review): the embedded numbering skips a line after the last statement
# below, so a trailing statement may be missing from this view.
300 def newlink(self
, url
, origin
):
301 webchecker
.Checker
.newlink(self
, url
, origin
)
302 if self
.done
.has_key(url
):
303 self
.__done
.insert(url
)
304 elif self
.todo
.has_key(url
):
305 self
.__todo
.insert(url
)
# Extends webchecker.Checker.markdone: delegates to the base class, then
# moves `url` in the GUI by adding it to the "Checked" panel and removing it
# from the "To check" panel.
# NOTE(review): the embedded numbering skips a line after the last statement
# below, so a trailing statement may be missing from this view.
308 def markdone(self
, url
):
309 webchecker
.Checker
.markdone(self
, url
)
310 self
.__done
.insert(url
)
311 self
.__todo
.remove(url
)
# Extends webchecker.Checker.seterror: delegates to the base class, then
# adds the page to the "Pages w/ bad links" panel.
# The panel item is the pair (url, ''); list items elsewhere in this file are
# indexed as url[0] / url[1].
# NOTE(review): the second element is presumably a fragment, left empty
# here -- confirm against webchecker.  The embedded numbering also skips a
# line after the last statement, so a trailing statement may be missing.
314 def seterror(self
, url
, triple
):
315 webchecker
.Checker
.seterror(self
, url
, triple
)
316 self
.__errors
.insert((url
, ''))
320 self
.__status
.config(text
="Status: "+self
.status())
321 self
.__parent
.update()
def update_checkext(self):
    """Copy the "Check nonlocal links" checkbutton state to ``self.checkext``.

    Registered as the checkbutton's command; the value is read from the
    variable attached to that checkbutton.
    """
    follow_nonlocal = self.__cv.get()
    self.checkext = follow_nonlocal
# Create a list panel named `name` inside the multi-panel `mp`.
#   checker  -- object whose format_url() is used to render items
#   showinfo -- callable invoked with the chosen item on double click
# The listbox and its frame come from tktools.make_list_box (60x5), and the
# listbox is configured with exportselection=0.
# NOTE(review): embedded lines 330-331, 338 and 340 are not visible; other
# methods read self.mp, self.name and self.items, which are presumably set
# on those lines, and the bind below may be conditional on `showinfo`.
329 def __init__(self
, mp
, name
, checker
, showinfo
=None):
332 self
.showinfo
= showinfo
333 self
.checker
= checker
334 self
.panel
= mp
.addpanel(name
)
335 self
.list, self
.frame
= tktools
.make_list_box(
336 self
.panel
, width
=60, height
=5)
337 self
.list.config(exportselection
=0)
339 self
.list.bind('<Double-Button-1>', self
.doubleclick
)
344 self
.list.delete(0, END
)
345 self
.mp
.hidepanel(self
.name
)
# Handler for '<Double-Button-1>' on the listbox: passes the item at the
# first selected index to self.showinfo.
# NOTE(review): embedded line 349 is not visible; it presumably guards
# against an empty selection before l[0] is used -- confirm.
347 def doubleclick(self
, event
):
348 l
= self
.selectedindices()
350 self
.showinfo(self
.items
[l
[0]])
# Read the listbox's current selection via curselection().
# NOTE(review): the statements that post-process and return `l` (embedded
# lines 354 onward) are not visible in this view; callers index and iterate
# the result.
352 def selectedindices(self
):
353 l
= self
.list.curselection()
# Add `url` to the panel unless it is already in self.items.  The panel is
# shown via the multi-panel, then the formatted URL goes into the listbox
# and the raw `url` into self.items, both at the same index `i`, so the two
# sequences stay in step.
# NOTE(review): embedded lines 359, 362 and 364 are not visible, so the
# condition guarding showpanel() and the computation of `i` are missing
# from this view.
357 def insert(self
, url
):
358 if url
not in self
.items
:
360 self
.mp
.showpanel(self
.name
)
361 # (I tried sorting alphabetically, but the display is too jumpy)
363 self
.list.insert(i
, self
.checker
.format_url(url
))
365 self
.items
.insert(i
, url
)
# Remove `url` from the panel.  Its index is looked up in self.items, and
# whether that index was selected is remembered.  The panel may be hidden
# through the multi-panel; otherwise the index is clamped to the last
# remaining item and that item is selected.
# NOTE(review): embedded lines 368, 371-372, 374-376, 378 and 382 onward are
# not visible.  Missing from view are the `try:` that pairs with the
# `except`, the except body, the actual deletions, and the conditions
# guarding hidepanel() and the reselection.
367 def remove(self
, url
):
369 i
= self
.items
.index(url
)
370 except (ValueError, IndexError):
373 was_selected
= i
in self
.selectedindices()
377 self
.mp
.hidepanel(self
.name
)
# Clamp to the last valid index.
379 if i
>= len(self
.items
):
380 i
= len(self
.items
) - 1
381 self
.list.select_set(i
)
# Create a text panel named `name` inside the multi-panel `mp`.  The text
# widget and its frame come from tktools.make_text_box (height 10), and the
# widget is configured with wrap=NONE.
# NOTE(review): embedded lines 387-388 are not visible in this view.
386 def __init__(self
, mp
, name
):
389 self
.panel
= mp
.addpanel(name
)
390 self
.text
, self
.frame
= tktools
.make_text_box(self
.panel
, height
=10)
391 self
.text
.config(wrap
=NONE
)
394 self
.text
.delete("1.0", END
)
395 self
.text
.yview("1.0")
398 self
.text
.insert(END
, s
)
403 self
.text
.insert(END
, s
)
# Build the container for the switchable panels: an outer frame filling its
# parent, a raised top frame (later populated with one checkbutton per panel
# by addpanel) and a bottom frame that holds the panels themselves.
# NOTE(review): embedded lines 412 and 419-420 are not visible; self.parent,
# self.panelnames and self.panels are used elsewhere and are presumably
# initialised on those lines.
411 def __init__(self
, parent
):
413 self
.frame
= Frame(self
.parent
)
414 self
.frame
.pack(expand
=1, fill
=BOTH
)
415 self
.topframe
= Frame(self
.frame
, borderwidth
=2, relief
=RAISED
)
416 self
.topframe
.pack(fill
=X
)
417 self
.botframe
= Frame(self
.frame
)
418 self
.botframe
.pack(expand
=1, fill
=BOTH
)
# Register a new panel called `name`.  This creates a StringVar, a
# checkbutton in the top frame and a frame in the bottom frame, headed by a
# raised label showing `name`.  The checkbutton sets the variable to `name`
# when on and "" when off, and its command is self.checkpanel.  The name is
# appended to self.panelnames and an entry is stored in self.panels[name];
# other methods unpack that entry as (v, check, panel).
# NOTE(review): embedded lines 424-427, 435, 438 and 440 are not visible, so
# the use of the `on` parameter, the construction of `t`, the condition
# guarding the final pack() and the return value are missing from this view.
422 def addpanel(self
, name
, on
=0):
423 v
= StringVar(self
.parent
)
428 check
= Checkbutton(self
.topframe
, text
=name
,
429 offvalue
="", onvalue
=name
, variable
=v
,
430 command
=self
.checkpanel
)
431 check
.pack(side
=LEFT
)
432 panel
= Frame(self
.botframe
)
433 label
= Label(panel
, text
=name
, borderwidth
=2, relief
=RAISED
, anchor
=W
)
434 label
.pack(side
=TOP
, fill
=X
)
436 self
.panelnames
.append(name
)
437 self
.panels
[name
] = t
439 panel
.pack(expand
=1, fill
=BOTH
)
# Make the panel registered under `name` visible by packing its frame so
# that it expands and fills in both directions.
# NOTE(review): embedded line 444 is not visible; it presumably updates the
# panel's checkbutton variable `v` -- confirm.
442 def showpanel(self
, name
):
443 v
, check
, panel
= self
.panels
[name
]
445 panel
.pack(expand
=1, fill
=BOTH
)
# Look up the (v, check, panel) entry registered under `name`.
# NOTE(review): the statements that actually hide the panel (embedded lines
# 449-450) are not visible in this view.
447 def hidepanel(self
, name
):
448 v
, check
, panel
= self
.panels
[name
]
# Command callback shared by every panel checkbutton.  It makes two passes
# over self.panelnames, in registration order; the second pass packs panel
# frames.
# NOTE(review): embedded lines 455 and 458 are not visible.  Presumably the
# first pass un-packs every panel and the second re-packs only those whose
# variable is set, which would keep visible panels in registration order --
# confirm against the full source.
452 def checkpanel(self
):
453 for name
in self
.panelnames
:
454 v
, check
, panel
= self
.panels
[name
]
456 for name
in self
.panelnames
:
457 v
, check
, panel
= self
.panels
[name
]
459 panel
.pack(expand
=1, fill
=BOTH
)
462 if __name__
== '__main__':