moving zebra to zebra-broken
[zcc.git] / ztargets / irspy.rb
blob24da66c60e9d99b9d24b83b9c527344c5d0b8c16
1 ## (C) Jason Ronallo
2 # May 2007
4 require 'rubygems'
5 require 'hpricot'
6 require 'open-uri'
7 require "rexml/document"
8 require "rexml/xpath"
# Harvests Z39.50 target descriptions from IRSpy (irspy.indexdata.com):
# fetches a page of search results, downloads the "Raw XML record" behind
# each hit and saves the combined XML to irspy<N>.xml. The second half of the
# loop (currently unreachable, see the FIXME at the `break`) turns matching
# records into "- [host,port,database] #title" lines, which are printed and
# written to irspy.txt.

# Net::HTTP is called directly below; load it explicitly instead of relying
# on open-uri having pulled it in as a side effect.
require 'net/http'

IRSPY_HOST = "http://irspy.indexdata.com"

# Builds the search-result URL for the page identified by +m+.
# NOTE(review): _count grows with +m+ while _skip advances by 20, so
# successive pages presumably overlap -- confirm against IRSpy's parameters.
def irspy_search_url(m)
  "#{IRSPY_HOST}/find.html?cql.anywhere=&dc.title=&zeerex.country=&net.protocol=z39.50&net.host=&net.port=&net.path=&zeerex.libType=&dc.description=&dc.creator=&_sort=&_search=Search&_count=#{m}&_skip=#{m-20}"
end

# Opens +url+ through open-uri and yields the stream to the block, returning
# the block's value. Kernel#open no longer accepts URLs on Ruby >= 3.0, so
# URI.open is used wherever open-uri provides it.
def irspy_open(url, &block)
  if URI.respond_to?(:open)
    URI.open(url, &block)
  else
    open(url, &block)
  end
end

# Returns the text of the element at +path+ below +rec+, or "" when the
# element is missing or has no text, so callers can concatenate safely.
def irspy_text(rec, path)
  node = rec.elements[path]
  node ? node.text.to_s : ""
end

m = 20
match = 0
all_lines = []
i = 0
# irspy_all.txt is truncated by the first record written and appended to
# afterwards, so it ends up holding every record instead of only the last.
all_recs_mode = "w"

puts "And we begin!"
while m < 4000
  puts "m = #{m}"
  puts "This is going to take a while...."

  doc = irspy_open(irspy_search_url(m)) { |f| Hpricot(f) }

  # Download the raw XML record linked from every hit on this page.
  all_xml = []
  (doc/"//a[@title='Raw XML record']").each do |link|
    xml_address = IRSPY_HOST + link.attributes['href']
    all_xml << Net::HTTP.get(URI.parse(xml_address))
  end

  # Wrap the individual records in one root element so they parse as a
  # single document.
  full_xml = "<root>" + all_xml.join("\n") + "</root>"
  xml_doc = REXML::Document.new full_xml

  File.open("irspy#{i}.xml", "w") do |f|
    f.puts xml_doc
    puts "written to file"
  end
  i += 1

  # FIXME: this break ends the crawl after the first page, so everything
  # below it in the loop never runs and irspy.txt is always written empty.
  # Kept to preserve current behaviour; remove it to enable record matching.
  break

  records = xml_doc.elements.to_a("//explain")
  puts
  puts records.length
  puts

  records.each do |rec|
    puts rec
    File.open("irspy_all.txt", all_recs_mode) { |f| f.write(rec) }
    all_recs_mode = "a"

    STDIN.gets

    # Look the title up once and only dereference it when it exists.
    title_node = rec.elements["databaseInfo/title"]
    title = title_node ? title_node.text.to_s : ""
    if title_node
      puts title
      puts "Looking in #{title} ..."
    else
      puts "Untitled... "
    end

    # Only records that offer the XML record syntax over Z39.50 are kept.
    # No space before "[": `rec.elements [..]` would be parsed as a method
    # call taking an array argument rather than an index lookup.
    next unless rec.elements["recordInfo/recordSyntax[@name='xml']"]
    next unless rec.elements["serverInfo[@protocol='Z39.50']"]

    puts "we've got a match!"
    match += 1

    a = []
    rec.elements.each("irspy:status/irspy:probe") do |elem|
      puts elem
      a << elem.attributes["ok"]
    end
    puts a.length
    puts a.inspect

    STDIN.gets

    # Every continued line ends with "+" so the whole expression, including
    # the trailing title, is part of the assignment.
    line = "- [" + irspy_text(rec, "serverInfo/host") + "," +
           irspy_text(rec, "serverInfo/port") + "," +
           irspy_text(rec, "serverInfo/database") +
           "] #" + title
    puts line
    all_lines << line
  end
  m += 20
end

puts all_lines

# One target per line; the file is still created (empty) when nothing matched.
File.open("irspy.txt", "w") do |f|
  f.puts all_lines unless all_lines.empty?
end