7 require "rexml/document"
\r
8 require "rexml/xpath"
\r
# Excerpt of a script that downloads the raw XML records linked from IRSpy
# search-result pages and appends output to a timestamped file.
# NOTE(review): the embedded line numbers skip, so the loop headers, the
# closing `end`/`}` lines, and the definitions of `m`, `i`, and `all_xml`
# are not visible in this excerpt.
11 #Hpricot.buffer_size = 262144
\r
# Timestamp taken once at startup; interpolated into the output file name below.
12 TIME = Time.now.strftime("%Y%m%d%H%M%S")
\r
20 puts "And we begin!"
\r
23 puts "This is going to take a while...."
\r
# Request one page of search results restricted to net.protocol=z39.50,
# 20 hits per page (_count=20), skipping the first m-20 hits.
# NOTE(review): `m` is defined outside this excerpt — presumably a paging counter; confirm.
# NOTE(review): calling `open` on a URL presumably relies on open-uri being
# required outside this excerpt — confirm.
26 doc = open("http://irspy.indexdata.com/find.html?cql.anywhere=&dc.title=&zeerex.country=&net.protocol=z39.50&net.host=&net.port=&net.path=&zeerex.libType=&dc.description=&dc.creator=&_sort=&_search=Search&_count=20&_skip=#{m-20}"){
\r
29 #fields = doc.search(/"//a[@title='Raw XML record']")
\r
30 #fields = doc.search("//a")
\r
31 #puts fields.inspect
\r
# Visit every anchor whose title attribute is 'Raw XML record'.
# NOTE(review): the `doc/"..."` search syntax looks like Hpricot's — confirm
# which parser builds `doc`.
35 (doc/"//a[@title='Raw XML record']").each do |link|
\r
# The href is appended to the site root, so it is presumably site-relative — verify.
36 xml_address = "http://irspy.indexdata.com" + link.attributes['href']
\r
# Download the record body over HTTP.
38 file = Net::HTTP.get(URI.parse(xml_address))
\r
# Join the collected pieces with newlines; `all_xml` is filled outside this
# excerpt (presumably with each downloaded record — confirm).
42 full_xml = all_xml.join("\n")
\r
46 ######xml_doc = REXML::Document.new full_xml
\r
# Mode "a+" appends, and TIME is fixed at startup, so every write in this run
# lands in the same timestamped file.
50 File.open("irspy-#{TIME}.xml", "a+") do |f|
\r
# Progress message; `i` is set outside this excerpt.
52 puts "written to file: #{i}"
\r