7 require "rexml/document"
\r
8 require "rexml/xpath"
\r
# Script body: prints progress banners, requests a page of IRSpy search
# results filtered to net.protocol=z39.50, visits every "Raw XML record" link
# on that page, and opens an irspy<i>.xml file for writing.
# NOTE(review): `m`, `i` and `all_xml` are read below but never assigned in
# the lines visible here, and the blocks opened below are not closed in the
# lines visible here -- confirm against the complete script before editing.
#
# Disabled Hpricot buffer-size override (commented-out code, left untouched).
11 #Hpricot.buffer_size = 262144
\r
# Progress banners written to stdout before the network work starts.
17 puts "And we begin!"
\r
20 puts "This is going to take a while...."
\r
# Requests the search page with _count=20 and _skip=m-20; `m` comes from
# outside the visible lines (presumably a paging offset -- TODO confirm).
# NOTE(review): calling open() on an http URL presumably relies on open-uri
# being loaded elsewhere -- confirm. A brace block is opened here.
23 doc = open("http://irspy.indexdata.com/find.html?cql.anywhere=&dc.title=&zeerex.country=&net.protocol=z39.50&net.host=&net.port=&net.path=&zeerex.libType=&dc.description=&dc.creator=&_sort=&_search=Search&_count=20&_skip=#{m-20}"){
\r
# Earlier, disabled attempts at selecting the links (commented-out code).
26 #fields = doc.search(/"//a[@title='Raw XML record']")
\r
27 #fields = doc.search("//a")
\r
28 #puts fields.inspect
\r
# Iterates over every <a> element whose title attribute is 'Raw XML record'.
# NOTE(review): the `/` search operator looks like Hpricot's -- confirm which
# parser produced `doc`.
32 (doc/"//a[@title='Raw XML record']").each do |link|
\r
# Builds an absolute URL by prefixing the site root to the link's href
# (assumes the href is site-relative -- TODO confirm).
33 xml_address = "http://irspy.indexdata.com" + link.attributes['href']
\r
# Fetches that URL over HTTP; `file` holds the value returned by
# Net::HTTP.get, not a File object.
35 file = Net::HTTP.get(URI.parse(xml_address))
\r
# Joins the entries of `all_xml` with newlines and wraps them in one <root>
# element; `all_xml` is populated outside the visible lines.
39 full_xml = "<root>" + all_xml.join("\n") + "</root>"
\r
# Disabled REXML parse of the combined XML (commented-out code).
43 ######xml_doc = REXML::Document.new full_xml
\r
# Opens irspy<i>.xml in write mode and yields the handle as `f`; what is
# written to it is not visible here. `i` comes from outside the visible lines.
47 File.open("irspy#{i}.xml", "w") do |f|
\r
# Reports the value of `i` to stdout.
49 puts "written to file: #{i}"
\r