1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 |
# NOTE(review): The line below is a multi-line Ruby snippet that was collapsed
# onto a single line when it was copied. Ruby therefore executes only
# `set_worker_name :deli_tag_scraping_worker`; everything after the first `#`
# is parsed as a comment. The text must be re-split into separate lines
# before any of it can run.
#
# What the collapsed text shows, in order:
#   * a comment about a method that "is called, when worker is loaded for the
#     first time", followed by an `end` (the method header itself is missing);
#   * a routine that reads parameterhash["tag"], prints it, builds
#     'http://del.icio.us/tag/' + tag, and passes that URL together with
#     parameterhash["id"] to a call whose name is missing, inside a
#     begin/rescue that prints "Timeout Error" in one rescue branch and an
#     empty string in the catch-all branch;
#   * a routine taking (p_url, pid) that prints both, then runs a `Scrubyt`
#     block: `fetch p_url`, an `item` XPath selecting
#     '/html/body/div/ol/li/h4/a', a `url` read from the 'href' attribute,
#     and `next_page "« earlier", :limit => 5`; it prints "starting scrapping"
#     before and "Scrapping done" after.
#
# NOTE(review): several tokens appear to have been lost in the copy -- there is
# no `class` or `def` keyword, the call before `(delicious_resource, ...)` has
# no name, `rescue Timeout` / `Scrubyt do` / `current_link = Link` look
# truncated, and the four consecutive `current_link = ...` assignments
# presumably were attribute setters on a Link record (the last one tying the
# link to `pid`). TODO: restore these from the original source; do not guess.
set_worker_name :deli_tag_scraping_worker # this method is called, when worker is loaded for the first time end tag = parameterhash["tag"] puts "###### tag variable = " + tag delicious_resource = 'http://del.icio.us/tag/' + tag begin (delicious_resource,parameterhash["id"]) rescue Timeout puts "Timeout Error" rescue puts "" end end p_url,pid puts p_url,pid puts "starting scrapping" Scrubyt do fetch p_url item '/html/body/div/ol/li/h4/a' do url 'href', :type => :attribute current_link = Link current_link = "klappt" current_link = url current_link = pid # reference links to source that was created in the boxes controller current_link end next_page "« earlier", :limit => 5 end puts "Scrapping done" end end |
Pastie