1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
require 'scrubyt'

# BackgrounDRb worker that scrapes the del.icio.us listing page of a tag and
# stores the links it finds as Link records.
class DeliTagScrapingWorker < BackgrounDRb::MetaWorker
  set_worker_name :deli_tag_scraping_worker

  # Base URL of a del.icio.us tag listing; the tag name is appended to it.
  DELICIOUS_TAG_URL = 'http://del.icio.us/tag/'.freeze

  # Called when the worker is loaded for the first time. Nothing to set up.
  def create(args = nil)
  end

  # Scrapes the del.icio.us listing for the requested tag.
  #
  # parameterhash - Hash read with String keys:
  #                 "tag" - tag name appended to DELICIOUS_TAG_URL
  #                 "id"  - value stored as source_id on every saved Link
  #
  # Errors raised while scraping are written to the worker log and are not
  # re-raised. A missing or empty tag is logged and skipped.
  def get_links(parameterhash)
    tag = parameterhash["tag"]
    # Interpolation instead of String#+ so a nil tag cannot raise a TypeError
    # before the error handling below is reached.
    logger.info "###### tag variable = #{tag}"

    if tag.to_s.empty?
      logger.error "No tag given, nothing to scrape"
      return
    end

    begin
      scrap_delicio_resource("#{DELICIOUS_TAG_URL}#{tag}", parameterhash["id"])
    rescue Timeout::Error
      # Kept as its own clause ahead of the generic one.
      # NOTE(review): on old Ruby versions Timeout::Error is presumably not a
      # StandardError subclass, so this clause must stay - confirm.
      logger.error "Timeout Error"
    rescue StandardError => e
      logger.error e.message
    end
  end

  # Builds a scrubyt extractor for the given page.
  #
  # p_url - URL handed to scrubyt's fetch
  # pid   - value assigned to Link#source_id
  #
  # The extractor matches the anchors at /html/body/div/ol/li/h4/a, reads
  # their href attribute and follows the "&laquo; earlier" pagination link
  # with a limit of 5.
  def scrap_delicio_resource(p_url, pid)
    Scrubyt::Extractor.define do
      fetch p_url
      item '/html/body/div/ol/li/h4/a' do
        url 'href', :type => :attribute
        # NOTE(review): the Link is built directly inside the pattern block;
        # confirm against scrubyt's semantics that this runs once per matched
        # item and not only once while the extractor is being defined.
        current_link = Link.new
        # NOTE(review): fixed value, looks like a placeholder - confirm.
        current_link.linkhash = "klappt"
        current_link.url = url.to_string
        current_link.source_id = pid # reference links to source that was created in the boxes controller
        current_link.save
      end
      next_page "&laquo; earlier", :limit => 5
    end
  end

end