1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
# BackgrounDRb worker that points a scRUBYt extractor at the del.icio.us
# listing page of a tag and stores a Link record for the scraped entries.
class DeliTagScrapingWorker < BackgrounDRb::MetaWorker

  require 'scrubyt'

  set_worker_name :deli_tag_scraping_worker

  def create(args = nil)
    # this method is called, when worker is loaded for the first time
  end

  # Scrapes the links listed on del.icio.us for a tag.
  #
  # parameterhash - Hash with string keys:
  #                 "tag" - tag name appended to 'http://del.icio.us/tag/'
  #                 "id"  - value written to Link#source_id
  #
  # Logs a message and returns nil without scraping when no tag is supplied.
  # Otherwise returns whatever the final logger call returns.
  def get_links(parameterhash)
    tag = parameterhash && parameterhash["tag"]
    if tag.to_s.strip.empty?
      # Fail soft: a missing tag used to blow up with a TypeError on the
      # first log line (String + nil).
      logger.info "###### no tag given, nothing to scrape"
      return
    end

    # Interpolation instead of String#+ so a non-String value cannot raise.
    logger.info "###### tag variable = #{tag}"
    delicious_resource = "http://del.icio.us/tag/#{tag}"
    logger.info "###### resource = #{delicious_resource}"

    # Keep everything the extractor block needs in locals. Locals are captured
    # by the closure, whereas a bare `logger` call depends on what `self` is
    # inside the block.
    # NOTE(review): presumably scRUBYt evaluates the block via instance_eval,
    # in which case `logger` would not reach this worker - TODO confirm.
    log = logger
    source_id = parameterhash["id"] # reference links to source that was created in the boxes controller

    log.info "###### about to build scrubyt extractor"
    Scrubyt::Extractor.define do
      log.info "###### build. trying to connect"
      fetch delicious_resource
      log.info "###### connected"
      item '/html/body/div/ol/li/h4/a' do
        url 'href', :type => :attribute
        # NOTE(review): this block looks like a pattern definition; it is not
        # clear from here that it runs once per matched link or that `url`
        # yields the scraped href at this point - verify against the scRUBYt
        # API and, if not, persist Links from the extractor's result instead.
        log.info "###### current url = #{url}"
        current_link = Link.new
        current_link.linkhash = "klappt"
        current_link.url = url.to_string
        current_link.source_id = source_id
        current_link.save
        log.info "###### hier sollte der link gespeichert worden sein"
      end
      log.info "###### gehe zur naechstens seite"
      # Follow the "earlier" pagination link, limited to 5.
      next_page "&laquo; earlier", :limit => 5
    end
    log.info "###### fertig"
  end

end