Report abuse

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# Downloads every .mp3/.m4a file linked from a web page into a local
# directory named after the last path segment of the page URI.
#
# Usage: rake grab:mp3s <mp3dir-uri>
#
# Prints a usage line to stderr and exits with status 1 when no URI is given.
#
# NOTE(review): rake presumably still treats the URI argument as a task name
# after this task returns -- confirm whether a trailing `exit` is wanted.
desc "Grab all the mp3/m4a files from a web page"
task "grab:mp3s" do
  require 'fileutils'
  require 'hpricot'
  require 'open-uri'

  # Drop the task name so the remaining ARGV entries are this task's arguments.
  taskname = ARGV.shift

  if ARGV.empty?
    $stderr.puts "usage: #{File.basename($0)} #{taskname} <mp3dir-uri>"
    exit(1)
  end

  uri = ARGV.first

  # URI.unescape was removed in Ruby 3.0; the parser-level method is the
  # replacement and, unlike form decoding, leaves '+' untouched.
  unescape = lambda { |str| URI::DEFAULT_PARSER.unescape(str) }

  # Use URI#open instead of Kernel#open: Kernel#open would spawn a subprocess
  # for an argument starting with "|", and no longer opens URLs on Ruby 3.
  doc = URI.parse(uri).open { |f| Hpricot(f) }

  # Anchors without an href attribute yield nil, so drop those first.
  hrefs = (doc/"a").map { |a| a.get_attribute("href") }.compact

  # \z anchors at the true end of the string ($ only anchors at end of line).
  links = hrefs.select { |href| href =~ /\.(mp3|m4a)\z/ }
  links = links.map { |href| URI.join(uri, href) }

  # mkdir_p so re-running the task against an existing directory does not fail.
  dirname = unescape.call(File.basename(uri))
  FileUtils.mkdir_p(dirname)

  links.each do |link|
    filename = File.join(dirname, unescape.call(File.basename(link.to_s)))
    puts filename
    # Binary mode keeps the audio bytes intact; the block form closes the file.
    File.open(filename, 'wb') { |out| out.write(link.read) }
  end
end