Pastie now auto-senses if line-wrap is a bad or good idea. Feedback?
## mark a section (Learn more)
This paste will be private.
# Erik Kastner 2008-02-12
# Script to get our lost comments from google's cache.
#
# For every [post guid, google-cache address] pair listed below, the script:
#   1. looks up the post's ID in wp_posts by its guid,
#   2. downloads and parses the google-cache copy of the page,
#   3. inserts one wp_comments row per comment found under ".commentlist".
#
# The script is not idempotent: running it twice inserts the comments twice.
require 'rubygems'
require 'db'
require 'open-uri'
require 'time' # Time.parse comes from the stdlib 'time' extension; load it explicitly
require 'hpricot'

# this can be replaced with a call to Mysql.new(host, user, pass, db) - and require 'mysql'
db = DB.new(:db => "free")

# prepare the database queries - this makes it faster to call them later
fetch_id = db.prepare("SELECT ID from wp_posts WHERE guid = ?")
insert_comment = db.prepare("INSERT INTO wp_comments (comment_post_ID, comment_author, comment_date, comment_content, comment_approved, comment_agent) VALUES (?, ?, ?, ?, '1', 'ruby script from erik')")

# an array of free / google-cache addresses
urls = [
  ["http://free.winelibrary.com/2008/01/25/leocadie-fernand-averoux-2006/",
   "http://64.233.169.104/search?q=cache:R5nLZeQOqw0J:free.winelibrary.com/2008/01/25/leocadie-fernand-averoux-2006/+site:free.winelibrary.com&hl=en&ct=clnk&cd=3&gl=us&client=safari"],
  ["http://free.winelibrary.com/2008/01/31/legras-st-vincent-1990/",
   "http://64.233.169.104/search?q=cache:p-8QDChsJLcJ:free.winelibrary.com/2008/01/31/legras-st-vincent-1990/+site:free.winelibrary.com&hl=en&ct=clnk&cd=4&gl=us&client=safari"],
  ["http://free.winelibrary.com/2008/01/24/calvet-thunevin-les-dentelles-2004/",
   "http://64.233.169.104/search?q=cache:PeTf8KOjS-oJ:free.winelibrary.com/2008/01/24/calvet-thunevin-les-dentelles-2004/+site:free.winelibrary.com&hl=en&ct=clnk&cd=5&gl=us&client=safari"],
  ["http://free.winelibrary.com/2008/02/04/chateau-henye-tokaji-aszueszencia-2000/",
   "http://64.233.169.104/search?q=cache:r_sJIN-PwEkJ:free.winelibrary.com/2008/02/04/chateau-henye-tokaji-aszueszencia-2000/+site:free.winelibrary.com&hl=en&ct=clnk&cd=6&gl=us&client=safari"],
  ["http://free.winelibrary.com/2007/03/08/vinoce-mt-veeder-red-2003/",
   "http://64.233.169.104/search?q=cache:pDBHCk9jjZ4J:free.winelibrary.com/2007/03/08/vinoce-mt-veeder-red-2003/+site:free.winelibrary.com&hl=en&ct=clnk&cd=7&gl=us&client=safari"],
  ["http://free.winelibrary.com/2008/02/11/mcprice-myers-grenache-lange-2005/",
   "http://64.233.169.104/search?q=cache:aBqHslC7IbsJ:free.winelibrary.com/2008/02/11/mcprice-myers-grenache-lange-2005/+site:free.winelibrary.com&hl=en&ct=clnk&cd=8&gl=us&client=safari"],
  ["http://free.winelibrary.com/2008/02/01/black-pig-ribera-del-duero-2006/",
   "http://64.233.169.104/search?q=cache:fH0EpNkaSlEJ:free.winelibrary.com/2008/02/01/black-pig-ribera-del-duero-2006/+site:free.winelibrary.com&hl=en&ct=clnk&cd=9&gl=us&client=safari"],
  ["http://free.winelibrary.com/2007/04/13/parparoussis-the-gift-of-dionysos-sideritis-2005/",
   "http://64.233.169.104/search?q=cache:Xu_iIowmPDUJ:free.winelibrary.com/2007/04/13/parparoussis-the-gift-of-dionysos-sideritis-2005/+site:free.winelibrary.com&hl=en&ct=clnk&cd=10&gl=us&client=safari"],
  ["http://free.winelibrary.com/2008/01/30/karydas-naoussa-2003-2/",
   "http://64.233.169.104/search?q=cache:VY8kMw8WWRQJ:free.winelibrary.com/2008/01/30/karydas-naoussa-2003-2/+site:free.winelibrary.com&hl=en&ct=clnk&cd=11&gl=us&client=safari"],
  ["http://free.winelibrary.com/2008/02/05/rhone-rebel-gsm-2005/",
   "http://64.233.169.104/search?q=cache:yQiLGGbyHoEJ:free.winelibrary.com/2008/02/05/rhone-rebel-gsm-2005/+site:free.winelibrary.com&hl=en&ct=clnk&cd=12&gl=us&client=safari"],
  ["http://free.winelibrary.com/2008/02/06/audelssa-tempest-2004/",
   "http://64.233.169.104/search?q=cache:B2-8Z8wU3mkJ:free.winelibrary.com/2008/02/06/audelssa-tempest-2004/+site:free.winelibrary.com&hl=en&ct=clnk&cd=13&gl=us&client=safari"]
]

# loop through each pair of addresses
urls.each do |guid, cache_url|
  # get the wp_id from the guid (first address)
  fetch_id.execute(guid)

  # Check the fetched row before indexing it: if fetch returns nil when no row
  # matched (Mysql::Stmt#fetch does; presumably the DB wrapper does too - verify),
  # indexing it directly would raise instead of reaching the skip message.
  row = fetch_id.fetch
  id = row && row[0]
  unless id
    puts "Skipping #{guid}, no id found in the db"
    next
  end

  # parse the google cache
  # NOTE: relies on open-uri patching Kernel#open to accept URLs. That patch was
  # deprecated in Ruby 2.7 and removed in 3.0; use URI.open(cache_url) there.
  page = Hpricot(open(cache_url))

  # loop through each li under .commentlist
  (page / ".commentlist li").each do |comment|
    cite = comment.at("cite")
    meta = comment.at(".commentmetadata")

    # ".commentlist li" also matches li elements nested inside a comment; those
    # lack the author/date markup, so skip them rather than abort the whole run
    # with some comments already inserted.
    unless cite && meta
      puts "Skipping an li under .commentlist on #{cache_url}, no cite or .commentmetadata found"
      next
    end

    # get the important information
    who = cite.innerHTML
    body = (comment / "p").map { |paragraph| paragraph.innerHTML }.join("\n")
    date = Time.parse(meta.innerHTML)

    # insert into the db
    insert_comment.execute(id, who, date, body)
  end
end
From the Design Piracy series on my blog: