#190 Screen Scraping with Nokogiri
Nov 30, 2009 | 13 minutes | Tools
Screen scraping is easy with Nokogiri and SelectorGadget.
- Download:
- source code: Project Files in Zip (97.9 KB)
- mp4: Full Size H.264 Video (29.2 MB)
- m4v: Smaller H.264 Video (16.7 MB)
- webm: Full Size VP8 Video (38.5 MB)
- ogv: Full Size Theora Video (36.2 MB)
Resources
bash
# Install the nokogiri gem. Everything after the bare `--` is handed to the
# gem's native-extension build rather than to `gem` itself; here it points the
# build at the libxml2 headers and libraries under /usr/local.
sudo gem install nokogiri -- --with-xml2-include=/usr/local/include/libxml2 --with-xml2-lib=/usr/local/lib
sudo gem install nokogiri -- --with-xml2-include=/usr/local/include/libxml2 --with-xml2-lib=/usr/local/lib
nokogiri_test.rb
# Fetches a Walmart search results page and prints the page title, then for
# every ".item" element the product title and price followed by its link.
require 'rubygems'
require 'nokogiri'
require 'open-uri'

url = "http://www.walmart.com/search/search-ng.do?search_constraint=0&ic=48_0&search_query=batman&Find.x=0&Find.y=0&Find=Find"

# Open through the URI object (open-uri adds #open to it) instead of a bare
# open(url): Ruby 3.0 removed open-uri's Kernel#open override, so open(url)
# would look for a local file. The block form closes the response as soon as
# the document has been parsed.
doc = URI.parse(url).open { |page| Nokogiri::HTML(page) }

# at_css returns nil when nothing matches, so guard before calling #text.
page_title = doc.at_css("title")
puts page_title.text if page_title

doc.css(".item").each do |item|
  link = item.at_css(".prodLink")
  # Skip result entries that carry no product link instead of crashing on nil.
  next unless link

  price_node = item.at_css(".PriceCompare .BodyS, .PriceXLBold")
  # price stays nil when the item has no price node or no "$..." text in it.
  price = price_node && price_node.text[/\$[0-9\.]+/]

  puts "#{link.text} - #{price}"
  puts link[:href]
end
# Fetches a Walmart search results page and prints the page title, then for
# every ".item" element the product title and price followed by its link.
require 'rubygems'
require 'nokogiri'
require 'open-uri'

url = "http://www.walmart.com/search/search-ng.do?search_constraint=0&ic=48_0&search_query=batman&Find.x=0&Find.y=0&Find=Find"

# Open through the URI object (open-uri adds #open to it) instead of a bare
# open(url): Ruby 3.0 removed open-uri's Kernel#open override, so open(url)
# would look for a local file. The block form closes the response as soon as
# the document has been parsed.
doc = URI.parse(url).open { |page| Nokogiri::HTML(page) }

# at_css returns nil when nothing matches, so guard before calling #text.
page_title = doc.at_css("title")
puts page_title.text if page_title

doc.css(".item").each do |item|
  link = item.at_css(".prodLink")
  # Skip result entries that carry no product link instead of crashing on nil.
  next unless link

  price_node = item.at_css(".PriceCompare .BodyS, .PriceXLBold")
  # price stays nil when the item has no price node or no "$..." text in it.
  price = price_node && price_node.text[/\$[0-9\.]+/]

  puts "#{link.text} - #{price}"
  puts link[:href]
end
lib/tasks/product_prices.rake
# Rake task: for every Product returned by Product.find_all_by_price(nil)
# (presumably the products that have no price yet -- confirm against the
# model), search Walmart for the product's name and store the first price
# found on the results page.
desc "Fetch product prices"
task :fetch_prices => :environment do
  require 'nokogiri'
  require 'open-uri'

  Product.find_all_by_price(nil).each do |product|
    url = "http://www.walmart.com/search/search-ng.do?search_constraint=0&ic=48_0&search_query=#{CGI.escape(product.name)}&Find.x=0&Find.y=0&Find=Find"

    # Open through the URI object (open-uri adds #open to it) instead of a
    # bare open(url): Ruby 3.0 removed open-uri's Kernel#open override. The
    # block form closes the response once the document has been parsed.
    doc = URI.parse(url).open { |page| Nokogiri::HTML(page) }

    # at_css returns nil when the search produced no price element; skip this
    # product rather than aborting the whole task with a NoMethodError.
    price_node = doc.at_css(".PriceCompare .BodyS, .PriceXLBold")
    next unless price_node

    # NOTE(review): the pattern stops at a comma, so text such as "1,299.00"
    # would yield "1" -- confirm whether such prices can occur.
    price = price_node.text[/[0-9\.]+/]
    next unless price

    product.update_attribute(:price, price)
  end
end
# Rake task: for every Product returned by Product.find_all_by_price(nil)
# (presumably the products that have no price yet -- confirm against the
# model), search Walmart for the product's name and store the first price
# found on the results page.
desc "Fetch product prices"
task :fetch_prices => :environment do
  require 'nokogiri'
  require 'open-uri'

  Product.find_all_by_price(nil).each do |product|
    url = "http://www.walmart.com/search/search-ng.do?search_constraint=0&ic=48_0&search_query=#{CGI.escape(product.name)}&Find.x=0&Find.y=0&Find=Find"

    # Open through the URI object (open-uri adds #open to it) instead of a
    # bare open(url): Ruby 3.0 removed open-uri's Kernel#open override. The
    # block form closes the response once the document has been parsed.
    doc = URI.parse(url).open { |page| Nokogiri::HTML(page) }

    # at_css returns nil when the search produced no price element; skip this
    # product rather than aborting the whole task with a NoMethodError.
    price_node = doc.at_css(".PriceCompare .BodyS, .PriceXLBold")
    next unless price_node

    # NOTE(review): the pattern stops at a comma, so text such as "1,299.00"
    # would yield "1" -- confirm whether such prices can occur.
    price = price_node.text[/[0-9\.]+/]
    next unless price

    product.update_attribute(:price, price)
  end
end

