Here is a simple web scraping script I wrote for PhantomJS, the immensely useful headless browser, to load a page, inject jQuery into it, and then scrape the page using a user-supplied jQuery selector.
# Scrape a web page with PhantomJS.
#
# Usage: phantomjs scrape_element.coffee <url> <jquery-selector>
#
# Loads <url> (system.args[1]), injects jQuery into the loaded page, and prints
# the trimmed innerHTML of every element matching <jquery-selector>
# (system.args[2]), one per line. Exits with status 1 when the arguments are
# missing or the page fails to load.
page = require('webpage').create()
system = require 'system'

phantom.injectJs "static/js/underscore-min.js"

# Relay console output from the page to this script's stdout, dropping any
# message that starts with "Unsafe".
page.onConsoleMessage = (msg) ->
  if not msg.match /^Unsafe/
    console.log msg

# Runs inside the loaded page (it is passed to page.evaluate), so `$` here is
# the jQuery that was injected into the page and `console.log` output reaches
# this script only through page.onConsoleMessage above.
#
# elselector - jQuery selector string identifying the elements to print.
scrapeEl = (elselector) ->
  rows = $ elselector
  for el in rows
    if el.innerHTML
      str = el.innerHTML.trim()
      if str.length > 0
        console.log str
  # Explicit bare return: without it CoffeeScript collects the loop into an
  # array and returns it, which page.evaluate would then serialize for nothing.
  return

# system.args[0] is the script name, so a URL and a selector mean at least
# three entries. The if/else (rather than exit-and-fall-through) guarantees
# page.open is never reached when the arguments are missing.
if system.args.length < 3
  console.log "Usage: phantomjs #{system.args[0]} <url> <jquery-selector>"
  phantom.exit 1
else
  page.open system.args[1], (status) ->
    if status isnt 'success'
      phantom.exit 1
    else
      page.injectJs "static/js/underscore-min.js"
      page.injectJs "static/js/utils.js"
      # jQuery must be injected before scrapeEl runs, since scrapeEl calls `$`.
      page.injectJs "static/js/jquery-1.8.2.min.js"
      page.evaluate scrapeEl, system.args[2]
      phantom.exit()
Run it with:
phantomjs scrape_element.coffee "http://www.moviefone.com/coming-soon" ".movieTitle span"