Simple PhantomJS web scraping script

Here is a simple web scraping script I wrote for PhantomJS, the immensely useful headless browser, to load a page, inject jQuery into it, and then scrape the page using a user-supplied jQuery selector.

# PhantomJS scraping script (CoffeeScript).
#
# Usage: phantomjs <script> <url> <jquery-selector>
#
# Opens <url>, injects helper libraries and jQuery into the loaded page,
# and prints the trimmed, non-empty innerHTML of every element that
# matches <jquery-selector>.

page = require('webpage').create()
system = require 'system'

# Loaded into the PhantomJS (outer) context; the page gets its own copy below.
phantom.injectJs "static/js/underscore-min.js"

# system.args[0] is the script name, so both a URL and a selector are required.
if system.args.length < 3
    console.log "Usage: phantomjs <script> <url> <selector>"
    phantom.exit 1

# Relay console output from the page to stdout, dropping any message that
# starts with "Unsafe" (presumably cross-frame access warnings -- confirm).
page.onConsoleMessage = (msg) ->
    if not msg.match /^Unsafe/
        console.log msg

# Runs inside the page via page.evaluate, so `$` is the jQuery injected into
# the page and console.log output reaches us through page.onConsoleMessage.
#
# elselector - jQuery selector string for the elements to scrape.
scrapeEl = (elselector) ->
    rows = $ elselector
    for el in rows
        if el.innerHTML
            str = el.innerHTML.trim()
            if str.length > 0
                console.log str
    # Explicit return so the loop is not collected into an array result.
    return

page.open system.args[1], (status) ->
    if status isnt 'success'
        phantom.exit 1
    else
        page.injectJs "static/js/underscore-min.js"
        page.injectJs "static/js/utils.js"
        page.injectJs "static/js/jquery-1.8.2.min.js"
        page.evaluate scrapeEl, system.args[2]
        # Without an explicit exit PhantomJS keeps running after the scrape.
        phantom.exit()

Run it with:

phantomjs <script>.coffee "<page URL>" ".movieTitle span"

No related content found.


Leave a comment

This site uses Akismet to reduce spam. Learn how your comment data is processed.