An idiomatic way of converting an Option[String] into an Option[Int] in Scala

This always returns an Option[Int]

// flatMap collapses the nested Option: a missing or non-numeric string yields None,
// while a numeric string yields Some(parsed value).
Option("bad") flatMap { s => catching(classOf[NumberFormatException]) opt s.toInt }
Share/Save

Javascript replace on a capture group

I ran into a problem recently where I needed to perform a regex replace on a string and also manipulate the string captured in a capture group at the same time. What I discovered is that it’s valid to pass a function as the second argument to the replace function; it receives the full match as its first argument and the capture groups as the arguments that follow (arguments[1] and up).

So here’s my code to capture a person’s name and escape it.

// Rewrite every person link in `story` so that its ID parameter is the
// person's name (periods removed, URL-encoded) instead of the numeric ID.
// The callback receives the whole match first and the captured link text
// (the name) second.
story = story.replace(/person\.go\?ID=\d+"\s*[^>]*>([^(<)]+)<\/a>/g, function (match, name) {
    // The pattern consumes the closing </a>, so it has to be emitted again here.
    // encodeURIComponent replaces the deprecated escape().
    return 'person.go?ID=' + encodeURIComponent(name.replace(/\./g, '')) + '">' + name + '</a>';
});

ios7 form input patch

iOS 7 appears to have broken input fields for a number of web applications. Input fields now take two taps to allow the user to input data even though the keyboard is brought up after only one tap. Here’s a hack to fix the input fields for any of your web apps that iOS 7 broke.

// Only applies when the page reports navigator.standalone (an iOS-specific
// flag). For every <input> on the page, a touch re-focuses the field shortly
// afterwards so that a single tap is enough to start typing.
if (window.navigator.standalone) {
    // Standard DOM lookup; replaces the non-standard document.all.tags().
    var inputs = document.getElementsByTagName('input');
    for (var i = 0; i < inputs.length; i++) {
        inputs[i].addEventListener('touchstart', function (ev) {
            var field = ev.target;
            // Focus is deferred by 150 ms; presumably this lets the browser
            // finish handling the touch first (value taken from the original hack).
            setTimeout(function () {
                field.focus();
            }, 150);
        });
    }
}

Getting useful index information from MongoDB

Here is a MongoDB script for presenting index information in a more concise way than getIndexes() provides. This script also presents an index’s total size along with a breakdown of its size on all of the shards.

// Usage: supply the collection name through --eval, then run this script:
//   mongo --eval="var collection='file';" <path to this script>
//
// Prints one entry per index, keyed by index name. Each entry is the index's
// key document with two extra fields added:
//   totalsize - the index size reported by the collection's stats()
//   shards    - per-shard index sizes, present only when stats() lists shards

var o = {};
db[collection].getIndexes().forEach(function (index) {
    o[index.name] = index.key;
});

// Returns the summary entry for an index name, creating an empty entry when
// stats() reports an index that getIndexes() did not return.
function entryFor(name) {
    if (!o[name]) {
        o[name] = {};
    }
    return o[name];
}

var cstats = db[collection].stats();
Object.keys(cstats.indexSizes || {}).forEach(function (name) {
    entryFor(name).totalsize = cstats.indexSizes[name];
});

// stats() has no `shards` field in some cases; treat that as "no shards".
var shardinfo = cstats.shards || {};
Object.keys(shardinfo).forEach(function (shard) {
    var sizes = shardinfo[shard].indexSizes || {};
    Object.keys(sizes).forEach(function (name) {
        var entry = entryFor(name);
        if (!entry.shards) {
            entry.shards = {};
        }
        entry.shards[shard] = sizes[name];
    });
});

printjson(o);

Produces the following output:

{
    "_id_" : {
        "_id" : 1,
        "totalsize" : 50501459568,
        "shards" : {
            "shard0000" : 18620766416,
            "shard0001" : 18117909712,
            "shard0002" : 13762783440
        }
    }
}

Tags:

Simple Scala Map/Reduce Job

I was recently tasked with writing a Hadoop map/reduce job. This job had the requirement of taking a list of regular expressions and scouring hundreds of gigs’ worth of log files for matches. Since I’ve been leaning more and more towards Scala I wanted to use it for my job, but I also wanted to use Maven for my job’s package management to make the job easy to set up and extend. And finally, I wanted to have unit tests for my mapper and reducer and an overall job unit test. The result is this project I posted to GitHub as a template for future projects. I hope it proves as helpful for others as I’m sure it’ll be for me.

Tags: , , , ,

Select distinct for MongoDB

Here is a handy script I’ve been using for MongoDB to retrieve a list of all the fields used in a collection. This uses a map/reduce routine and has to comb over all the documents in a collection so you may want to exercise caution when using this script.

// usage:
// mongo localhost/foo --quiet --eval="var collection='bar';" getcollectionkeys.js
//
// Prints the distinct top-level field names used by the documents of
// `collection`. A map/reduce pass emits every key of every document into a
// temporary "<collection>_keys" collection, whose _id values are then the
// distinct field names. The temporary collection is dropped afterwards.
var keysCollection = collection + "_keys";

var mr = db.runCommand({
  "mapreduce": collection,
  "map": function() {
    for (var key in this) { emit(key, null); }
  },
  // Only the keys matter, so the reduced value is always null.
  "reduce": function(key, stuff) { return null; },
  "out": keysCollection
});

if (!mr.ok) {
  throw new Error("mapreduce on '" + collection + "' failed: " + (mr.errmsg || "unknown error"));
}

try {
  print(db[keysCollection].distinct("_id"));
} finally {
  // Remove the temporary collection even if reading it failed.
  db[keysCollection].drop();
}

Tags: , , ,

Simple PhantomJS web scraping script

Here is a simple web scraping script I wrote for PhantomJS, the immensely useful headless browser, to load a page, inject jQuery into it, and then scrape the page using a user-supplied jQuery selector.

# Load a page, inject jQuery into it, and print the trimmed inner HTML of
# every element matching a jQuery selector.
#
# Arguments: system.args[1] is the URL, system.args[2] is the selector.
page = require('webpage').create()
system = require 'system'

phantom.injectJs "static/js/underscore-min.js"

# Forward the page's console output to stdout, dropping messages that start
# with "Unsafe" (presumably browser security noise -- confirm if it matters).
page.onConsoleMessage = (msg) ->
    if not msg.match /^Unsafe/
        console.log msg

# Passed to page.evaluate, so `$` is the jQuery injected into the page and
# console.log output arrives through page.onConsoleMessage above.
scrapeEl = (elselector) ->
    rows = $ elselector
    for el in rows
        if el.innerHTML
            str = el.innerHTML.trim()
            if str.length > 0
                console.log str

if system.args.length < 3
    # Without both a URL and a selector there is nothing meaningful to do.
    console.log "Usage: phantomjs #{system.args[0]} <url> <jquery-selector>"
    phantom.exit 1
else
    page.open system.args[1], (status) ->
        if status isnt 'success'
            phantom.exit 1
        else
            page.injectJs "static/js/underscore-min.js"
            page.injectJs "static/js/utils.js"
            # scrapeEl depends on jQuery, so stop if it could not be injected.
            if page.injectJs("static/js/jquery-1.8.2.min.js")
                page.evaluate scrapeEl, system.args[2]
                phantom.exit()
            else
                console.log "Could not inject jQuery into the page"
                phantom.exit 1

Run it with:

phantomjs scrape_element.coffee "http://www.moviefone.com/coming-soon" ".movieTitle span"

Tags:

Tracking the trackers

Tags:

Teaching cybersecurity

Original

Tags: ,

MongoDB Security Considerations presentation at MongoSF 2012

Here is a presentation I gave at MongoSF 2012 on unique security considerations for MongoDB.

And here are my slides.

Tags: , , , ,