JavaScript MD5 hash for air.FileStream

This is the MD5 file hash used in progTools.

You can copy and paste the code below or get it via Bitbucket.

The code builds upon Paul Johnston’s MD5 implementation and you’ll need to include his code first.

<!-- Paul Johnston's MD5 implementation -->
<script type="text/javascript" src="tools/md5/md5.js"></script>

<!-- Additional MD5 functions for working on air.FileStream; requires Paj's md5.js -->
<script type="text/javascript" src="tools/md5/md5_file.js"></script>

core_md5_ex() is a modified/hacked version of the original core_md5() to allow for progressively processing chunks of data, instead of doing it all in one go.

// modified version of Paul Johnston's MD5 implementation
function core_md5_ex(x, len, abcd_start, append_padding, total_len)
{
/* append padding */
if (append_padding) {
    x[len >> 5] |= 0x80 << ((len) % 32);
    x[(((len + 64) >>> 9) << 4) + 14] = total_len*8;
}

var a = abcd_start[0];
var b = abcd_start[1];
var c = abcd_start[2];
var d = abcd_start[3];

for(var i = 0; i < x.length; i += 16)
{
var olda = a;
var oldb = b;
var oldc = c;
var oldd = d;

a = md5_ff(a, b, c, d, x[i+ 0], 7 , -680876936);
d = md5_ff(d, a, b, c, x[i+ 1], 12, -389564586);
c = md5_ff(c, d, a, b, x[i+ 2], 17, 606105819);
b = md5_ff(b, c, d, a, x[i+ 3], 22, -1044525330);
a = md5_ff(a, b, c, d, x[i+ 4], 7 , -176418897);
d = md5_ff(d, a, b, c, x[i+ 5], 12, 1200080426);
c = md5_ff(c, d, a, b, x[i+ 6], 17, -1473231341);
b = md5_ff(b, c, d, a, x[i+ 7], 22, -45705983);
a = md5_ff(a, b, c, d, x[i+ 8], 7 , 1770035416);
d = md5_ff(d, a, b, c, x[i+ 9], 12, -1958414417);
c = md5_ff(c, d, a, b, x[i+10], 17, -42063);
b = md5_ff(b, c, d, a, x[i+11], 22, -1990404162);
a = md5_ff(a, b, c, d, x[i+12], 7 , 1804603682);
d = md5_ff(d, a, b, c, x[i+13], 12, -40341101);
c = md5_ff(c, d, a, b, x[i+14], 17, -1502002290);
b = md5_ff(b, c, d, a, x[i+15], 22, 1236535329);

a = md5_gg(a, b, c, d, x[i+ 1], 5 , -165796510);
d = md5_gg(d, a, b, c, x[i+ 6], 9 , -1069501632);
c = md5_gg(c, d, a, b, x[i+11], 14, 643717713);
b = md5_gg(b, c, d, a, x[i+ 0], 20, -373897302);
a = md5_gg(a, b, c, d, x[i+ 5], 5 , -701558691);
d = md5_gg(d, a, b, c, x[i+10], 9 , 38016083);
c = md5_gg(c, d, a, b, x[i+15], 14, -660478335);
b = md5_gg(b, c, d, a, x[i+ 4], 20, -405537848);
a = md5_gg(a, b, c, d, x[i+ 9], 5 , 568446438);
d = md5_gg(d, a, b, c, x[i+14], 9 , -1019803690);
c = md5_gg(c, d, a, b, x[i+ 3], 14, -187363961);
b = md5_gg(b, c, d, a, x[i+ 8], 20, 1163531501);
a = md5_gg(a, b, c, d, x[i+13], 5 , -1444681467);
d = md5_gg(d, a, b, c, x[i+ 2], 9 , -51403784);
c = md5_gg(c, d, a, b, x[i+ 7], 14, 1735328473);
b = md5_gg(b, c, d, a, x[i+12], 20, -1926607734);

a = md5_hh(a, b, c, d, x[i+ 5], 4 , -378558);
d = md5_hh(d, a, b, c, x[i+ 8], 11, -2022574463);
c = md5_hh(c, d, a, b, x[i+11], 16, 1839030562);
b = md5_hh(b, c, d, a, x[i+14], 23, -35309556);
a = md5_hh(a, b, c, d, x[i+ 1], 4 , -1530992060);
d = md5_hh(d, a, b, c, x[i+ 4], 11, 1272893353);
c = md5_hh(c, d, a, b, x[i+ 7], 16, -155497632);
b = md5_hh(b, c, d, a, x[i+10], 23, -1094730640);
a = md5_hh(a, b, c, d, x[i+13], 4 , 681279174);
d = md5_hh(d, a, b, c, x[i+ 0], 11, -358537222);
c = md5_hh(c, d, a, b, x[i+ 3], 16, -722521979);
b = md5_hh(b, c, d, a, x[i+ 6], 23, 76029189);
a = md5_hh(a, b, c, d, x[i+ 9], 4 , -640364487);
d = md5_hh(d, a, b, c, x[i+12], 11, -421815835);
c = md5_hh(c, d, a, b, x[i+15], 16, 530742520);
b = md5_hh(b, c, d, a, x[i+ 2], 23, -995338651);

a = md5_ii(a, b, c, d, x[i+ 0], 6 , -198630844);
d = md5_ii(d, a, b, c, x[i+ 7], 10, 1126891415);
c = md5_ii(c, d, a, b, x[i+14], 15, -1416354905);
b = md5_ii(b, c, d, a, x[i+ 5], 21, -57434055);
a = md5_ii(a, b, c, d, x[i+12], 6 , 1700485571);
d = md5_ii(d, a, b, c, x[i+ 3], 10, -1894986606);
c = md5_ii(c, d, a, b, x[i+10], 15, -1051523);
b = md5_ii(b, c, d, a, x[i+ 1], 21, -2054922799);
a = md5_ii(a, b, c, d, x[i+ 8], 6 , 1873313359);
d = md5_ii(d, a, b, c, x[i+15], 10, -30611744);
c = md5_ii(c, d, a, b, x[i+ 6], 15, -1560198380);
b = md5_ii(b, c, d, a, x[i+13], 21, 1309151649);
a = md5_ii(a, b, c, d, x[i+ 4], 6 , -145523070);
d = md5_ii(d, a, b, c, x[i+11], 10, -1120210379);
c = md5_ii(c, d, a, b, x[i+ 2], 15, 718787259);
b = md5_ii(b, c, d, a, x[i+ 9], 21, -343485551);

a = safe_add(a, olda);
b = safe_add(b, oldb);
c = safe_add(c, oldc);
d = safe_add(d, oldd);
}

return Array(a, b, c, d);
}

What’s changed is:

  • We pass in the starting values (which is the current hash on the data thus far, minus the necessary padding)
  • We use a variable to determine whether or not to append padding to the input; this is only done when processing the last block of data read from the stream.

hex_md5_stream() is the function which processes the stream: reading in chunks, transforming the bytes in inBytes into an array of 4-byte WORDs, and calling core_md5_ex() with the necessary data.

function hex_md5_stream(inStream)
{
    
var abcd_start = new Array();
    abcd_start.push(1732584193);
    abcd_start.push(-271733879);
    abcd_start.push(-1732584194);
    abcd_start.push(271733878);
    
    
var inBytes = new air.ByteArray();
    
var appendPaddingToBlock = false;
    
var totalLen = 0;
    
    
while (inStream.bytesAvailable > 0)
    {
        inStream.readBytes(inBytes, 0, Math.min(8192, inStream.bytesAvailable));        
        
if(inBytes.length < 8192)
        {
            appendPaddingToBlock =
true;
        }
        
        totalLen += inBytes.length;
        
        bin =
new Array();
        
for(var i = 0; i < inBytes.length * 8; i+=8)
            bin[i>>5] |= (inBytes[i>>3] & 0xff) << (i%32);
        
        abcd_start = core_md5_ex(bin, inBytes.length*8, abcd_start, appendPaddingToBlock, totalLen);
        inBytes.clear();
    }
    
    
return binl2hex(abcd_start);    
}

Here’s the top-level view of how it works:

var inStream = new air.FileStream();
                
var file = new air.File();
file.url =
"file:///" + filename;                

inStream.open(file, air.FileMode.READ);
var md5result = hex_md5_stream(inStream);                 
inStream.close();             

6 Comments

  1. Hi Avishkar,

    Thanks for pointing me to this!

    I seem to be having a bit of trouble getting it to work.

    When I call the MD5 function it returns:

    ReferenceError: Can’t find variable: binl2hex
    hex_md5_stream at app:/js/md5.air.FileStream.js : 160
    md5LocalFile at app:/md5.html : 29
    onclick at app:/md5.html : 39

    Here's my code:

    MD5 File Test

    // MD5 Stuff
    function md5LocalFile(){
    var filename = “/Users/cbourne/Documents/Aptana Studio Workspace/md5/LocalFile.txt”
    var inStream = new air.FileStream();
    var file = new air.File();
    file.url = “file:///” + filename;

    inStream.open(file, air.FileMode.READ);
    var md5result = hex_md5_stream(inStream);
    inStream.close();

    return md5result;
    }

    Any pointers would be much appreciated.

    Regards,

    Carl

  2. Avishkar Autar

    ah, I just noticed that the newest version of Paul Johnston’s code changes quite a few things and removed the binl2hex function.

    Here’s the binl2hex function; add it to md5.air.FileStream.js, and it should work.

    function binl2hex(binarray)
    {
    var hex_tab = hexcase ? "0123456789ABCDEF" : "0123456789abcdef";
    var str = "";
    for (var i = 0; i < binarray.length * 4; i++) {
    str += hex_tab.charAt((binarray[i >> 2] >> ((i % 4) * 8 + 4)) & 0xF) +
    hex_tab.charAt((binarray[i >> 2] >> ((i % 4) * 8)) & 0xF);
    }
    return str;
    }

  3. Thanks Avishkar,

    That did the trick!

    Carl

  4. Thank you so much for this! I’m porting it to regular browser js, for use with the FileReader API.

  5. ivan bishop

    Using this method to calc md5sum of downloaded files, it works, but is very, very slow……
    a 100K TAR file took almost 7 seconds

    on a 4G dual core I5 laptop running ubuntu 10.04
    md5sum took a fraction of a second…
    any idea why so slow??

    Many thanks
    ivan

  6. Avishkar Autar

    I have noticed the poor performance as well.

    The performance is primarily dependent upon the Javascript engine used by Adobe Air, which I believe is Webkit’s Javascript engine, SquirrelFish. Also, Javascript itself does not handle this sort of low-level number crunching well, so I doubt any JS implementation could match an application like md5sum.