diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..2d2b47d --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +.idea +node_modules \ No newline at end of file diff --git a/.jshintrc b/.jshintrc new file mode 100644 index 0000000..da3efbf --- /dev/null +++ b/.jshintrc @@ -0,0 +1,91 @@ +{ + // JSHint Default Configuration File (as on JSHint website) + // See http://jshint.com/docs/ for more details + + "maxerr" : 50, // {int} Maximum error before stopping + + // Enforcing + "bitwise" : true, // true: Prohibit bitwise operators (&, |, ^, etc.) + "camelcase" : false, // true: Identifiers must be in camelCase + "curly" : false, // true: Require {} for every new block or scope + "eqeqeq" : true, // true: Require triple equals (===) for comparison + "forin" : true, // true: Require filtering for..in loops with obj.hasOwnProperty() + "freeze" : true, // true: prohibits overwriting prototypes of native objects such as Array, Date etc. + "immed" : false, // true: Require immediate invocations to be wrapped in parens e.g. `(function () { } ());` + "latedef" : false, // true: Require variables/functions to be defined before being used + "newcap" : false, // true: Require capitalization of all constructor functions e.g. `new F()` + "noarg" : true, // true: Prohibit use of `arguments.caller` and `arguments.callee` + "noempty" : false, // true: Prohibit use of empty blocks + "nonbsp" : true, // true: Prohibit "non-breaking whitespace" characters. 
+ "nonew" : false, // true: Prohibit use of constructors for side-effects (without assignment) + "plusplus" : false, // true: Prohibit use of `++` and `--` + "quotmark" : false, // Quotation mark consistency: + // false : do nothing (default) + // true : ensure whatever is used is consistent + // "single" : require single quotes + // "double" : require double quotes + "undef" : true, // true: Require all non-global variables to be declared (prevents global leaks) + "unused" : true, // Unused variables: + // true : all variables, last function parameter + // "vars" : all variables only + // "strict" : all variables, all function parameters + "strict" : false, // true: Requires all functions run in ES5 Strict Mode + "maxparams" : false, // {int} Max number of formal params allowed per function + "maxdepth" : false, // {int} Max depth of nested blocks (within functions) + "maxstatements" : false, // {int} Max number statements per function + "maxcomplexity" : false, // {int} Max cyclomatic complexity per function + "maxlen" : false, // {int} Max number of characters per line + "varstmt" : false, // true: Disallow any var statements. Only `let` and `const` are allowed. + + // Relaxing + "asi" : false, // true: Tolerate Automatic Semicolon Insertion (no semicolons) + "boss" : false, // true: Tolerate assignments where comparisons would be expected + "debug" : false, // true: Allow debugger statements e.g. browser breakpoints. + "eqnull" : false, // true: Tolerate use of `== null` + "esversion" : 5, // {int} Specify the ECMAScript version to which the code must adhere. 
+ "moz" : false, // true: Allow Mozilla specific syntax (extends and overrides esnext features) + // (ex: `for each`, multiple try/catch, function expression…) + "evil" : false, // true: Tolerate use of `eval` and `new Function()` + "expr" : false, // true: Tolerate `ExpressionStatement` as Programs + "funcscope" : false, // true: Tolerate defining variables inside control statements + "globalstrict" : false, // true: Allow global "use strict" (also enables 'strict') + "iterator" : false, // true: Tolerate using the `__iterator__` property + "lastsemic" : false, // true: Tolerate omitting a semicolon for the last statement of a 1-line block + "laxbreak" : false, // true: Tolerate possibly unsafe line breakings + "laxcomma" : false, // true: Tolerate comma-first style coding + "loopfunc" : false, // true: Tolerate functions being defined in loops + "multistr" : false, // true: Tolerate multi-line strings + "noyield" : false, // true: Tolerate generator functions with no yield statement in them. + "notypeof" : false, // true: Tolerate invalid typeof operator values + "proto" : false, // true: Tolerate using the `__proto__` property + "scripturl" : false, // true: Tolerate script-targeted URLs + "shadow" : false, // true: Allows re-define variables later in code e.g. `var x=1; x=2;` + "sub" : false, // true: Tolerate using `[]` notation when it can still be expressed in dot notation + "supernew" : false, // true: Tolerate `new function () { ... 
};` and `new Object;` + "validthis" : false, // true: Tolerate using this in a non-constructor function + + // Environments + "browser" : true, // Web Browser (window, document, etc) + "browserify" : false, // Browserify (node.js code in the browser) + "couch" : false, // CouchDB + "devel" : true, // Development/debugging (alert, confirm, etc) + "dojo" : false, // Dojo Toolkit + "jasmine" : false, // Jasmine + "jquery" : false, // jQuery + "mocha" : true, // Mocha + "mootools" : false, // MooTools + "node" : true, // Node.js + "nonstandard" : false, // Widely adopted globals (escape, unescape, etc) + "phantom" : false, // PhantomJS + "prototypejs" : false, // Prototype and Scriptaculous + "qunit" : false, // QUnit + "rhino" : false, // Rhino + "shelljs" : false, // ShellJS + "typed" : false, // Globals for typed array constructions + "worker" : false, // Web Workers + "wsh" : false, // Windows Scripting Host + "yui" : false, // Yahoo User Interface + + // Custom Globals + "globals" : {} // additional predefined global variables +} \ No newline at end of file diff --git a/README.md b/README.md index 2020aaf..c4f668f 100644 --- a/README.md +++ b/README.md @@ -75,6 +75,37 @@ returned `Hashes` object, a `hash` which is the hash of hashes, and `files`, which is a key/value pairing of filename to hash. Each file might be another object, which indicates there was a directory tree encountered. +### Skipping Files/Directories Usage + +There is an additional parameter to the method, an `ignoreCallback` function that can be provided to allow the algorithm to skip over certain files or directories. +This callback has the signature: `function(path, filename)`. An example is as follows: + + var dirsum = require('../lib/dirsum'); + + dirsum.digest('/your/tree', 'sha1', function(err, hashes) { + if (err) throw err; + console.log(JSON.stringify(hashes, null, 2)); + }, function(path, filename) { + // ignore any files starting with '.' 
+ if (filename.indexOf('.') === 0) { + return true; + } + return false; + }); + +### NOTES + +If the `method` parameter (i.e. the second parameter) is not passed as a string, the `callback` and `ignoreCallback` are effectively shifted left respectively. +In other words the signature for the `digest` method looks either like this: + + dirsum.digest(path : string, method : string, callback: function(err, hashes), ignoreCallback : function(path, filename)); + +OR + + dirsum.digest(path : string, callback : function(err, hashes), ignoreCallback : function(path, filename)); + +Finally, the file read implementation uses streams so that large files are NOT read-entirely into memory. + ## Installation npm install dirsum diff --git a/lib/dirsum.js b/lib/dirsum.js index 33e98dd..7b2c2a6 100644 --- a/lib/dirsum.js +++ b/lib/dirsum.js @@ -24,7 +24,92 @@ function _summarize(method, hashes) { return obj; } -function digest(root, method, callback) { +/** + * Hashes every entry of `dirName` and reports the result through `callback`. + * Sub-directories are digested recursively, regular files are streamed into + * `crypto.createHash(method)`, and any other kind of entry is skipped. + * + * @param {string} dirName - directory to read + * @param {string} method - algorithm name handed to `crypto.createHash` + * @param {function} callback - invoked at most once as `callback(err, summary)` + * @param {function} [ignoreCallback] - `function(path, filename)`; a truthy + * result skips that entry + */ +function doDigesting(dirName, method, callback, ignoreCallback) { + var hashes = {}; + var finished = false; + + /* Entries are processed concurrently, so several of them may fail; only + * the first outcome is forwarded so `callback` never fires twice. */ + function done(err, result) { + if (finished) return; + finished = true; + callback(err, result); + } + + fs.readdir(dirName, function(err, files) { + if (err) return done(err); + + if (files.length === 0) { + return done(undefined, { hash: '', files: {} }); + } + + var processed = 0; + var total = files.length; + /* Runs once per entry (hashed or ignored/skipped); summarizes after the last one. */ + function bookkeeping() { + if (++processed >= total) { + done(undefined, _summarize(method, hashes)); + } + } + files.forEach(function(f) { + var path = dirName + '/' + f; + if (!ignoreCallback || !ignoreCallback(path, f)) { + fs.stat(path, function(err, stats) { + if (err) return done(err); + + if (stats.isDirectory()) { + return doDigesting(path, method, function(err, hash) { + if (err) return done(err); + + hashes[f] = hash; + bookkeeping(); + }, ignoreCallback); + } else if (stats.isFile()) { + var hash = crypto.createHash(method); + var stream = fs.createReadStream(path, { encoding: 'utf8' }); + stream.on('data', function(data) { + hash.update(data); + }); + stream.on('end', function() { + hashes[f] = 
hash.digest('hex'); + bookkeeping(); + }); + stream.on('error', function(err) { + done(err); + }); + } else { + console.error('Skipping hash of %s', f); + bookkeeping(); + } + }); + } else { + bookkeeping(); + } + }); + }); +} + +/** + * Computes a checksum tree for the directory `root`. + * + * @param {string} root - directory to digest + * @param {string} [method] - hash algorithm; 'md5' is used when a function is passed in this position + * @param {function} callback - `callback(err, hashes)` + * @param {function} [ignoreCallback] - `function(path, filename)`; a truthy result skips the entry + * @throws {TypeError} when `root`, `callback` or `ignoreCallback` has the wrong type + */ +function digest(root, method, callback, ignoreCallback) { if (!root || typeof(root) !== 'string') { throw new TypeError('root is required (string)'); } @@ -32,6 +117,7 @@ function digest(root, method, callback) { if (typeof(method) === 'string') { // NO-OP } else if (typeof(method) === 'function') { + ignoreCallback = callback; callback = method; method = 'md5'; } else { @@ -44,51 +130,13 @@ function digest(root, method, callback) { throw new TypeError('callback is required (function)'); } - var hashes = {}; - - fs.readdir(root, function(err, files) { - if (err) return callback(err); - - if (files.length === 0) { - return callback(undefined, {hash: '', files: {}}); - } - - var hashed = 0; - files.forEach(function(f) { - var path = root + '/' + f; - fs.stat(path, function(err, stats) { - if (err) return callback(err); - - if (stats.isDirectory()) { - return digest(path, method, function(err, hash) { - if (err) return hash; - - hashes[f] = hash; - if (++hashed >= files.length) { - return callback(undefined, _summarize(method, hashes)); - } - }); - } else if (stats.isFile()) { - fs.readFile(path, 'utf8', function(err, data) { - if (err) return callback(err); - - var hash = crypto.createHash(method); - hash.update(data); - hashes[f] = hash.digest('hex'); + if (ignoreCallback && typeof(ignoreCallback) !== 'function') { + throw new TypeError('ignoreCallback is required to be a function'); + } - if (++hashed >= files.length) { - return callback(undefined, _summarize(method, hashes)); - } - }); - } else { - console.error('Skipping hash of %s', f); - if (++hashed > files.length) { - return callback(undefined, _summarize(method, hashes)); - } - } - }); - }); - }); + doDigesting(root, method, function(err, hashes) { + callback(err, hashes); + }, 
ignoreCallback); } module.exports = { diff --git a/package.json b/package.json index 37d9b06..dbce9b5 100644 --- a/package.json +++ b/package.json @@ -1,7 +1,7 @@ { "name": "dirsum", "description": "A small library that computes checksums of directory trees", - "version": "0.1.1", + "version": "0.1.2", "repository": { "type": "git", "url": "git://github.com/mcavage/node-dirsum.git" diff --git a/tst/dirsum.test.js b/tst/dirsum.test.js index 341948a..4716b4a 100644 --- a/tst/dirsum.test.js +++ b/tst/dirsum.test.js @@ -9,3 +9,13 @@ dirsum.digest(process.cwd() + '/tst/openldap', function(err, hashes) { assert.ok(hashes); console.log(JSON.stringify(hashes, null, 2)); }); + +dirsum.digest(process.cwd() + '/tst', function(err, hashes) { + assert.ok(!err); + assert.ok(hashes); + assert.ok(hashes.files); + assert.equal(hashes.files['dirsum.test.js'], undefined); + console.log(JSON.stringify(hashes, null, 2)); +}, function(path, filename) { + return filename === 'dirsum.test.js'; +});