From cd5649a1e8b20ffea751989d0b0d99d7655f60a8 Mon Sep 17 00:00:00 2001 From: reality Date: Sun, 13 Jan 2013 15:45:07 +0000 Subject: [PATCH] underscorise spelling [#81] --- modules/spelling/spelling.js | 87 +++++++++++++++++++++++++++++++++--- snippets.js | 73 ------------------------------ 2 files changed, 81 insertions(+), 79 deletions(-) diff --git a/modules/spelling/spelling.js b/modules/spelling/spelling.js index 49b0403..5c955ef 100644 --- a/modules/spelling/spelling.js +++ b/modules/spelling/spelling.js @@ -1,8 +1,83 @@ +var _ = require('underscore')._; + +var allGroupings = function(arr) { + if (arr.length == 0) { + return []; /* short-circuit the empty-array case */ + } + var groupings = []; + for(var n=1;n<=arr.length;n++) { + for(var i=0;i<(arr.length-(n-1));i++) { + groupings.push(arr.slice(i, i+n)); + } + } + return groupings; +} +var distance = function(s1, s2) { + // Calculate Levenshtein distance between two strings + // + // version: 1109.2015 + // discuss at: http://phpjs.org/functions/levenshtein + // + original by: Carlos R. L. Rodrigues (http://www.jsfromhell.com) + // + bugfixed by: Onno Marsman + // + revised by: Andrea Giammarchi (http://webreflection.blogspot.com) + // + reimplemented by: Brett Zamir (http://brett-zamir.me) + // + reimplemented by: Alexander M Beedie + if (s1 == s2) { + return 0; + } + var s1_len = s1.length; + var s2_len = s2.length; + if (s1_len === 0) { + return s2_len; } + if (s2_len === 0) { + return s1_len; + } + // BEGIN STATIC + var split = false; + try { + split = !('0')[0]; + } catch (e) { + split = true; // Earlier IE may not support access by string index + } + // END STATIC + if (split) { + s1 = s1.split(''); s2 = s2.split(''); + } + + var v0 = new Array(s1_len + 1); + var v1 = new Array(s1_len + 1); + var s1_idx = 0, + s2_idx = 0, + cost = 0; + for (s1_idx = 0; s1_idx < s1_len + 1; s1_idx++) { v0[s1_idx] = s1_idx; + } + var char_s1 = '', + char_s2 = ''; + for (s2_idx = 1; s2_idx <= s2_len; s2_idx++) { v1[0] = s2_idx; + char_s2 = s2[s2_idx - 1]; + + for (s1_idx = 0; s1_idx < s1_len; s1_idx++) { + char_s1 = s1[s1_idx]; cost = (char_s1 == char_s2) ? 0 : 1; + var m_min = v0[s1_idx + 1] + 1; + var b = v1[s1_idx] + 1; + var c = v0[s1_idx] + cost; + if (b < m_min) { m_min = b; + } + if (c < m_min) { + m_min = c; + } v1[s1_idx + 1] = m_min; + } + var v_tmp = v0; + v0 = v1; + v1 = v_tmp; } + return v0[s1_len]; +}; + var spelling = function(dbot) { var last = {}; - var correct = function (event, correction, candidate, output_callback) { - var rawCandidates = last[event.channel.name][candidate].split(' ').allGroupings(); + var rawCandidates = allGroupings(last[event.channel.name][candidate].split(' ')); + var candidates = []; for(var i=0;i 0)) { + var d = distance(correction.toLowerCase(), candidates[i].toLowerCase()); + if((d < winnerDistance) && (d > 0)) { winner = candidates[i]; - winnerDistance = distance; + winnerDistance = d; } } @@ -51,7 +126,7 @@ var spelling = function(dbot) { event.reply(dbot.t('spelling_other', e)); }); } else { - if(last.hasOwnProperty(event.channel.name)) { + if(_.has(last, event.channel.name)) { last[event.channel.name][event.user] = event.message; } else { last[event.channel.name] = { }; diff --git a/snippets.js b/snippets.js index 6f703cd..f8fcfe4 100644 --- a/snippets.js +++ b/snippets.js @@ -35,19 +35,6 @@ Array.prototype.sum = function() { return sum; }; -Array.prototype.allGroupings = function() { - if (this.length == 0) { - return []; /* short-circuit the empty-array case */ - } - var groupings = []; - for(var n=1;n<=this.length;n++) { - for(var i=0;i<(this.length-(n-1));i++) { - groupings.push(this.slice(i, i+n)); - } - } - return groupings; -} - Array.prototype.uniq = function() { var hash = {} var result = []; @@ -79,66 +66,6 @@ String.prototype.startsWith = function(needle) { return needle === this.slice(0, needle.length); }; -String.prototype.distance = function(s1, s2) { - // Calculate Levenshtein distance between two strings - // - // version: 1109.2015 - // discuss at: http://phpjs.org/functions/levenshtein // + original by: Carlos R. L. Rodrigues (http://www.jsfromhell.com) - // + bugfixed by: Onno Marsman - // + revised by: Andrea Giammarchi (http://webreflection.blogspot.com) - // + reimplemented by: Brett Zamir (http://brett-zamir.me) - // + reimplemented by: Alexander M Beedie // * example 1: levenshtein('Kevin van Zonneveld', 'Kevin van Sommeveld'); - // * returns 1: 3 - if (s1 == s2) { - return 0; - } - var s1_len = s1.length; - var s2_len = s2.length; - if (s1_len === 0) { - return s2_len; } - if (s2_len === 0) { - return s1_len; - } - // BEGIN STATIC - var split = false; - try { - split = !('0')[0]; - } catch (e) { split = true; // Earlier IE may not support access by string index - } - // END STATIC - if (split) { - s1 = s1.split(''); s2 = s2.split(''); - } - - var v0 = new Array(s1_len + 1); - var v1 = new Array(s1_len + 1); - var s1_idx = 0, - s2_idx = 0, - cost = 0; - for (s1_idx = 0; s1_idx < s1_len + 1; s1_idx++) { v0[s1_idx] = s1_idx; - } - var char_s1 = '', - char_s2 = ''; - for (s2_idx = 1; s2_idx <= s2_len; s2_idx++) { v1[0] = s2_idx; - char_s2 = s2[s2_idx - 1]; - - for (s1_idx = 0; s1_idx < s1_len; s1_idx++) { - char_s1 = s1[s1_idx]; cost = (char_s1 == char_s2) ? 0 : 1; - var m_min = v0[s1_idx + 1] + 1; - var b = v1[s1_idx] + 1; - var c = v0[s1_idx] + cost; - if (b < m_min) { m_min = b; - } - if (c < m_min) { - m_min = c; - } v1[s1_idx + 1] = m_min; - } - var v_tmp = v0; - v0 = v1; - v1 = v_tmp; } - return v0[s1_len]; -} - String.prototype.format = function() { // format takes either multiple indexed arguments, or a single object, whose keys/values will be used var targetStr = this; var replacements = [].splice.call(arguments, 0);