Welcome to OGeek Q&A Community for programmer and developer-Open, Learning and Share
Welcome To Ask or Share your Answers For Others

Categories

0 votes
286 views
in Technique[技术] by (71.8m points)

jquery - Sort an array by the "Levenshtein Distance" with best performance in Javascript

So I have a random javascript array of names...

[@larry,@nicholas,@notch] etc.

They all start with the @ symbol. I'd like to sort them by the Levenshtein Distance so that the the ones at the top of the list are closest to the search term. At the moment, I have some javascript that uses jQuery's .grep() on it using javascript .match() method around the entered search term on key press:

(code edited since first publish)

limitArr = $.grep(imTheCallback, function(n){
    return n.match(searchy.toLowerCase())
});
modArr = limitArr.sort(levenshtein(searchy.toLowerCase(), 50))
if (modArr[0].substr(0, 1) == '@') {
    if (atRes.childred('div').length < 6) {
        modArr.forEach(function(i){
            atRes.append('<div class="oneResult">' + i + '</div>');
        });
    }
} else if (modArr[0].substr(0, 1) == '#') {
    if (tagRes.children('div').length < 6) {
        modArr.forEach(function(i){
            tagRes.append('<div class="oneResult">' + i + '</div>');
        });
    }
}

$('.oneResult:first-child').addClass('active');

$('.oneResult').click(function(){
    window.location.href = 'http://hashtag.ly/' + $(this).html();
});

It also has some if statements detecting if the array contains hashtags (#) or mentions (@). Ignore that. The imTheCallback is the array of names, either hashtags or mentions, then modArr is the array sorted. Then the .atResults and .tagResults elements are the elements that it appends each time in the array to, this forms a list of names based on the entered search terms.

I also have the Levenshtein Distance algorithm:

var levenshtein = function(min, split) {
    // Levenshtein Algorithm Revisited - WebReflection
    try {
        split = !("0")[0]
    } catch(i) {
        split = true
    };

    return function(a, b) {
        if (a == b)
            return 0;
        if (!a.length || !b.length)
            return b.length || a.length;
        if (split) {
            a = a.split("");
            b = b.split("")
        };
        var len1 = a.length + 1,
            len2 = b.length + 1,
            I = 0,
            i = 0,
            d = [[0]],
            c, j, J;
        while (++i < len2)
            d[0][i] = i;
        i = 0;
        while (++i < len1) {
            J = j = 0;
            c = a[I];
            d[i] = [i];
            while(++j < len2) {
                d[i][j] = min(d[I][j] + 1, d[i][J] + 1, d[I][J] + (c != b[J]));
                ++J;
            };
            ++I;
        };
        return d[len1 - 1][len2 - 1];
    }
}(Math.min, false);

How can I work with algorithm (or a similar one) into my current code to sort it without bad performance?

UPDATE:

So I'm now using James Westgate's Lev Dist function. Works WAYYYY fast. So performance is solved, the issue now is using it with source...

modArr = limitArr.sort(function(a, b){
    levDist(a, searchy)
    levDist(b, searchy)
});

My problem now is general understanding on using the .sort() method. Help is appreciated, thanks.

Thanks!

Question&Answers:os

与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…
Welcome To Ask or Share your Answers For Others

1 Reply

0 votes
by (71.8m points)

I wrote an inline spell checker a few years ago and implemented a Levenshtein algorithm - since it was inline and for IE8 I did quite a lot of performance optimisation.

var levDist = function(s, t) {
    var d = []; //2d matrix

    // Step 1
    var n = s.length;
    var m = t.length;

    if (n == 0) return m;
    if (m == 0) return n;

    //Create an array of arrays in javascript (a descending loop is quicker)
    for (var i = n; i >= 0; i--) d[i] = [];

    // Step 2
    for (var i = n; i >= 0; i--) d[i][0] = i;
    for (var j = m; j >= 0; j--) d[0][j] = j;

    // Step 3
    for (var i = 1; i <= n; i++) {
        var s_i = s.charAt(i - 1);

        // Step 4
        for (var j = 1; j <= m; j++) {

            //Check the jagged ld total so far
            if (i == j && d[i][j] > 4) return n;

            var t_j = t.charAt(j - 1);
            var cost = (s_i == t_j) ? 0 : 1; // Step 5

            //Calculate the minimum
            var mi = d[i - 1][j] + 1;
            var b = d[i][j - 1] + 1;
            var c = d[i - 1][j - 1] + cost;

            if (b < mi) mi = b;
            if (c < mi) mi = c;

            d[i][j] = mi; // Step 6

            //Damerau transposition
            if (i > 1 && j > 1 && s_i == t.charAt(j - 2) && s.charAt(i - 2) == t_j) {
                d[i][j] = Math.min(d[i][j], d[i - 2][j - 2] + cost);
            }
        }
    }

    // Step 7
    return d[n][m];
}

与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…
OGeek|极客中国-欢迎来到极客的世界,一个免费开放的程序员编程交流平台!开放,进步,分享!让技术改变生活,让极客改变未来! Welcome to OGeek Q&A Community for programmer and developer-Open, Learning and Share
Click Here to Ask a Question

...