简体   繁体   中英

Extract matched text and its neighboring context: n words upstream and n words downstream (nth occurrence from left and nth occurrence from right)

I need text that fits nicely in an autocomplete dropdown. This required more strategy than it initially appeared. Using hidden text or a fade-out option didn't work well because the matched text was often outside the visible frame of its context. The ellipsis approach is currently a bit more heavyweight than I need. "Show more" works well, but not in the context of a dropdown box; I don't want the user to worry about anything except selecting an option. I just want a line or two of context centred around the matched token. There are some related answers, but nothing that quite addresses the need for both n words from the right and n words from the left.

// Sample input: more neighboring words are requested than exist on either side.
var haystack = "r4 r3 r2 r1 needle r1 r2 r3",
    needle = "needle",
    delim = " ",
    numWords = 5;

// Trim the text around the match, then print it wrapped in double quotes.
var trimmedText = trimToNeighboringText(haystack, needle, numWords, delim);
var label = "With " + numWords + " neighboring words: ";
console.log(label + "\"" + trimmedText + "\"");

Regex can simplify this a lot!
Here is my solution:

// Sample text, the token to centre on, and how many words to keep per side.
var haystack = "L5 L4 L3 L2 L1 needle R1 R2 R3 R4 R5 R6";
var needle = "needle";
var numWords = 3;

// One word with optional whitespace on either side, repeated exactly
// numWords times; the same sub-pattern is used before and after the needle.
var neighbors = "(?:\\s?(?:[\\w]+)\\s?){" + numWords + "}";

// String.prototype.match compiles a string argument into a RegExp;
// here that compilation is simply spelled out.
var result = haystack.match(new RegExp(neighbors + needle + neighbors));

console.log("With " + numWords + " neighboring words: \"" + result[0] + "\"");

With 3 neighboring words: " L3 L2 L1 needle R1 R2 R3 "

// Sample input: five words precede the needle and six follow it; keep three per side.
var numWords = 3;
var delim = " ";
var needle = "needle";
var haystack = "L5 L4 L3 L2 L1 needle R1 R2 R3 R4 R5 R6";

// Shorten the text around the match and print the result in double quotes.
var trimmedText = trimToNeighboringText(haystack, needle, numWords, delim);
var message = "With " + numWords + " neighboring words: \"" + trimmedText + "\"";
console.log(message);

/**
 * Cuts `haystack` down to the first occurrence of `needle` together with at
 * most `numWords` words of context on each side, and marks every side that
 * was shortened with an ellipsis.
 *
 * @param {string} haystack - Full text to shorten.
 * @param {string} needle - Text the result is centred on.
 * @param {number} numWords - Number of neighboring words kept on each side.
 * @param {string} delim - Separator between words, e.g. " ".
 * @returns {string} The shortened text, or `haystack` unchanged (after a
 *     console warning) when `needle` is empty or does not occur.
 */
function trimToNeighboringText(haystack, needle, numWords, delim) {

    // N words of context are enclosed by N + 1 delimiters, because the word
    // next to the needle is separated from it by a delimiter as well.
    var numDelims = numWords + 1;

    // Locate the first match only. Splitting on the needle would also split
    // at every later occurrence and drop whatever follows the second one.
    var matchIndex = needle === "" ? -1 : haystack.indexOf(needle);
    if (matchIndex === -1) {
        console.warn("Match not found");
        return haystack;
    }

    var before = haystack.substring(0, matchIndex);
    var after = haystack.substring(matchIndex + needle.length);

    var leftEllipsis = "";
    var rightEllipsis = "";

    // Left side: count delimiters backwards from the needle. The helper is
    // not called for an empty side, so its result for "" never matters here.
    var startIndex = 0;
    if (before.length > 0) {
        startIndex = nthOccurrenceBackwards(before, delim, numDelims);
    }

    // if text is truncated at left
    if (startIndex > 0) {
        leftEllipsis = "... ";
        // Resume after the delimiter: the ellipsis already ends in a space,
        // keeping the delimiter too would produce a doubled separator.
        startIndex += delim.length;
    }
    // if text is not truncated at left
    else {
        startIndex = 0;
    }

    // Right side: count delimiters forwards from the needle
    // (the index is local to the text after the needle).
    var endIndex = -1;
    if (after.length > 0) {
        endIndex = nthOccurrenceForwards(after, delim, numDelims);
    }

    // if text is truncated at right; index 0 is a valid cut position
    if (endIndex >= 0) {
        rightEllipsis = " ...";
    }
    // if text is not truncated at right
    else {
        endIndex = after.length;
    }

    // Concatenate the left fragment, the needle, and the right fragment
    return leftEllipsis + before.substring(startIndex) + needle
        + after.substring(0, endIndex) + rightEllipsis;
}

/**
 * Finds where the n-th occurrence of `pat` starts in `str`, counting from the
 * beginning of the string. Occurrences may overlap: each search resumes one
 * character after the previous hit.
 *
 * @param {string} str - Text to search.
 * @param {string} pat - Text to look for.
 * @param {number} n - 1-based number of the occurrence to find.
 * @returns {number} Start index of that occurrence, or -1 when it does not
 *     exist (fewer than `n` occurrences, empty `str`, or `n` below 1).
 */
function nthOccurrenceForwards(str, pat, n) {
    // -1 rather than 0: index 0 is a legitimate position for a real hit.
    if (str.length === 0) return -1;

    var index = -1;
    var from = 0;
    for (var found = 0; found < n; found++) {
        index = str.indexOf(pat, from);
        if (index === -1) return -1;
        from = index + 1;
    }
    return index;
}

/**
 * Finds where the n-th occurrence of `pat` starts in `str`, counting from the
 * end of the string. Occurrences may overlap: each search resumes one
 * character before the previous hit.
 *
 * @param {string} str - Text to search.
 * @param {string} pat - Text to look for.
 * @param {number} n - 1-based number of the occurrence to find, from the end.
 * @returns {number} Start index of that occurrence, or -1 when it does not
 *     exist (fewer than `n` occurrences, empty `str`, or `n` below 1).
 */
function nthOccurrenceBackwards(str, pat, n) {
    // -1 rather than 0: index 0 is a legitimate position for a real hit.
    if (str.length === 0) return -1;

    var index = -1;
    var from = str.length;
    for (var found = 0; found < n; found++) {
        // lastIndexOf treats a negative start position as 0, so a hit at
        // index 0 would be reported again; stop once the string is exhausted.
        if (from < 0) return -1;
        index = str.lastIndexOf(pat, from);
        if (index === -1) return -1;
        from = index - 1;
    }
    return index;
}

The technical posts on this site are licensed under CC BY-SA 4.0. If you reprint them, please cite the site URL or the original address. For any questions, please contact: yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM