简体   繁体   中英

How to read text between xml tags in a string using NodeJS

Input String:

Can you book a flight from <SrcAirport> Detroit </SrcAirport> to <DestAirport> Chicago </DestAirport>?

Expected Output:

{
    "utterance": "Can you book a flight from Detroit to Chicago?",
    "entities":[
        {
            "name": "SrcAirport",
            "value": "Detroit",
            "startPosition": "27",
            "endPosition": "33"
        },
        {
            "name": "DestAirport",
            "value": "Chicago",
            "startPosition": "38",
            "endPosition": "44"
        }
    ]
}

More Information:

startPosition: The zero-based index, within the generated utterance string, of the first character of the entity value.

endPosition: The zero-based index, within the generated utterance string, of the last character of the entity value (inclusive).

Here's my attempt at solving this. I appreciate any comments to improve it.

 var inputText = "Can you book a flight from <SrcAirport> Detroit </SrcAirport> to <DestAirport> Chicago </DestAirport>?"; var entities = extractEntities(inputText); console.log(entities); //--------------------------FUNCTIONS------------------------------------- function extractEntities(input) { var tagPairs = getTagPairs(input); tagPairs.forEach(element => { input = processTagPair(element, input); }); var rv = { utterance: input, entities: tagPairs }; return rv; } function getTagPairs(str) { var reTagCatcher = /(<.[^(><.)]+>)/g; var output = str.match(reTagCatcher); var tagPairs = []; if (output && output.length > 0) { while (output.length > 0) { if (output.length >= 2) { var startTag = output[0]; var endTag = output[1]; if (areCorrectTags(startTag, endTag)) { tagPairs.push({ startTag: startTag, endTag: endTag, entityName: startTag.substring(1, startTag.length - 1) }); } } output.splice(0, 1); // Remove top one element } } return tagPairs; } function processTagPair(element, str) { leftIndexOfStartTag = str.indexOf(element.startTag); leftIndexOfEndTag = str.indexOf(element.endTag, leftIndexOfStartTag + element.startTag.length - 1); valueBeginIndex = leftIndexOfStartTag + element.startTag.length; value = str.substring(valueBeginIndex, leftIndexOfEndTag).trim(); //console.log("|" + value + "|"); element.value = value; str = replaceText(str, leftIndexOfStartTag, leftIndexOfEndTag + element.endTag.length - 1, value); element.startPosition = leftIndexOfStartTag; element.endPosition = element.startPosition + value.length - 1; return str; } // Check whether two tag pairs are correct function areCorrectTags(stag, etag) { sTagName = stag.substring(1, stag.length - 1); eTagName = etag.substring(2, etag.length - 1); return sTagName === eTagName && isValidEntity(sTagName); } // To Be Implemented function isValidEntity(entityName) { return true; //Write the logic to check whether entityName is a valid entity enabled for the Chatbot - If its not, return false } function 
replaceText(sourceString, startIndex, endIndex, replacementValue) { var left = sourceString.substring(0, startIndex); var right = sourceString.substring(endIndex + 1, sourceString.length); return left + replacementValue + right; }

Here's an API that can do this. https://rapidapi.com/akashpavate58/api/nlp-helper

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM