簡體   English   中英

從字符串中提取可點擊的詞並包含標點符號

[英]Extract Clickable words from String and Include Punctuation Marks

我有一個句子和句子中的一系列可點擊單詞。 數組不包括標點符號。

這里有一句話

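Into the trunk we put two poles and the can of worms and a sack of sandwiches and a thermos of water. “We're going on a journey,” my father said. “To a secret place. We'll catch the air! We'll catch the breeze!”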

這是可點擊單詞的結構。 它是一個數組,其中每一項都包含該單詞在句子中起始位置和結束位置的索引。 這個數組不包含句子中的標點符號。

標點符號不可點擊。

"tokens": [
            {
              "position": [
                0,
                4
              ],
              "value": "into"
            },
            {
              "position": [
                5,
                8
              ],
              "value": "the"
            },
            {
              "position": [
                9,
                14
              ],
              "value": "trunk"
            },
            {
              "position": [
                15,
                17
              ],
              "value": "we"
            },
            {
              "position": [
                18,
                21
              ],
              "value": "put"
            },
            {
              "position": [
                22,
                25
              ],
              "value": "two"
            },
            {
              "position": [
                26,
                31
              ],
              "value": "poles"
            },
            {
              "position": [
                32,
                35
              ],
              "value": "and"
            },
            {
              "position": [
                36,
                39
              ],
              "value": "the"
            },
            {
              "position": [
                40,
                43
              ],
              "value": "can"
            },
            {
              "position": [
                44,
                46
              ],
              "value": "of"
            },
            {
              "position": [
                47,
                52
              ],
              "value": "worms"
            },
            {
              "position": [
                53,
                56
              ],
              "value": "and"
            },
            {
              "position": [
                57,
                58
              ],
              "value": "a"
            },
            {
              "position": [
                59,
                63
              ],
              "value": "sack"
            },
            {
              "position": [
                64,
                66
              ],
              "value": "of"
            },
            {
              "position": [
                67,
                77
              ],
              "value": "sandwiches"
            },
            {
              "position": [
                78,
                81
              ],
              "value": "and"
            },
            {
              "position": [
                82,
                83
              ],
              "value": "a"
            },
            {
              "position": [
                84,
                91
              ],
              "value": "thermos"
            },
            {
              "position": [
                92,
                94
              ],
              "value": "of"
            },
            {
              "position": [
                95,
                100
              ],
              "value": "water"
            },
            {
              "position": [
                103,
                108
              ],
              "value": "we're"
            },
            {
              "position": [
                109,
                114
              ],
              "value": "going"
            },
            {
              "position": [
                115,
                117
              ],
              "value": "on"
            },
            {
              "position": [
                118,
                119
              ],
              "value": "a"
            },
            {
              "position": [
                120,
                127
              ],
              "value": "journey"
            },
            {
              "position": [
                130,
                132
              ],
              "value": "my"
            },
            {
              "position": [
                133,
                139
              ],
              "value": "father"
            },
            {
              "position": [
                140,
                144
              ],
              "value": "said"
            },
            {
              "position": [
                147,
                149
              ],
              "value": "to"
            },
            {
              "position": [
                150,
                151
              ],
              "value": "a"
            },
            {
              "position": [
                152,
                158
              ],
              "value": "secret"
            },
            {
              "position": [
                159,
                164
              ],
              "value": "place"
            },
            {
              "position": [
                166,
                171
              ],
              "value": "we'll"
            },
            {
              "position": [
                172,
                177
              ],
              "value": "catch"
            },
            {
              "position": [
                178,
                181
              ],
              "value": "the"
            },
            {
              "position": [
                182,
                185
              ],
              "value": "air"
            },
            {
              "position": [
                187,
                192
              ],
              "value": "we'll"
            },
            {
              "position": [
                193,
                198
              ],
              "value": "catch"
            },
            {
              "position": [
                199,
                202
              ],
              "value": "the"
            },
            {
              "position": [
                203,
                209
              ],
              "value": "breeze"
            }
          ]
        },

這是我獲取可點擊單詞的代碼

 /*
  * Build one display string per token: the word itself followed by every
  * character (spaces, punctuation, quotes) that separates it from the next token.
  *
  * `token.position` is [start, end) — `end` is exclusive, e.g. [0, 4] covers "Into".
  * The text after a word therefore runs from this token's `end` up to the next
  * token's `start`, or to the end of `content` for the last token. The previous
  * version sliced `end - start` characters instead, i.e. as many characters as the
  * word was long, which duplicated or dropped text.
  *
  * Assumes `tokens` is ordered by position — TODO confirm against the data source.
  * Text located before the first token is not part of any entry.
  */
 const getWordsFromTokens = tokens.map((token, index) => {
   const [start, end] = token.position;

   // Where the non-word run after this word stops.
   const nextToken = tokens[index + 1];
   const trailingEnd = nextToken ? nextToken.position[0] : content.length;

   // Characters that are not covered by any token (punctuation, whitespace).
   const punctuationMarks = content.substring(end, trailingEnd);

   console.log(punctuationMarks);

   // Word concatenated with any space or punctuation mark that follows it.
   return content.substring(start, end) + punctuationMarks;
 });

這是我如何渲染文本

return (
    <div>
      <p> {
        getWordsFromTokens.map((word, index)=>{
         return <a href={'/word/' + word} > {word}</a>
        })
      }
      </p>
    </div>
  )

這是我的問題,當我渲染文本時,它看起來並不完全像原始文本。 我可能做錯了什么?

這是最終結果的樣子

我們在后備箱里放了兩根電線桿和一罐蠕蟲,一袋三明治和一瓶熱水。 “我們要踏上旅途,”我父親說。 說。 “去一個秘密的地方。 我們' 我們會趕上 ai 空氣! W 我們會趕上微風!

這樣的解決方案怎么樣? 我使用一個 cursorPosition 變量來跟蹤句子中已經處理到的位置。

 // Clickable words: `position` is the [start, end) offset pair of the word in `content`.
 const tokens = [
   { "position": [0, 4], "value": "into" },
   { "position": [5, 8], "value": "the" },
   { "position": [9, 14], "value": "trunk" },
   { "position": [15, 17], "value": "we" },
   { "position": [18, 21], "value": "put" },
   { "position": [22, 25], "value": "two" },
   { "position": [26, 31], "value": "poles" },
   { "position": [32, 35], "value": "and" },
   { "position": [36, 39], "value": "the" },
   { "position": [40, 43], "value": "can" },
   { "position": [44, 46], "value": "of" },
   { "position": [47, 52], "value": "worms" },
   { "position": [53, 56], "value": "and" },
   { "position": [57, 58], "value": "a" },
   { "position": [59, 63], "value": "sack" },
   { "position": [64, 66], "value": "of" },
   { "position": [67, 77], "value": "sandwiches" },
   { "position": [78, 81], "value": "and" },
   { "position": [82, 83], "value": "a" },
   { "position": [84, 91], "value": "thermos" },
   { "position": [92, 94], "value": "of" },
   { "position": [95, 100], "value": "water" },
   { "position": [103, 108], "value": "we're" },
   { "position": [109, 114], "value": "going" },
   { "position": [115, 117], "value": "on" },
   { "position": [118, 119], "value": "a" },
   { "position": [120, 127], "value": "journey" },
   { "position": [130, 132], "value": "my" },
   { "position": [133, 139], "value": "father" },
   { "position": [140, 144], "value": "said" },
   { "position": [147, 149], "value": "to" },
   { "position": [150, 151], "value": "a" },
   { "position": [152, 158], "value": "secret" },
   { "position": [159, 164], "value": "place" },
   { "position": [166, 171], "value": "we'll" },
   { "position": [172, 177], "value": "catch" },
   { "position": [178, 181], "value": "the" },
   { "position": [182, 185], "value": "air" },
   { "position": [187, 192], "value": "we'll" },
   { "position": [193, 198], "value": "catch" },
   { "position": [199, 202], "value": "the" },
   { "position": [203, 209], "value": "breeze" },
 ];

 // The sentence the token offsets refer to. It must stay on ONE line: the offsets
 // ("can" at 40-43, "of" at 44-46) leave room for a single space between the words.
 // A template literal is used because the text contains both ' and " characters.
 const content = `Into the trunk we put two poles and the can of worms and a sack of sandwiches and a thermos of water. “We're going on a journey,” my father said. “To a secret place. We'll catch the air; We'll catch the breeze;"`;

 // Offset in `content` just past the last token consumed so far.
 let cursorPosition = 0;

 // Split the sentence into alternating { type: 'non-word' } / { type: 'word' }
 // segments. The non-word segment holds whatever lies between the cursor and the
 // start of the current token (spaces, punctuation); it may be an empty string.
 const getWordsFromTokens = tokens.reduce((words, token) => {
   const [tokenStart, tokenEnd] = token.position;
   const notWordBeforeThisWord = content.substring(cursorPosition, tokenStart);
   const tokenValue = content.substring(tokenStart, tokenEnd);

   words.push(
     { type: 'non-word', value: notWordBeforeThisWord },
     { type: 'word', value: tokenValue }
   );

   cursorPosition = tokenEnd; // advance past the word just emitted
   return words;
 }, []);

 // Build real DOM nodes instead of concatenating HTML strings: a word such as
 // "We're" would otherwise terminate a single-quoted href attribute early, and
 // `innerHTML +=` re-parses the whole paragraph on every iteration.
 const sentenceElement = document.getElementById('new-sentence');

 getWordsFromTokens.forEach((item) => {
   if (item.type === 'word') {
     const link = document.createElement('a');
     link.setAttribute('href', `/word/${encodeURIComponent(item.value)}`);
     link.textContent = item.value;
     sentenceElement.append(link);
   } else {
     sentenceElement.append(item.value); // plain text, not clickable
   }
 });

 // Any characters left after the last token (closing punctuation / quotes).
 const endOfSentence = content.substring(cursorPosition);
 sentenceElement.append(endOfSentence);
 <p id='new-sentence'></p>

我認為使用 RegExp 會讓您的生活更輕松:

 const content = `Into the trunk we put two poles and the can of worms and a sack of sandwiches and a thermos of water. "We're going on a journey," my father said. "To a secret place. We'll catch the air; We'll catch the breeze.`. const result = content;match(/([\w'])+|([\,:?;-_.;"]+[\s"]*["]*)/gim). console;log(result), const punctuation = /[\:?;.\-_.?"]+/: function App() { return ( <div> {result;map((w) => punctuation.test(w), w. <a href={`/word/${w}`}>{w + '\n'}</a> )} </div> ); } ReactDOM.render(<App/>, document.getElementById("root"))
 <div id="root"></div> <script src="https://cdnjs.cloudflare.com/ajax/libs/react/16.6.3/umd/react.production.min.js"></script> <script src="https://cdnjs.cloudflare.com/ajax/libs/react-dom/16.6.3/umd/react-dom.production.min.js"></script>

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM