![](/img/trans.png)
[英]How do I check if pairs of ints exist in array X, then remove the specific values that form the pairs from array Y?
[英]How do I check for word pairs in a string?
我有下面的代碼,它可以完美地創建一個字符串(大量 .txt 文件)中出現的單詞的 CSV 列表,如下所示:
Name;Total
THE;23562
OF;15954
AND;15318
IN;12159
TO;11879
A;11145
I;6135
WAS;6045
etc...
我現在想統計的是由兩個單詞組成的詞對;如果實現起來足夠簡單,甚至可以是三個單詞的組合。例如:
Name;Total
OF THE;25
FROM THE;20
BY WHICH;13
OF WHICH;5
etc...
如何修改現有代碼以檢查成對而不是單個單詞?
//chrisjopa.com/2016/04/21/counting-word-frequencies-with-javascript/
var fs = require('fs');
var file = 'INPUT.txt';

// CSV writer: each record's `name` and `total` fields are written under the
// column titles "Name" and "Total".
const createCsvWriter = require('csv-writer').createObjectCsvWriter;
const csvWriter = createCsvWriter({
  // Output path; change to taste.
  path: 'Data1.csv',
  header: [
    {id: 'name', title: 'Name'},
    {id: 'total', title: 'Total'},
  ]
});

// Read the input file, count the words in it and write the sorted counts
// to the CSV file.
fs.readFile(file, 'utf8', function (err, data) {
  if (err) throw err;
  var wordsArray = splitByWords(data);
  var wordsMap = createWordMap(wordsArray);
  var finalWordsArray = sortByCount(wordsMap);
  // Write CSV output file.
  csvWriter
    .writeRecords(finalWordsArray)
    .then(() => console.log('DONE'))
    .catch(function (writeErr) {
      // Without this handler a failed write would surface only as an
      // unhandled promise rejection.
      console.error('Failed to write CSV output:', writeErr);
      process.exitCode = 1;
    });
});
/**
 * Split raw text into an array of upper-cased words.
 *
 * Punctuation characters listed in the pattern below are deleted (not replaced
 * by a space), so a hyphenated word such as "well-known" becomes "WELLKNOWN".
 * The remaining text is upper-cased and split on runs of whitespace.
 *
 * @param {string} text - The text to tokenize.
 * @returns {string[]} The words in order of appearance; empty for input that
 *   contains no word characters.
 */
function splitByWords (text) {
  var noPunctuation = text.replace(/[\.,-\/#!$%\^&\*;:{}�=\-_'`’~"()@\+\?><\[\]\+]/g, '');
  var allUpperCase = noPunctuation.toUpperCase();
  // Splitting on /\s+/ already collapses whitespace runs; leading or trailing
  // whitespace (or an empty string) would still yield '' tokens, so drop them
  // to avoid counting the empty string as a word.
  return allUpperCase.split(/\s+/).filter(function (word) {
    return word.length > 0;
  });
}
//This is the part in the code that I feel is the place to check for word
//pairs, but I'm not sure how I'm supposed to write it.
/**
 * Count how many times each string occurs in an array.
 *
 * @param {string[]} wordsArray - The items to count (single words or
 *   pre-joined word groups).
 * @returns {Object<string, number>} A plain object mapping each distinct item
 *   to its number of occurrences.
 */
function createWordMap (wordsArray) {
  var wordsMap = {};
  // Borrow the method from Object.prototype: calling wordsMap.hasOwnProperty
  // directly breaks as soon as the text contains the word "hasOwnProperty",
  // because the count stored under that key shadows the method.
  var hasOwn = Object.prototype.hasOwnProperty;
  wordsArray.forEach(function (key) {
    if (hasOwn.call(wordsMap, key)) {
      wordsMap[key]++;
    } else {
      // defineProperty creates a real own property even for "__proto__",
      // which a plain assignment would silently ignore.
      Object.defineProperty(wordsMap, key, {
        value: 1,
        writable: true,
        enumerable: true,
        configurable: true
      });
    }
  });
  return wordsMap;
}
/**
 * Convert a word-count map into an array of records ordered from the highest
 * count to the lowest.
 *
 * @param {Object<string, number>} wordsMap - Map of item to occurrence count.
 * @returns {Array<{name: string, total: number}>} One record per key of
 *   `wordsMap`, sorted by `total` in descending order.
 */
function sortByCount (wordsMap) {
  var records = [];
  Object.keys(wordsMap).forEach(function (word) {
    records.push({name: word, total: wordsMap[word]});
  });
  // Larger totals first.
  return records.sort(function (left, right) {
    return right.total - left.total;
  });
}
從 `wordsArray` 創建另一個數組,將每對相鄰的單詞放在一起。例如,從下面的 `wordsArray`:
['Foo', 'Bar', 'Baz', 'Buzz']
創建:
['Foo Bar', 'Bar Baz', 'Baz Buzz']
然后,您可以使用完全相同的函數來計算每對出現的次數——只需用這個新數組調用 `createWordMap`(然后再調用 `sortByCount`)。例如:
const wordsArray = ['Foo', 'Bar', 'Baz', 'Buzz', 'Foo', 'Bar'];

// Join every word with the word before it, producing one entry per adjacent
// pair ("Foo Bar", "Bar Baz", ...).
const wordPairsArray = [];
for (let i = 1; i < wordsArray.length; i++) {
  wordPairsArray.push(wordsArray[i - 1] + ' ' + wordsArray[i]);
}

// The pairs are plain strings, so the single-word counting helpers work as-is.
const wordPairMap = createWordMap(wordPairsArray);
const wordPairCount = sortByCount(wordPairMap);
console.log(wordPairCount);

// Helper functions from the question. createWordMap looks keys up through
// Object.prototype.hasOwnProperty so that a counted word cannot shadow the
// method on the map itself.

/**
 * Count how many times each string occurs in an array.
 *
 * @param {string[]} wordsArray - The items to count.
 * @returns {Object<string, number>} Map of item to occurrence count.
 */
function createWordMap(wordsArray) {
  const wordsMap = {};
  const hasOwn = Object.prototype.hasOwnProperty;
  wordsArray.forEach(function (key) {
    if (hasOwn.call(wordsMap, key)) {
      wordsMap[key]++;
    } else {
      // defineProperty also creates an own property for the key "__proto__".
      Object.defineProperty(wordsMap, key, {
        value: 1,
        writable: true,
        enumerable: true,
        configurable: true
      });
    }
  });
  return wordsMap;
}

/**
 * Turn a count map into records sorted by count in descending order.
 *
 * @param {Object<string, number>} wordsMap - Map of item to occurrence count.
 * @returns {Array<{name: string, total: number}>} Sorted records.
 */
function sortByCount(wordsMap) {
  const finalWordsArray = Object.keys(wordsMap).map(function (key) {
    return {
      name: key,
      total: wordsMap[key]
    };
  });
  finalWordsArray.sort(function (a, b) {
    return b.total - a.total;
  });
  return finalWordsArray;
}
要將其擴展到不僅僅是對,只需更改循環以將動態數量的元素連接在一起:
/**
 * Build every run of `wordsInItem` consecutive words, joined by single spaces.
 *
 * @param {string[]} words - The words in their original order.
 * @param {number} wordsInItem - How many consecutive words form one item
 *   (2 for pairs, 3 for triples, ...).
 * @returns {string[]} The joined items in order of appearance; empty when
 *   `words` has fewer than `wordsInItem` entries.
 */
function combineWords(words, wordsInItem) {
  const items = [];
  // i is the index of the LAST word of each item.
  for (let i = wordsInItem - 1; i < words.length; i++) {
    const start = i - (wordsInItem - 1);
    const end = i + 1;
    items.push(words.slice(start, end).join(' '));
  }
  return items;
}

/**
 * Count the groups of `wordsInItem` consecutive words and log the counts,
 * highest first.
 *
 * @param {string[]} words - The words in their original order.
 * @param {number} wordsInItem - How many consecutive words form one item.
 */
function getCount(words, wordsInItem) {
  const combinedWords = combineWords(words, wordsInItem);
  const map = createWordMap(combinedWords);
  const count = sortByCount(map);
  console.log(count);
}

getCount(['Foo', 'Bar', 'Baz', 'Buzz', 'Foo', 'Bar'], 2);
getCount(['Foo', 'Bar', 'Baz', 'Buzz', 'Foo', 'Bar', 'Baz'], 3);

// Helper functions from the question. createWordMap looks keys up through
// Object.prototype.hasOwnProperty so that a counted word cannot shadow the
// method on the map itself.

/**
 * Count how many times each string occurs in an array.
 *
 * @param {string[]} wordsArray - The items to count.
 * @returns {Object<string, number>} Map of item to occurrence count.
 */
function createWordMap(wordsArray) {
  const wordsMap = {};
  const hasOwn = Object.prototype.hasOwnProperty;
  wordsArray.forEach(function (key) {
    if (hasOwn.call(wordsMap, key)) {
      wordsMap[key]++;
    } else {
      // defineProperty also creates an own property for the key "__proto__".
      Object.defineProperty(wordsMap, key, {
        value: 1,
        writable: true,
        enumerable: true,
        configurable: true
      });
    }
  });
  return wordsMap;
}

/**
 * Turn a count map into records sorted by count in descending order.
 *
 * @param {Object<string, number>} wordsMap - Map of item to occurrence count.
 * @returns {Array<{name: string, total: number}>} Sorted records.
 */
function sortByCount(wordsMap) {
  const finalWordsArray = Object.keys(wordsMap).map(function (key) {
    return {
      name: key,
      total: wordsMap[key]
    };
  });
  finalWordsArray.sort(function (a, b) {
    return b.total - a.total;
  });
  return finalWordsArray;
}
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.