简体   繁体   中英

Is there a better way to merge row data based on criteria (Google Apps Script)?

Alright...Let me start by saying I am self-taught with Google Apps Script...enough said, right!? The script below is functioning but I want to optimize it or come up with another way to achieve the same result. The script takes 18000 rows and 86 columns of data and combines them into a single rows based on an id list. The id list is about 13000 rows long. The short version is this...it filters the data by the id and then checks each column for the last row with submitted data and returns that cell. For example:

//sample data
[[311112, 1, 2, 4, 5,"","","","","","", 2, 3],
[323223,"","","","","", 2, 4, 4,"","","",""],
[321321, 1, 2, 4, 5,"","","","","","", 2, 3],
[311112, 4, 1, 6, 7,"", 3,"", 3,"","", 5, 3],
[321233,"","","","","","", 4, 3, 1, 5,"",""],
[321321,"","","","","","","","", 1 ,4,"",""],
[323223,"","","","","", 2, 3,"","","","",""],
[323153,"", 2, 3, 6,"","","","","","","",""],
[321321,"","","","","", 2, 3,"","","","",""],
[321321,"", 5, 3,"", 1,"","","","","","",""]]

//filtered Data by id 321321
[[321321, 1, 2, 4, 5,"","","","","","", 2, 3],
[321321,"","","","","","","","", 1, 4,"",""],
[321321,"","","","","", 2, 3,"","","","",""],
[321321,"", 5, 3,"", 1,"","","","","","",""]]

// returned row is getting the last nonempty value for each column from the filtered data.

[[321321, 1, 5, 3, 5, 1, 2, 3,"", 1, 4, 2, 3]]

It takes about 16-18 minutes for the script to complete. Is there a better way to accomplish this or any optimizations suggestions?

function combineR(startRow, startRange) {
  var ss = SpreadsheetApp.getActiveSpreadsheet();
  var sheets = ss.getSheets();
  var testSheet = ss.getSheetByName('Raw Scores');
  var cSheet = ss.getSheetByName('Combined Scores');
  var gradingResults = testSheet.getRange(1, 1, testSheet.getLastRow(), testSheet.getLastColumn()).getValues();

  if (startRow > cSheet.getLastRow()) {
    return;
  }

  if (startRow + startRange > cSheet.getLastRow()) {
    startRange = cSheet.getLastRow() - startRow;
  }

  var sID = cSheet.getRange(startRow, 2, startRange).getValues();
  var maxScores = [];
  for (var x = 0; x < sID.length; x++) {
    var filtered = gradingResults.filter(function (dataRow) {
      return dataRow[0] === sID[x][0];
    });

    if (isFinite(filtered)) {
      maxScores.push(['', '', '', '', '', '', '', '', '', '',
        '', '', '', '', '', '', '', '', '', '',
        '', '', '', '', '', '', '', '', '', '',
        '', '', '', '', '', '', '', '', '', '',
        '', '', '', '', '']);
      continue;
    } else {
      maxScores.push(['', getMaxLetter(filtered, 3), lastGraded(filtered, 4), lastGraded(filtered, 5), lastGraded(filtered, 6), lastGraded(filtered, 7), lastGraded(filtered, 8), lastGraded(filtered, 9), lastGraded(filtered, 10), lastGraded(filtered, 11),
        lastGraded(filtered, 12), lastGraded(filtered, 13), lastGraded(filtered, 14), lastGraded(filtered, 15), lastGraded(filtered, 16), lastGraded(filtered, 17), lastGraded(filtered, 18), lastGraded(filtered, 19), lastGraded(filtered, 20), lastGraded(filtered, 21),
        lastGraded(filtered, 22), lastGraded(filtered, 23), lastGraded(filtered, 24), lastGraded(filtered, 25), lastGraded(filtered, 26), lastGraded(filtered, 27), lastGraded(filtered, 28), lastGraded(filtered, 29), lastGraded(filtered, 30), lastGraded(filtered, 31),
        lastGraded(filtered, 32), lastGraded(filtered, 33), lastGraded(filtered, 34), lastGraded(filtered, 35), lastGraded(filtered, 36), lastGraded(filtered, 37), lastGraded(filtered, 38), lastGraded(filtered, 39), lastGraded(filtered, 40), lastGraded(filtered, 41),
        lastGraded(filtered, 42), lastGraded(filtered, 43), lastGraded(filtered, 44), lastGraded(filtered, 45), lastGraded(filtered, 46)]);
    }
  }
  cSheet.getRange(startRow, 11, maxScores.length, maxScores[0].length).setValues(maxScores)
}

function getMaxLetter(arr, idx) {
  var letter = arr.map(function (e) { return e[idx] }).sort().pop();
  return letter;
}

function lastGraded(arr, idx) {
  var newArray = arr.map(function (e) { return e[idx] });
  newArray.reverse();
  for (var x = 0; x < newArray.length; x++) {
    if (typeof newArray[x] == 'number') {
      return newArray[x];
    }
  }
  return '';
}

Column A has duplicate Ids that need to be merged 原始数据

Column B has the unique values that are the final merged product 组合数据

Issues:

The script seems to have various issues, but the main issue seems to be calling lastGraded function many times with various indexes. This does map , reverse and everything else for each index and costs time.

Solution:

Given your sample data, I propose the following approach:

  • Get all the input data in 1 2D array

  • Reduce the input data to a Map . The map will have each id as key and all the rows that match that key as 2D array for each key . This will greatly increase performance/speed at the cost of memory. This is better than filtering the array by each id, because,

    • You loop the input array only once
    • whereas arr.filter will have to loop the array for each id
  • Once reduced to a map , Loop through each array in the map in the reverse for each element in the last row to find the non empty element.

Sample snippet:

 const arrMain = //sample data [ [311112, 1, 2, 4, 5, '', '', '', '', '', '', 2, 3], [323223, '', '', '', '', '', 2, 4, 4, '', '', '', ''], [321321, 1, 2, 4, 5, '', '', '', '', '', '', 2, 3], [311112, 4, 1, 6, 7, '', 3, '', 3, '', '', 5, 3], [321233, '', '', '', '', '', '', 4, 3, 1, 5, '', ''], [321321, '', '', '', '', '', '', '', '', 1, 4, '', ''], [323223, '', '', '', '', '', 2, 3, '', '', '', '', ''], [323153, '', 2, 3, 6, '', '', '', '', '', '', '', ''], [321321, '', '', '', '', '', 2, 3, '', '', '', '', ''], [321321, '', 5, 3, '', 1, '', '', '', '', '', '', ''], ]; //reduce input array to a map of id=>rows const map = arrMain.reduce((map, row) => { if (!map.has(row[0])) map.set(row[0], [row]); else map.get(row[0]).push(row); return map; }, new Map()); const out = []; map.forEach(arr2d => { const l = arr2d.length - 1, lastRow = arr2d[l].slice(0); //iterate lastrow of this id's column elements for (let j = 0; j < lastRow.length; ++j) { if (lastRow[j] === '') { //iterate each row of this id for (let i = l; i >= 0; --i) { if (arr2d[i][j] !== '') { lastRow[j] = arr2d[i][j]; break; } } } } out.push(lastRow); }); console.log(out);

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM