简体   繁体   中英

How to programmatically format text between start and end tags and then remove the tags

Edit: the below endeavor is related to Google Apps Script to format text in a Google Doc.

I'm not familiar with JavaScript and really have only done some small bit of R coding and so this effort is a bit of parsing what I can google as well as some trial and error. I've had some promising success, but I'm also having some trouble finding a complete answer to the following scenario.

Context

I have a google doc template that has embedded merge codes. A separate application is pulling data fields from objects and related objects and replacing those merge codes with unformatted text. A side effect of this application is that I cannot format the merge codes ahead of time so that when replaced, the text is formatted appropriately. Therefore I'm trying to create a script to run following text merge to programmatically apply some formatting.

What I Need

I have three different styles I need to apply. I am creating code to search for start tags and end tags and then format the text between. In a perfect world the tags would also then be removed from the document leaving only the formatted text behind.

Styled Item Tags Formatting to be applied
Requests <req> </req> Roboto 10, Bold, #4a5356
Citations <cit> </cit> Lato 8, Bold, #4A5356
Conclusions <con> </con> Lato 8, Bold, #B38F00

Code so Far

/**
 * Simple trigger run when the document is opened.
 * Adds a "Butler" menu with a single "Format Headings" entry that invokes
 * the function named 'FormatRequests'.
 */
function onOpen() {
  const ui = DocumentApp.getUi();
  const butlerMenu = ui.createMenu('Butler');
  const menuWithItem = butlerMenu.addItem('Format Headings', 'FormatRequests');

  menuWithItem.addToUi();
}

/**
 * Bolds the text enclosed by every <req>...</req> pair in each paragraph of
 * the active document. The tags themselves are left in place.
 *
 * Offsets passed to setBold are inclusive on both ends, so a pair that
 * encloses a single character yields start === end and must still be
 * formatted; only an empty pair (end < start) is skipped.
 *
 * NOTE(review): the offsets come from findText on the paragraph and are
 * applied through paragraph.editAsText(); this assumes the paragraph's text
 * is a single run so both use the same offsets — confirm for paragraphs that
 * contain inline non-text elements.
 */
function FormatRequests() {
  const startTag = '<req>';
  const endTag = '</req>';
  const paragraphs = DocumentApp.getActiveDocument().getBody().getParagraphs();

  for (const paragraph of paragraphs) {
    let from = paragraph.findText(startTag);

    // Walk every start tag in the paragraph, not just the first one.
    while (from) {
      const to = paragraph.findText(endTag, from);
      if (!to) {
        // Unclosed start tag: nothing further to format in this paragraph.
        break;
      }

      const firstChar = from.getStartOffset() + startTag.length;
      const lastChar = to.getStartOffset() - 1;
      if (lastChar >= firstChar) {
        paragraph.editAsText().setBold(firstChar, lastChar, true);
      }

      // Resume the search after the end tag just consumed.
      from = paragraph.findText(startTag, to);
    }
  }
}

So far I've succeeded in finding the tags and setting text between the tags to bold. But I don't know how to continue to apply the remainder of the formatting or how to remove the tags once the formatting has been applied.

Any ideas?

What I've done is convert the text into an array of segments and then use that array to apply the desired formatting.

So, if this is the data:

Hey! <req>My name is John</req> and I am a <cit>web developer</cit> from Canada. I love coding and solving problems. <con>Ping me if you want to talk code</con>.

I will convert this into the following nested array, where the first item at every index specifies the start index.

[
  [ 0, 'Hey! ' ],
  [ 5, 'My name is John', '<req>' ],
  [ 20, ' and I am a ' ],
  [ 32, 'web developer', '<cit>' ],
  [ 45, ' from Canada. I love coding and solving problems. ' ],
  [ 95, 'Ping me if you want to talk code', '<con>' ],
  [ 127, '.' ]
]

NOTE: I've used ~ and ^ characters to split the data, hoping that these two characters don't appear in your data.

Working Code Below:

/**
 * Rewrites the active document's body text without the <req>/<cit>/<con>
 * tags and applies the matching style to each formerly tagged span.
 *
 * The whole body text is read, split into segments by splitText (each
 * segment is [startIndex, text] or [startIndex, text, '<tag>']), deleted,
 * and then re-appended segment by segment. Tagged segments get the style
 * from the table below; untagged segments have the four attributes reset
 * to null. Segments with an unrecognized tag are appended unformatted.
 *
 * Font families are given as font-name strings so that Lato, which the
 * citation and conclusion styles call for, can be used.
 */
function myFunction() {
  const tagStyles = {
    '<req>': { family: 'Roboto', size: 10, bold: true, color: '#4a5356' },
    '<cit>': { family: 'Lato', size: 8, bold: true, color: '#4a5356' },
    '<con>': { family: 'Lato', size: 8, bold: true, color: '#b38f00' },
  };
  const plainStyle = { family: null, size: null, bold: null, color: null };

  const allText = DocumentApp.getActiveDocument().getBody().editAsText();
  const originalText = allText.getText();
  const segments = splitText(originalText);

  // deleteText takes an inclusive end offset; an empty document would
  // otherwise produce the invalid range (0, -1).
  if (originalText.length > 0) {
    allText.deleteText(0, originalText.length - 1);
  }

  for (const [start, content, tag] of segments) {
    // splitText emits empty segments around adjacent/leading/trailing tags;
    // they have no characters to format (end would be start - 1).
    if (content.length === 0) {
      continue;
    }

    const appended = allText.appendText(content);
    const end = start + content.length - 1;

    const style = tag === undefined ? plainStyle : tagStyles[tag];
    if (!style) {
      continue;
    }

    appended.setFontFamily(start, end, style.family);
    appended.setFontSize(start, end, style.size);
    appended.setBold(start, end, style.bold);
    appended.setForegroundColor(start, end, style.color);
  }
}

/**
 * Splits text containing three-letter tags (<abc> ... </abc>) into segments
 * with the tags removed.
 *
 * Every opening or closing tag is a segment boundary. The text that ends at
 * a closing tag </abc> is labelled with '<abc>'; all other text is
 * unlabelled. Empty segments (e.g. before a leading tag or after a trailing
 * one) are kept, so the result always has at least one entry.
 *
 * The text is scanned directly rather than by substituting marker
 * characters, so '~' and '^' in the input are preserved as ordinary text.
 *
 * @param {string} data - Text to split.
 * @returns {Array<Array>} Segments of the form [start, text] or
 *     [start, text, '<tag>'], where start is the segment's index in the
 *     tag-free text.
 */
function splitText(data) {
  const tagPattern = /<(\/?)([a-z]{3})>/g;
  const segments = [];
  let outputOffset = 0; // index of the next segment in the tag-free text
  let cursor = 0; // position in `data` just past the previous tag

  for (const match of data.matchAll(tagPattern)) {
    const [wholeTag, slash, tagName] = match;
    const text = data.slice(cursor, match.index);

    segments.push(
      slash === '/' ? [outputOffset, text, `<${tagName}>`] : [outputOffset, text]
    );

    outputOffset += text.length;
    cursor = match.index + wholeTag.length;
  }

  // Whatever follows the last tag (possibly nothing) is a final plain segment.
  segments.push([outputOffset, data.slice(cursor)]);
  return segments;
}

Try this:

/**
 * Entry point: applies the formatting for each of the three tag pairs by
 * calling handle_tags once per pair, in the order requests, citations,
 * conclusions.
 */
function main() {
  const tagFormats = [
    [['<req>', '</req>'], "Roboto", 10, "Bold", "#4a5356"],
    [['<cit>', '</cit>'], "Lato", 8, "Bold", "#4a5356"],
    [['<con>', '</con>'], "Lato", 8, "Bold", "#B38F00"],
  ];

  for (const [tagPair, fontFamily, fontSize, fontStyle, fontColor] of tagFormats) {
    handle_tags(tagPair, fontFamily, fontSize, fontStyle, fontColor);
  }
}

/**
 * Formats the text between each start/end tag pair in the active document's
 * body, then removes every occurrence of both tags.
 *
 * @param {string[]} tags - [startTag, endTag]. Both are passed to findText
 *     and replaceText as search patterns.
 * @param {string} family - Font family name applied to the enclosed text.
 * @param {number} size - Font size applied to the enclosed text.
 * @param {string} style - 'bold', 'italic' or 'underline' (case-insensitive);
 *     any other value applies no extra style.
 * @param {string} color - Foreground color applied to the enclosed text.
 *
 * NOTE(review): offsets of the end tag are applied to the element that holds
 * the start tag, which assumes both tags sit in the same text element —
 * confirm for documents where a tagged span can cross paragraphs.
 */
function handle_tags(tags, family, size, style, color) {
  const body = DocumentApp.getActiveDocument().getBody();
  const [startTag, endTag] = tags;
  const styleName = String(style || '').toLowerCase();

  let found = body.findText(startTag);

  while (found) {
    const closing = body.findText(endTag, found);
    if (!closing) {
      // Start tag without a following end tag: no later pair can exist.
      break;
    }

    const elem = found.getElement();
    // First character after the start tag (its end offset is inclusive).
    const start = found.getEndOffsetInclusive() + 1;
    // Last character before the end tag.
    const end = closing.getStartOffset() - 1;

    // An empty pair gives end < start; there is nothing to format.
    if (end >= start) {
      elem.setFontFamily(start, end, family);
      elem.setFontSize(start, end, size);
      elem.setForegroundColor(start, end, color);

      switch (styleName) {
        case 'bold': elem.setBold(start, end, true); break;
        case 'italic': elem.setItalic(start, end, true); break;
        case 'underline': elem.setUnderline(start, end, true); break;
      }
    }

    found = body.findText(startTag, found);
  }

  body.replaceText(startTag, '');
  body.replaceText(endTag, '');
}

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM