簡體   English   中英

如何使用 node.js 和 Cloudflare Workers 在現有 HTML 響應中注入 javascript

[英]How to inject javascript in existing HTML response with node.js and cloudflare workers

我有一個指向 GitBook 的自訂(vanity)URL。 GitBook 不支持插入任意 javascript 片段。 目前 GitBook 只有 4 個“集成”。

我可以通過我自己的 VM 服務器路由來實現這一點,但我有 CloudFlare,我想嘗試 Workers(在 CDN 邊緣運行的 Javascript)。

CloudFlare Workers 環境使標頭注入非常容易,但沒有明顯的方法可以向 HTML 內容中注入腳本。

使用 TransformStream 進行處理很重要,以便處理是異步的並且不需要內存緩沖(為了可擴展性和最小化 GC) - 只有 5 毫秒的 CPU 時間預算。

概述

  • 要為自己使用,請更改字符串 forHeadStart、forHeadEnd 和 forBodyEnd
  • 這種 deferredInjection 方法是推薦的方法,可以最大限度地減少 Worker 的 CPU 時間。 它更有效,因為它只需要解析 HTML 的最開始。 另一種方法需要解析 headInjection 的整個 head 部分,如果您使用 bodyInjection,它實際上需要解析整個 html 響應。
  • deferredInjection 方法的工作原理是將內容注入到 head 標記的開頭,然后在客戶端運行時將 HTML 內容部署到所需的位置。
  • 如果需要,您可以使用headInjection和/或bodyInjection直接注入。 取消注釋相關代碼,包括injectScripts代碼,並為將被編碼的tagBytes 設置字符串。
  • 此解決方案只會解析 HTML 內容類型
  • 此解決方案直接適用於字節(而非字符串)以提高效率。 搜索結束標記字符串的字節。
  • 您可能會定位更多的結束標簽,但通常您不需要定位超過這兩個
  • 使用流處理數據(整個 HTML 字符串未緩存在內存中)。 這會降低峰值內存使用量並加快到第一個字節的時間。
  • 處理結束標記位於文本讀取邊界上的罕見邊緣情況。 我相信每 ~1000 字節可能會出現一個邊界(每個 TCP 數據包 1000-1500 字節),這可能會因 gzip 壓縮而有所不同。
  • 將注入解析代碼分開,以便代碼簡單地轉發其余部分,以保持清晰。
  • 如果您不需要它,您可以通過將其注釋掉來禁用第二個 body-tag 注入器 - 這將加快處理速度。
  • 我已經為自己測試了這個確切的代碼並且它有效。 可能存在剩余的錯誤(取決於結束標記的位置,以及您的服務器是否使用部分 html 模板(僅正文)進行回復)。 我今天可能修好了一個 2019-06-28

代碼

// Register the Worker's fetch handler: every incoming request is answered by handleRequest.
addEventListener('fetch', (fetchEvent) => {
  // Ask the runtime to pass the request through if the handler throws
  // (see the Cloudflare Workers FetchEvent documentation for the exact semantics).
  fetchEvent.passThroughOnException();
  const pendingResponse = handleRequest(fetchEvent.request);
  fetchEvent.respondWith(pendingResponse);
});

/**
 * Fetch the requested resource and, when it is an HTML document, stream it
 * back through injectScripts so extra markup can be spliced in on the fly.
 * Every other response is returned untouched.
 *
 * @param {Request} request - The incoming request to forward.
 * @returns {Promise<Response>} The original response, or a streamed copy
 *   (same status and headers) whose body is produced by injectScripts.
 */
async function handleRequest(request) {
  const response = await fetch(request);

  // headers.get() yields null when the header is absent; media types are
  // case-insensitive, so normalise before comparing.
  const contentType = response.headers.get('content-type');
  if (contentType === null || !contentType.toLowerCase().startsWith('text/html')) {
    return response; // Only parse html bodies
  }

  // Responses without a body have nothing to stream or rewrite.
  if (response.body === null) {
    return response;
  }

  const { readable, writable } = new TransformStream();

  // Deliberately not awaited: injectScripts fills `writable` while the client
  // consumes `readable`. Awaiting here would stall, because nothing reads the
  // stream until the Response below has been returned.
  void injectScripts(response.body, writable);

  return new Response(readable, response);
}

// Encoder used to turn the injection payloads into bytes. Kept as `let`
// because the file clears this reference once the payloads have been built.
let encoder = new TextEncoder('utf-8');

/**
 * Injection job that places a small bootstrap right after the opening
 * `<head>` tag. The bootstrap carries the "end of head" and "end of body"
 * snippets base64-encoded and appends them to their final positions in the
 * browser, so the Worker only has to scan the very start of the document.
 *
 * Shape: { forInjection: Uint8Array, tagBytes: Uint8Array, insertAfterTag: boolean }
 *
 * Note: btoa() only accepts Latin-1 input, so forHeadEnd / forBodyEnd must
 * not contain characters outside that range.
 */
let deferredInjection = function() {
    let forHeadStart = `<script>var test = 1; //Start of head section</script>`;
    let forHeadEnd = `<script>var test = 2; //End of head section</script>`;
    let forBodyEnd = `<script>var test = 3; //End of body section</script><button>click</button>`;

    // The body snippet is attached with addEventListener('load', ...) rather
    // than by assigning window.onload: a page that later sets its own
    // window.onload would otherwise silently discard the injection.
    let helper = `
    ${forHeadStart}
    <script>
        function appendHtmlTo(element, htmlContent) {
            var temp = document.createElement('div');
            temp.innerHTML = htmlContent;
            while (temp.firstChild) {
                element.appendChild(temp.firstChild);
            };
        }

        let forHeadEnd = "${ btoa(forHeadEnd) }";
        let forBodyEnd = "${ btoa(forBodyEnd) }";

        if (forHeadEnd.length > 0) appendHtmlTo(document.head, atob(forHeadEnd)); 
    if (forBodyEnd.length > 0) window.addEventListener('load', function() {
      appendHtmlTo(document.body, atob(forBodyEnd));
    });

    </script>
    `;
    return {
        forInjection: encoder.encode(helper),
        tagBytes: encoder.encode("<head>"), //case sensitive, exact match (no attributes)
        insertAfterTag: true
    };

}();

// Optional direct-injection jobs, disabled by default. To use one, uncomment it
// here (it needs `encoder`, which is cleared below) together with the matching
// parseForInjection call in injectScripts.
// let headInjection = {
    // forInjection: encoder.encode("<script>var test = 1;</script>"),
    // tagBytes: encoder.encode("</head>"), //case sensitive
    // insertAfterTag: false
// };
// let bodyInjection = {
    // forInjection: encoder.encode("<script>var test = 1;</script>"),
    // tagBytes: encoder.encode("</body>"), //case sensitive
    // insertAfterTag: false
// }

//console.log(bodyTagBytes);
// All payloads above are already encoded to bytes, so drop the encoder reference.
encoder = null;

/**
 * Drive one response body through the injection pipeline: run the configured
 * injection job(s) in document order, then forward whatever input remains.
 *
 * @param {ReadableStream} readable - Source body to scan.
 * @param {WritableStream} writable - Destination that receives the rewritten body.
 * @returns {Promise<void>}
 */
async function injectScripts(readable, writable) {
  const reader = readable.getReader();
  const writer = writable.getWriter();

  // One scratch object for the whole request; the search helpers overwrite it in place.
  const result = { charactersFound: 0, foundIndex: -1, afterHeadTag: -1 };

  const state = {
    readStream: readable,
    writeStream: writable,
    reader,
    writer,
    leftOvers: null, // bytes that followed a matched tag and still need processing
    inputDone: false,
    result,
  };

  await parseForInjection(state, deferredInjection);

  // Optional direct injections (enable together with their job definitions):
  //await parseForInjection(state, headInjection);
  //await parseForInjection(state, bodyInjection);

  await forwardTheRest(state);
}



/**
 * Scan `chunk` for the byte sequence `tagBytes`, reporting through `result`
 * (which is overwritten in place so one object can be reused per request).
 *
 * Outcomes written to `result`:
 *  - no match and no partial match at the end of the chunk:
 *      charactersFound 0, foundIndex -1, afterHeadTag -1
 *  - full match: charactersFound === tagBytes.length, foundIndex is the index
 *      of the first matched byte, afterHeadTag the index just past the match
 *  - partial match cut off by the end of the chunk:
 *      0 < charactersFound < tagBytes.length, foundIndex as above,
 *      afterHeadTag === chunk.length
 *
 * @param {Uint8Array} chunk - Bytes to search.
 * @param {Uint8Array} tagBytes - Byte sequence to look for.
 * @param {{charactersFound: number, foundIndex: number, afterHeadTag: number}} result
 */
function searchByteArrayChunkForClosingTag(chunk, tagBytes, result)
{
    const tagLength = tagBytes.length;
    let resumeFrom = 0;

    while (true) {
        // Start every attempt from a clean "nothing found" state.
        result.charactersFound = 0;
        result.foundIndex = -1;
        result.afterHeadTag = -1;

        const candidate = chunk.indexOf(tagBytes[0], resumeFrom);
        if (candidate === -1) {
            return; // the first tag byte does not occur again
        }

        let cursor = candidate + 1;
        resumeFrom = cursor; // a failed attempt restarts right after this candidate
        result.foundIndex = candidate;
        result.charactersFound = 1;
        result.afterHeadTag = cursor;

        let tagPos = 1;
        while (tagPos < tagLength) {
            if (cursor === chunk.length) {
                return; // ran out of chunk while still matching: partial match
            }
            if (chunk[cursor] !== tagBytes[tagPos]) {
                // False start: discard the progress and try the next candidate.
                result.charactersFound = 0;
                result.afterHeadTag = -1;
                break;
            }
            cursor += 1;
            tagPos += 1;
            result.charactersFound += 1;
            result.afterHeadTag = cursor; // tracked so callers can cut around the tag
        }

        if (result.charactersFound === tagLength) {
            return; // complete match
        }
    }
}

/**
 * Resume a tag match that was cut off at the end of the previous chunk.
 * Matching restarts at the beginning of `chunk`, against the tag bytes that
 * follow the `lastSplitResult.charactersFound` bytes already matched.
 *
 * `result` is overwritten in place:
 *  - foundIndex is set to minus the number of bytes matched previously
 *    (negative marks a match that spans chunks),
 *  - on a mismatch charactersFound becomes 0 and afterHeadTag -1,
 *  - otherwise charactersFound / afterHeadTag advance with each matched byte.
 *
 * @param {Uint8Array} chunk - The chunk following the one with the partial match.
 * @param {Uint8Array} tagBytes - Byte sequence being searched for.
 * @param {{charactersFound: number}} lastSplitResult - Progress from the previous chunk.
 * @param {{charactersFound: number, foundIndex: number, afterHeadTag: number}} result
 * @returns {object|undefined} `result` when the chunk ended before the tag was
 *   completed or refuted; otherwise undefined.
 */
function continueSearchByteArrayChunkForClosingTag(chunk, tagBytes, lastSplitResult, result)
{
    const alreadyMatched = lastSplitResult.charactersFound;

    // Build on the progress carried over from the previous chunk.
    result.charactersFound = alreadyMatched;
    result.foundIndex = (-1 * result.charactersFound);
    result.afterHeadTag = 0;

    let cursor = 0;
    let tagPos = alreadyMatched;
    while (tagPos < tagBytes.length) {
        if (cursor === chunk.length) {
            return result; // chunk is shorter than what is left of the tag
        }
        if (chunk[cursor] !== tagBytes[tagPos]) {
            result.charactersFound = 0;
            result.afterHeadTag = -1;
            break;
        }
        cursor += 1;
        tagPos += 1;
        result.charactersFound += 1;
        result.afterHeadTag = cursor;
    }
}

/**
 * Search `chunk` for `tagBytes`, first trying to complete a match left
 * unfinished by the previous chunk (when `lastSplitResult` is given) and
 * otherwise, or if that attempt does not complete the tag, scanning the
 * chunk from its start. The outcome is written into `result`.
 *
 * @param {Uint8Array} chunk - Bytes to search.
 * @param {Uint8Array} tagBytes - Byte sequence to look for.
 * @param {?{charactersFound: number}} lastSplitResult - Partial-match progress
 *   from the previous chunk, or null/undefined when there is none.
 * @param {{charactersFound: number, foundIndex: number, afterHeadTag: number}} result
 * @returns {object|undefined} `result` when the carried-over match completed;
 *   otherwise whatever the fresh search returns (undefined when no
 *   carried-over match was supplied).
 */
function continueOrNewSearch(chunk, tagBytes, lastSplitResult, result)
{
    const hasCarriedMatch = lastSplitResult !== null && lastSplitResult !== undefined;
    if (!hasCarriedMatch) {
        searchByteArrayChunkForClosingTag(chunk, tagBytes, result);
        return;
    }

    continueSearchByteArrayChunkForClosingTag(chunk, tagBytes, lastSplitResult, result);

    const tagCompleted = result.charactersFound === tagBytes.length;
    return tagCompleted
        ? result
        : searchByteArrayChunkForClosingTag(chunk, tagBytes, result); // keep searching onward
}

/**
 * Copy input to output until `injectionJob.tagBytes` is found, then write the
 * tag together with `injectionJob.forInjection` and stop. Bytes that followed
 * the tag in the same chunk are parked in `processingState.leftOvers` for the
 * next stage.
 *
 * A tag split across two consecutive chunks is handled: the bytes of a partial
 * match at the end of a chunk are withheld and either re-emitted as part of
 * the tag (match completed) or written back unchanged (false start).
 * Known limit: because continueOrNewSearch restarts its scan when a carried
 * match is not completed within the next chunk, a tag spread over three or
 * more chunks is forwarded unchanged rather than matched.
 *
 * @param {object} processingState - Shared per-request state; uses `reader`,
 *   `writer`, `result`, `leftOvers` and `inputDone`.
 * @param {{tagBytes: Uint8Array, forInjection: Uint8Array, insertAfterTag: boolean}} injectionJob
 *   `insertAfterTag` true writes the payload right after the tag, false right before it.
 * @returns {Promise<true|undefined>} true when the input ended without a match
 *   (the writer has then been closed); undefined after a successful injection
 *   or when there was nothing to do.
 */
async function parseForInjection(processingState, injectionJob)
{
  if (processingState.inputDone) return; // A previous stage already consumed the whole input
  if (!injectionJob) return;
  if (!injectionJob.tagBytes) return;
  if (!injectionJob.forInjection) return;

  const { reader, writer, result } = processingState;
  const { tagBytes, forInjection } = injectionJob;

  // Progress of a match cut off by the end of the previous chunk. This must be
  // its own object: `result` is reused and overwritten by every search, so
  // aliasing it would lose the number of withheld bytes.
  let pendingSplit = null;
  processingState.inputDone = false;

  for (;;) {
    let chunk;
    if (processingState.leftOvers)
    {
      chunk = processingState.leftOvers;
      processingState.leftOvers = null;
    }
    else
    {
      const readerResult = await reader.read();
      chunk = readerResult.value;
      processingState.inputDone = readerResult.done;
    }

    if (processingState.inputDone) {
      if (pendingSplit !== null) {
        // The input ended in the middle of a possible tag: those bytes were
        // withheld, so flush them instead of dropping them.
        await writer.write(tagBytes.slice(0, pendingSplit.charactersFound));
      }
      await writer.close();
      return true;
    }

    // An empty chunk carries no information; skipping it keeps a pending split intact.
    if (chunk.length === 0) continue;

    continueOrNewSearch(chunk, tagBytes, pendingSplit, result);

    if (result.charactersFound === tagBytes.length) // Complete match
    {
      if (result.foundIndex >= 0)
      {
        // The tag lies entirely inside this chunk.
        if (pendingSplit !== null)
        {
          // The earlier partial match was a false start; restore its bytes first.
          await writer.write(tagBytes.slice(0, pendingSplit.charactersFound));
        }
        if (result.foundIndex > 0)
        {
          await writer.write(chunk.slice(0, result.foundIndex));
        }
      }
      // Otherwise (negative foundIndex) the tag began in the previous chunk;
      // its withheld leading bytes are emitted as part of tagBytes below.

      console.log('injected');
      if (injectionJob.insertAfterTag)
      {
        await writer.write(tagBytes);
        await writer.write(forInjection);
      }
      else
      {
        await writer.write(forInjection);
        await writer.write(tagBytes);
      }

      // Everything after the tag, up to and including the last byte of the chunk.
      const remainder = chunk.slice(result.afterHeadTag);
      processingState.leftOvers = remainder.length > 0 ? remainder : null;
      return;
    }

    if (pendingSplit !== null)
    {
      // The carried-over match was not completed, so write back the withheld
      // bytes (for example `<` or `<h`).
      await writer.write(tagBytes.slice(0, pendingSplit.charactersFound));
      pendingSplit = null;
    }

    if (result.charactersFound === 0)
    {
      // No trace of the tag in this chunk: forward it untouched.
      await writer.write(chunk);
      continue;
    }

    // Partial match at the very end of the chunk: forward what precedes it and
    // withhold the matched bytes until the next chunk settles the question.
    pendingSplit = { charactersFound: result.charactersFound };
    if (result.foundIndex > 0)
    {
      await writer.write(chunk.slice(0, result.foundIndex));
    }
  }
}

/**
 * Final pipeline stage: write any bytes still parked in
 * `processingState.leftOvers`, then hand the remaining input over to
 * `pipeTo` so it is copied to the output without further inspection.
 *
 * Errors are logged rather than rethrown (best effort, as before).
 *
 * @param {object} processingState - Shared per-request state; uses `reader`,
 *   `writer`, `readStream`, `writeStream`, `leftOvers` and `inputDone`.
 * @returns {Promise<void>}
 */
async function forwardTheRest(processingState)
{
  try
  {
    if (processingState.inputDone) return; // An earlier stage already consumed the whole input

    // Declared locally: assigning to an undeclared `chunk` would create a
    // global in sloppy mode and throw a ReferenceError in strict/module code.
    const pendingBytes = processingState.leftOvers;
    if (pendingBytes)
    {
      processingState.leftOvers = null; // consumed; never write it twice
      await processingState.writer.write(pendingBytes);
    }

    // pipeTo needs both streams unlocked.
    processingState.reader.releaseLock();
    processingState.writer.releaseLock();

    // With default options pipeTo closes the destination once the source
    // ends, so no explicit close call is needed here.
    await processingState.readStream.pipeTo(processingState.writeStream);
  }
  catch (e)
  {
    console.log(e);
  }
}

直接使用 (utf-8) 字節的進一步說明:

  • 僅使用字節值。 這至少可以通過搜索字符的第一個獨特的 utf-8 字節(< 128 和 > 192)來實現。 但在這種情況下,我們正在搜索由低於 128 字節組成的</head> ,非常容易使用。
  • 鑒於搜索 utf-8(這是最棘手的)的性質,這應該適用於 ['utf-8', 'utf8', 'iso-8859-1', 'us-ascii']。 您將需要更改代碼段編碼器以匹配。
  • 這沒有經過徹底測試。 邊界情況在我的測試中從未被觸發。 理想情況下,我們會為核心功能建立一個測試台
  • 感謝肯頓瓦爾達挑戰我
  • 如果有辦法在 Cloudflare Workers 的 forwardTheRest 函數中使用 pipeTo,請告訴我
  • 您可能會發現continueOrNewSearch和兩個子函數是一種在塊邊界上查找多字節的有趣方法。 直到邊界,我們只計算找到了多少字節。 沒有必要保留這些字節(我們知道它們是什么)。 然后在下一個塊上,我們從上次停下的地方繼續。 我們總是在頭部周圍切割數組緩沖區,並確保我們寫入頭部字節(使用 tagBytes)

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM