简体   繁体   中英

Compressing hexadecimal string using GZIP on the web (Javascript)

I have a textarea where the entered value is converted to hexadecimal and concatenated, as shown below:

4f43 5441 1d00 0000 2400 0000 0004 0000
0200 0000 0000 0000 0000 0000 0000 0000
0200 0000 0206 0073 6b79 626f 780e 0073
6b79 626f 7865 732f 7768 6974 6502 0800
6d61 7074 6974 6c65 1900 4f47 5a20 4564
6974 6f72 2066 6972 7374 2065 7665 7220
6d61 7003 6670 7300 0000 0000 0500 0200
0400 0300 0500 0700 0000 0044 0000 0044
0000 0044 0000 0000 0000 00ff 0000 0500
0000 0544 0000 0544 0000 0044 6e00 3200
0000 0000 0000 0200 0207 0007 0007 0001
0001 0001 0000 0202 0003 0004 0005 0006
0007 0000 0202 0003 0004 0005 0006 0007
0000 0202 0003 0004 0005 0006 0007 0000
0200 0000 0000 0007 0000 0000 0000 0100
0000 0000 0000 0000 0000 0000 0100 0000
0000 0000 0000 0000 0000 0100 0000 0000
0000 0000 0000 0000

My goal is to compress this hexadecimal data using gzip "online" (i.e., in the browser, without the zlib command line or Node.js) and make the output available for download using a Blob.

This is my attempt so far, using "pako":

html

<script src="https://cdn.jsdelivr.net/pako/1.0.3/pako.min.js"></script>
<textarea id="input"></textarea>
<button onclick="toHex()">Convert to hex</button>
<!-- The download link is an anchor, so it must be closed with </a>, not </button>. -->
<a id="downloadbtn">Download as gzip</a>

javascript

var pako = window.pako;
/**
 * Click handler for the "Convert to hex" button.
 *
 * Reads the text of the `#input` textarea, rewrites every character as the
 * hexadecimal form of its UTF-16 code unit (zero-padded to at least two
 * digits), gzips the resulting string with pako at level 6, and passes the
 * compressed data to `download`.
 *
 * Note: pako is handed the hex digits as a string, not the bytes those
 * digits denote.
 */
function toHex(){
    // Locals are declared with const: bare assignments would create implicit
    // globals and throw a ReferenceError in strict mode / ES modules.
    const input = document.getElementById("input").value;
    const hexresult = input
        .split("")
        .map((c) => c.charCodeAt(0).toString(16).padStart(2, "0"))
        .join("");

    const gzipresult = pako.gzip(hexresult, { level: 6 });
    download(gzipresult);
}

/**
 * Offers `data` to the user as a file named "output.gz".
 *
 * The data is wrapped in a Blob of type "application/gzip". When the legacy
 * `navigator.msSaveBlob` API exists it is used to save the Blob directly;
 * otherwise an object URL is created and assigned, together with the
 * `download` file name, to the `#downloadbtn` link.
 *
 * @param {*} data - Content placed in the Blob (e.g. the Uint8Array returned by pako).
 * @throws {Error} If the `#downloadbtn` element is needed but not present.
 */
function download(data){
    const blob = new Blob([ data ], { type: "application/gzip" });

    if (window.navigator.msSaveBlob) {
        window.navigator.msSaveBlob(blob, "output.gz");
        return;
    }

    // Plain DOM calls are used instead of `$(...)` so the function does not
    // depend on jQuery being loaded on the page.
    const downloadbtn = document.getElementById("downloadbtn");
    if (!downloadbtn) {
        throw new Error('download: no element with id "downloadbtn" found');
    }

    // Release the object URL left over from a previous call so repeated
    // conversions do not keep every old Blob alive.
    const previousUrl = downloadbtn.getAttribute("href");
    if (previousUrl && previousUrl.startsWith("blob:")) {
        URL.revokeObjectURL(previousUrl);
    }

    downloadbtn.setAttribute("download", "output.gz");
    downloadbtn.setAttribute("href", URL.createObjectURL(blob));
}

However the output generated by PAKO (gzip) does not match the output generated by zlib, they are not the same...

Is there a way to make the output of both be identical? or how can I compress a hexadecimal string correctly for gzip using JavaScript?

Update: @Blex mentioned using Buffer.from. I think that is native to Node.js (and I need this in the browser), so I tried a standalone Buffer script from a repository instead; however, the generated file is still the same (still different from zlib's). Anyway, I appreciate the attempt to help!

Comparison

Input: "68656c6c6f20776f726c6421" (hello world!)

zlib output (mingw command line): 1f8b 0800 4767 4c5e 0003 cb48 cdc9 c957 28cf 2fca 4951 0400 6dc2 b403 0c00 0000

pako output: 1f8b 0800 0000 0000 0003 33b3 3033 354b 06c2 3423 0373 73b3 3473 2320 dbc4 c810 00b2 2eed 2a18 0000 00

Edit: After writing this answer a little too fast, I noticed the outputs were not exactly the same. There is a one-byte difference, in the OS field of the gzip header: zlib writes 0a (10), which represents TOPS-20, and pako writes 03 (3), which represents Unix.

zlib returns a Buffer, and pako returns a Uint8Array. To get a Buffer, you can do:

// pako.gzip returns a Uint8Array; Buffer.from wraps it in a Node.js Buffer
// (Buffer is a Node.js global and is not available in browsers).
// Declared with const so the names do not become implicit globals.
const gzipresult = pako.gzip(hexresult, { level: 6 });
const gzipbuffer = Buffer.from(gzipresult);

This will give you output similar to zlib's. Here is a simple Node.js script to test and compare both:

// Don't forget to `npm i -S pako`
const pako = require("pako");
const zlib = require("zlib");

// Compares the gzip output of Node's zlib with pako's for the same input.
const input = "Hello world!";
// Hex form of each character's code unit, zero-padded to two digits.
const hexStr = input
  .split("")
  .map((c) => c.charCodeAt(0).toString(16).padStart(2, "0"))
  .join("");

zlib.gzip(hexStr, function(err, zlibResult) {
  // Surface compression failures instead of dereferencing an undefined result.
  if (err) {
    throw err;
  }

  const zlibHex = zlibResult.toString("hex");

  const pakoResult = pako.gzip(hexStr, { level: 6 });
  // pako returns a Uint8Array; wrap it in a Buffer to get a hex dump.
  const pakoHex = Buffer.from(pakoResult).toString("hex");

  console.log("zlib", zlibHex);
  console.log("pako", pakoHex);
  console.log("Outputs are equal ===", zlibHex === pakoHex);
});


/*
Output:
pako 1f8b080000000000000333b13033354b06c23423037373b334732320dbc4c81000bde2d6f318000000
zlib 1f8b080000000000000a33b13033354b06c23423037373b334732320dbc4c81000bde2d6f318000000
                        ^
             actually not the same
Outputs are equal === false
*/

So, it turns out that this whole confusion occurred because my input, in addition to being treated as plain text, contained several whitespace characters (which I had added for readability), and this was obviously affecting the result when converting to hexadecimal. To make it work the way I expected, I just had to remove all the whitespace, convert the hexadecimal string to raw bytes, and then compress those bytes with pako.

var pako = window.pako;
/**
 * Click handler for the "Convert to hex" button.
 *
 * Reads the `#input` textarea, strips all whitespace, interprets the
 * remaining text as pairs of hexadecimal digits (one byte per pair), gzips
 * those bytes with pako, and passes the compressed data to `download`.
 *
 * @throws {Error} If the input is empty, has an odd number of digits, or
 *   contains characters that are not hexadecimal digits.
 */
function toHex(){
    // Locals are declared with const: bare assignments would create implicit
    // globals and throw a ReferenceError in strict mode / ES modules.
    const input = document.getElementById("input").value.replace(/\s/g, "");

    // Without this guard, `match` returns null for empty input and `.map`
    // fails with an unhelpful TypeError.
    if (input.length === 0) {
        throw new Error("toHex: input is empty");
    }

    // Reject malformed input up front: a trailing single digit would be
    // silently dropped, and a non-hex pair would silently become byte 0.
    if (input.length % 2 !== 0 || /[^0-9a-f]/i.test(input)) {
        throw new Error("toHex: input must be an even number of hexadecimal digits");
    }

    const hexresult = Uint8Array.from(
        input.match(/.{2}/g),
        (pair) => Number.parseInt(pair, 16)
    );

    const gzipresult = pako.gzip(hexresult);
    download(gzipresult);
}

/**
 * Offers `data` to the user as a file named "output.gz".
 *
 * The data is wrapped in a Blob of type "application/octet-stream". When the
 * legacy `navigator.msSaveBlob` API exists it is used to save the Blob
 * directly; otherwise an object URL is created and assigned, together with
 * the `download` file name, to the `#downloadbtn` link.
 *
 * @param {*} data - Content placed in the Blob (e.g. the Uint8Array returned by pako).
 * @throws {Error} If the `#downloadbtn` element is needed but not present.
 */
function download(data){
    const blob = new Blob([ data ], { type: "application/octet-stream" });

    if (window.navigator.msSaveBlob) {
        window.navigator.msSaveBlob(blob, "output.gz");
        return;
    }

    // Plain DOM calls are used instead of `$(...)` so the function does not
    // depend on jQuery being loaded on the page.
    const downloadbtn = document.getElementById("downloadbtn");
    if (!downloadbtn) {
        throw new Error('download: no element with id "downloadbtn" found');
    }

    // Release the object URL left over from a previous call so repeated
    // conversions do not keep every old Blob alive.
    const previousUrl = downloadbtn.getAttribute("href");
    if (previousUrl && previousUrl.startsWith("blob:")) {
        URL.revokeObjectURL(previousUrl);
    }

    downloadbtn.setAttribute("download", "output.gz");
    downloadbtn.setAttribute("href", URL.createObjectURL(blob));
}

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM