简体   繁体   中英

Trying to parse an RTF file using JavaScript

I have an RTF file that gets uploaded, and once it's uploaded I want it to be parsed and converted to a JSON file. I'm mostly using Angular for this. The upload works just fine; I'm only having an issue with the parsing. The file does get cleaned up, but the result is not properly formatted as JSON. Does anyone have any suggestions as to the best thing to do?

// Assigns a `parseFile(file, uploadUrl)` method on `this`; the visible part
// installs an onload handler on `reader` that copies `reader.result` into
// a local `dataURL`.
// NOTE(review): `reader` is not declared in this excerpt (presumably a
// FileReader - confirm). The closing braces and whatever uses `dataURL`,
// `file` and `uploadUrl` are missing, so the fragment is incomplete as shown.
this.parseFile=function(file, uploadUrl){

    reader.onload = function(onLoadEvent){

            // Whatever the reader produced for the loaded file.
            var dataURL = reader.result;

// Packs `file` into a FormData under the key 'save_file'. The option lines
// that follow describe a POST to "includes/loadFile.php" carrying that
// FormData, and the success/error callbacks store the response in `msg`.
// NOTE(review): the call that consumes these options is not visible here
// (presumably AngularJS `$http({ ... })` - confirm), and several closing
// braces are missing, so the fragment does not parse as shown.
// NOTE(review): `msg` is assigned without a declaration in this excerpt.
function saveFile(file){

    var fd = new FormData();
    fd.append('save_file', file);
        // Request options; the opening of the enclosing call is missing.
        method: 'POST',
        url: "includes/loadFile.php",
        data: fd,
        // presumably the usual AngularJS multipart-upload settings (send the
        // FormData untouched and leave the Content-Type unset) - TODO confirm
        transformRequest: angular.identity,
        headers: {'Content-Type': undefined}

    // Success callback: stores the response data in a "success" message.
    .success(function (data, status, headers, config) {
        msg = {
                title: "success",
                cls: "success",
                msg: data
    // Error callback: stores the response data in a "danger" message.
    .error(function (data, status, header, config) {
        msg = {
                title: "error",
                cls: "danger",
                msg: data
// Cleans up RTF text and then tries to pull vendor/order data out of it.
//
// First half: copies `data` into `text`, walks it from index `j`, calls
// gettag() whenever a backslash is seen and appends every other character to
// `ansa`. A series of replace() calls then strips braces, double quotes and
// a few fixed report phrases from `ansa`.
// Second half: searches `ansa` for "Vendor", "date" and "figures" and builds
// a `vendor` object (vendorid, vendorInfo, orders) from the pieces.
//
// NOTE(review): this excerpt is not runnable as shown - the function has no
// opening brace, most closing braces are missing, the switch cases have no
// visible bodies or break statements, and no statement that advances `j`
// inside the while loop is visible.
// NOTE(review): the pattern given to the first text.replace() call, and the
// one given to ansa.replace() inside the split() call, are quoted strings
// rather than regex literals, so replace() searches for that literal text.
// The second argument of split() is its limit parameter, not input text.
// NOTE(review): the replace() call that takes the plain string
// "On Line Vendor Sales Quick Report by Month" removes only the first match.
// NOTE(review): explode, unset, count, array_values and end(...) are not
// defined anywhere in this excerpt (they look like PHP built-ins - confirm
// whether this part was ported from PHP); `end` is also declared as a local
// variable here. The block comment opened in front of the commented-out
// foreach is never closed in this excerpt.
// NOTE(review): text, j, len and c are assigned without var; gettag() and
// getpgraph() read the same names.
function setUpData(data)
    text = data;

    // NOTE(review): the first argument is a string, not a regex literal.
    text = text.replace('/[\p{Z}\s]{2,}/u', ' ');
    // skip over the heading stuff
    j= text.indexOf('{',1); // skip ahead to the first part of the header

    var loc = 1;
    var t ="";

    var ansa="";
    len = text.length;

    getpgraph(); // skip by the first paragrap

    while(j<len) {
        c = text.substr(j,1);
        if (c=="\\") {
            // have a tag
            var tag = gettag();
            if (tag.length > 0) {
                // process known tags
                switch (tag) {
                    case 'par':
                    // ad a list of common tags
                    // parameter tags
                    case 'spriority1':
                    case 'fprq2':
                    case 'author':
                    case 'operator':
                    case 'sqformat':
                    case 'company':
                    case 'xmlns1':
                    case 'wgrffmtfilter':
                    case 'pnhang':
                    case 'themedata':
                    case 'colorschememapping':
                        var tt = gettag();
                    case '*':
                    case 'info':
                    case 'stylesheet':
                        // gets to end of paragraph
                    // ignore the tag
         } else {
            ansa += c;

    ansa = ansa.replace(/{|}|"|( | )\r/g,'');
    ansa = ansa.replace(/Amount After|Amt After/g,'');
    ansa = ansa.replace(/negetive amount means Refund NOTE: Layaways are not in Sales Report till they're paid off/g,'');
    ansa = ansa.replace("On Line Vendor Sales Quick Report by Month",'');
    ansa = ansa.replace(/  ( )  /g,'');



    var newData = new Array();
    var vendor = new Array();
    var orders = new Array();
    var v = 0;

    //Lets look for the line that has the word vendor
    vendorLine = ansa.match(/Vendor/gi);

    //Look for each order for the vendor
    orderLine = ansa.match(/date/gi);
    figuresLine = ansa.match(/figures/g);

    var start;
    var end;
    for(arr = 0; arr <=ansa.length; arr++)


        vendorInfo = ansa.split(' ',ansa.replace('/Vendor|\r|\s/','') ); 
        vendorFName = vendorInfo[0];
        vendorLName = vendorInfo[1];
        vendorID = vendorInfo[2];
        vendor = {vendorid: vendorID, vendorInfo:{fname: vendorFName,lname:vendorLName} , orders:orders};


    if( orderLine ) 
        start = 1;

    if( figuresLine ) 
        end = 0;
    if( !end || !start)
        if( end < start  )
            ordersLine = explode(" ",v);
            orderDesc = new Array();
            //Remove the stuff we dont need. Like Qty ,Subtotal and Inv#. Once we remove them we reindex them
            unset(ordersLine[0],ordersLine[2],ordersLine[count(ordersLine)]); // Removes Subtotal and Inv#
            unset(ordersLine[count(ordersLine)]); // Removes QTY
            ordersLine = array_values(ordersLine);

            orderDate = ordersLine[0];
            orderAmt = end(ordersLine);

            /*foreach (ordersLine as key=>value)
                if(key != 0)
                    if(key != count(ordersLine)-1)
                        orderLines = array("vendorid"=>vendorID,"order"=>array("date"=> orderDate,"desc"=> implode(" " ,orderDesc), "amount"=>orderAmt) );

    vendor['orders'] = orders;

// Reads a tag name out of `text` at the cursor `j` and returns it as a string
// (an empty string if nothing was collected). `text`, `j`, `len` and `c` are
// not declared here; setUpData() assigns the same names.
// Scanning stops when `j` reaches `len` or when ';' or '}' is seen; digits,
// ASCII letters, "'", "-" and "*" are appended to the tag.
// NOTE(review): the excerpt is incomplete - the bodies of the backslash and
// '{' branches, the else branch, the closing braces and any statement that
// advances `j` are missing; presumably the original incremented `j` on each
// pass - confirm against the full source.
function gettag() {
    // gets the text following the backslash character, or gets the param if it is there

    var tag='';
    while(true) {
        if (j>=len) break;
        c = text.substr(j,1);
        // ';' and '}' both terminate the tag.
        if (c==';') break;
        if (c=='}') break;
        if (c=="\\") {
        if (c=="{") {
        // Characters allowed inside a tag name.
        if (((c>='0')&&(c<='9'))||((c>='a')&&(c<='z'))||((c>='A')&&(c<='Z'))||c=="'"||c=="-"||c=="*" ){
            tag= tag+c;
        } else {
            // end of tag
    return tag;

// Scans `text` from the cursor `j` looking at '{' and '}' characters, and
// returns once a '}' is seen while `nest` is 0 (or stops when `j` reaches
// `len`). `text`, `j` and `len` are not declared here; setUpData() assigns
// the same names.
// NOTE(review): the excerpt is incomplete - the statements that change
// `nest` and advance `j`, and all closing braces, are missing. Presumably
// `nest` goes up on '{' and down on '}' so nested groups are skipped as a
// whole - confirm against the full source.
function getpgraph() {
    // if the first char after a tag is { then throw out the entire paragraph
    // this has to be nested

    var nest = 0;
    while(true) {
        if (j>=len) break;
        if (text.substr(j,1)=='}') {

            // A closing brace at nesting level 0 ends the skip.
            if (nest==0) return;
        if (text.substr(j,1)=='{') {

What I expect from this is a clean, well-formatted JSON file that can then be displayed back to the user on the page they are on. Basically, the page should refresh with the newly uploaded, parsed data.

There is a JavaScript RTF parser that may help: https://github.com/lazygyu/RTF-parser. You would then need to JSON-encode the result (for example with `JSON.stringify`).

Not sure if this answers your question, but a little more information might help lock down the fix.

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

粤ICP备18138465号  © 2020-2024 STACKOOM.COM