简体   繁体   中英

Converting VBScript to Javascript, What is the right way of parsing source code?

I was asked to convert some VB6/VBScript code to JavaScript. After googling and not finding anything I could use, I wrote a small JavaScript function to help with the conversion. It is crude and only converts (some of) the syntax, but it worked for the job I had. Now I'm thinking of improving it, but the method I used is primitive (regular-expression matching and replacing). So my question is: what is the proper way to parse source code? Is there any (not so complicated) way of doing it? I don't want to use EXEs; it must be done entirely in JavaScript. I'm not searching for ready-to-use source code (I don't think it exists!), but I want to learn how to start with source code and turn it into objects (the opposite of serialization, I think?).

//here is the code:

// Shared store of string literals: HideStrings pushes each literal it swaps
// out for a placeholder, and UnHideStrings reads them back by index.
var strs=[];

/**
 * Translates the first VBScript/VB6 `Function ... End Function` block found in
 * `vbs` into JavaScript source, line by line, using regular expressions.
 *
 * Handled constructs: Dim, For/Next (with optional Step), Exit For,
 * If/ElseIf/Else/End If, While/Wend, Do While/Loop, Exit Function,
 * Select Case, Const and ' comments. Any other line is copied through with a
 * ";" appended. An assignment to the function's own name becomes `return`.
 *
 * Known limits of this regex approach: only one Function block is converted,
 * `For Each`, `Do Until`, `Not` and multi-value `Case a, b` are passed through
 * untranslated, and `Exit For` inside a Select Case becomes a `break` that
 * only leaves the generated switch.
 *
 * @param {string} vbs - VBScript source containing a Function block.
 * @returns {string} The generated JavaScript, indented by jsIndenter.
 * @throws {TypeError} If no `Function ... End Function` block is present.
 */
function vbsTojs(vbs){

    // Rewrites VBScript comparison/logical operators into JavaScript ones.
    function toJsCondition(cond){
        return cond
            // A bare "=" means equality; "<=" and ">=" must be left intact.
            .replace(/([<>]?)=/g, function(whole, rel){ return rel ? whole : "=="; })
            .replace(/<>/g,"!=")
            .replace(/\bOR\b/gi,"||")
            .replace(/\bAND\b/gi,"&&")
            .trim();
    }

    // Returns the last emitted line that is not a comment, or "" if none.
    function lastCodeLine(lines){
        for(let k=lines.length-1;k>=0;k--){
            if(lines[k].indexOf("//")!==0) return lines[k];
        }
        return "";
    }

    // String literals are swapped for placeholders first so that none of the
    // rewrites below can touch their contents.
    let s = HideStrings(vbs);

    //only function block
    const block = s.match(/Function[\w\W]+End\s+Function/i);
    if(block===null){
        throw new TypeError("vbsTojs: no Function ... End Function block found");
    }
    s = block[0];

    //line-continuation char (tolerates trailing blanks and CRLF line ends)
    s = s.replace(/_[ \t]*\r?\n/g,"");

    //replace ":" (statement separator) with a line break
    s = s.replace(/:/g,"\n");

    //move inline comment to its own line, above the statement
    s = s.replace(/^(.+)'(.*)$/gim,"'$2\n$1");

    //single line if -> multiple line; only real If lines that carry a
    //statement after Then qualify, so trailing blanks add no stray End If
    s = s.replace(/^([ \t]*if\b[^'\r\n]*?\bthen)[ \t]+(\S.*)$/gim,"$1\n$2\nEnd If");
    //an ElseIf with an inline statement stays inside the enclosing If block
    s = s.replace(/^([ \t]*elseif\b[^'\r\n]*?\bthen)[ \t]+(\S.*)$/gim,"$1\n$2");

    //trim both ends of every line (this also drops "\r") and remove empty items
    const a = s.split('\n')
        .map(function(val){ return val.trim(); })
        .filter(function(val){ return val !== ""; });

    // An inline comment on the header line has just been moved above it;
    // swap the two so the header is parsed from a[0] again.
    if(a.length>1 && a[0].charAt(0)==="'" && /^function\b/i.test(a[1])){
        a.splice(0,2,a[1],a[0]);
    }

    //function header: name plus a bare, comma-separated parameter list
    const header = a[0].replace(/function\s+/i,"");
    const Fx = header.match(/^\w+/)[0];
    const args = header.replace(Fx,"").replace(/[\(\)]/g,"")
        .replace(/\bbyval\b/gi,"").replace(/\bbyref\b/gi,"").replace(/\boptional\b/gi,"")
        .replace(/\bas\s+\w+\b/gi,"")
        .replace(/\s+/g,"")
        .replace(/,/g,", ");
    const FxHead = "function " + Fx+ " ("+ args + "){";

    //body = everything between the header and the closing "End Function"
    const lines = a.slice(1,a.length-1);
    const body = [];
    let Vars = '';

    for(let i=0;i<lines.length;i++){
        let line = lines[i];
        let m;

        //Vars: collected and emitted as one declaration under the header
        if(/^dim\s+/i.test(line)){
            Vars += line.replace(/dim\s*/i,"") + ",";
            continue;
        }

        //FOR (unsupported forms such as For Each fall through untranslated)
        if((m = line.match(/^FOR\s+(\w+)\s*=\s*(.+?)\s+TO\s+(.+?)(?:\s+STEP\s+(.+))?$/i)) !== null){
            const step = m[4];
            // A literal negative step counts down, so the bound test flips.
            const cmp = (step !== undefined && /^-/.test(step)) ? ">=" : "<=";
            const inc = step === undefined ? m[1] + "++" : m[1] + "+=" + step;
            line = "for(" + m[1] + "=" + m[2] + "; " + m[1] + cmp + m[3] + "; " + inc + "){";

        //NEXT
        }else if(/^NEXT\b/i.test(line)){
            line = "}";

        //EXIT FOR
        }else if(/^EXIT\s+FOR\b/i.test(line)){
            line = "break";

        //IF
        }else if(/^IF\s+/i.test(line)){
            line = "if(" + toJsCondition(line.replace(/^IF\s+/i,"").replace(/\s*\bTHEN$/i,"")) + "){";

        //ELSEIF (must be tested before ELSE, which would swallow it)
        }else if(/^ELSEIF\b/i.test(line)){
            line = "}else if(" + toJsCondition(line.replace(/^ELSEIF\s*/i,"").replace(/\s*\bTHEN$/i,"")) + "){";

        //ELSE
        }else if(/^ELSE/i.test(line)){
            line = "}else{";

        //END IF
        }else if(/^END\s*IF/i.test(line)){
            line = "}";

        //WHILE
        }else if(/^WHILE\s/i.test(line)){
            line = "while(" + toJsCondition(line.replace(/^WHILE\s+/i,"")) + "){";

        //WEND
        }else if(/^WEND/i.test(line)){
            line = "}";

        //DO WHILE
        }else if(/^DO\s+WHILE\s/i.test(line)){
            line = "while(" + toJsCondition(line.replace(/^DO\s+WHILE\s+/i,"")) + "){";

        //LOOP
        }else if(/^LOOP$/i.test(line)){
            line = "}";

        //EXIT FUNCTION
        }else if(/^EXIT\s+FUNCTION\b/i.test(line)){
            line = "return";

        //SELECT CASE
        }else if((m = line.match(/^SELECT\s+CASE\s+(.+)$/i)) !== null){
            line = "switch(" + m[1] + "){";
        }else if(/^END\s+SELECT/i.test(line)){
            line = "}";
        }else if(/^CASE\b/i.test(line)){
            // VBScript cases never fall through, so close the previous case
            // unless this is the first label right after the switch header.
            if(!/^switch\(/.test(lastCodeLine(body))) body.push("break");
            line = /^CASE\s+ELSE\b/i.test(line)
                ? "default:"
                : line.replace(/^CASE/i,"case") + ":";

        //CONST
        }else if(/^CONST\b/i.test(line)){
            line = line.replace(/^CONST/i,"const");
        }

        //COMMENT
        if(line.charAt(0)==="'"){
            line = "//" + line.slice(1);
        }else if(line.indexOf("'")>-1){
            line = line.replace(/\'(.*)$/,"//$1");
        }

        body.push(line);
    }

    //strip "As <Type>" (whole word only) and build a single var statement
    Vars = Vars.replace(/\s*\bAS\s+\w+\s*/gi,"");
    if(Vars!==""){
        Vars = "var " + Vars.replace(/,$/,"").split(/\s*,\s*/).join(", ") + ";";
    }

    //terminate statements; block openers/closers, labels and comments stay as-is
    const finished = body.map(function(stmt){
        return (/[^}{:]$/.test(stmt) && stmt.indexOf("//")!==0) ? stmt + ";" : stmt;
    });

    let ss = FxHead + '\n' + Vars + '\n' + finished.join('\n') + '\n}';

    //"FunctionName = value" is how VBScript sets the result; "\b" and "(?!=)"
    //keep this from hitting longer identifiers or "==" comparisons
    ss = ss.replace(new RegExp("\\b" + Fx + "\\s*=(?!=)\\s*","gi"),"return ");

    ss = UnHideStrings(ss);

    return jsIndenter(ss);
}




//-----------------------------------------------------

/**
 * Re-indents JavaScript source by brace depth, four spaces per level.
 *
 * Every line is trimmed and blank lines are dropped. A line that starts with
 * "}" is printed one level shallower; after each line the depth changes by
 * the number of "{" minus the number of "}" it contains, so a balanced line
 * such as `x = {};` leaves the indentation alone. Braces are counted
 * textually, so braces inside strings or comments are counted too.
 *
 * @param {string} js - Newline-separated JavaScript source.
 * @returns {string} The re-indented source, lines joined with "\n".
 */
function jsIndenter(js){

    // trim() strips both ends; a /^\s+|\s+$/ replace without the g flag
    // would strip only one of them.
    const lines = js.split('\n')
        .map(function(val){ return val.trim(); })
        .filter(function(val){ return val !== ""; });

    let depth = 0;
    const out = [];

    for(let i=0;i<lines.length;i++){
        const line = lines[i];
        const opens = line.split("{").length - 1;
        const closes = line.split("}").length - 1;

        // A leading "}" closes the enclosing block, so it sits one level out.
        const lead = line.charAt(0)==="}" ? 1 : 0;
        const level = Math.max(depth - lead, 0);

        out.push(" ".repeat(level * 4) + line);

        // Never go negative on unbalanced input.
        depth = Math.max(depth + opens - closes, 0);
    }
    return out.join('\n');
}


/**
 * Builds a string made of StrToFill repeated Count times.
 *
 * @param {number} Count - How many copies to concatenate.
 * @param {string} StrToFill - The piece to repeat.
 * @returns {string} The repeated string; "" when Count is loosely equal to 0,
 *     StrToFill is loosely equal to "", or Count is below 1.
 */
function StrFill(Count,StrToFill){
    // Loose comparisons are deliberate: they keep the coercion rules intact.
    if(!(StrToFill!="" && Count!=0)){
        return "";
    }

    var built = "";
    var step = 1;
    while(step<=Count){
        built = built + StrToFill;
        step += 1;
    }
    return built;
}

/**
 * Replaces every VBScript string literal in `text` with a placeholder of the
 * form BEL + index + BEL (BEL = char code 7) and appends the literal, quotes
 * included, to the shared `strs` array.
 *
 * A literal is matched as a whole: it may be empty and may contain doubled
 * quotes (""), VBScript's escape for an embedded quote. Literals cannot span
 * lines; an unterminated quote is left in place.
 *
 * @param {string} text - VBScript source.
 * @returns {string} The source with each literal swapped for its placeholder.
 */
function HideStrings(text){

    const x = String.fromCharCode(7);

    return text.replace(/"(?:[^"\r\n]|"")*"/g, function(literal){
        // Index by the store's current size so placeholders stay correct even
        // when strs already holds literals from an earlier call.
        const idx = strs.length;
        strs.push(literal);
        return x + idx + x;
    });
}

/**
 * Puts back the literals hidden by HideStrings, rewritten as JavaScript
 * double-quoted strings: backslashes (plain characters in VBScript) are
 * escaped and each doubled quote becomes \".
 *
 * @param {string} text - Text containing BEL + index + BEL placeholders.
 * @returns {string} The text with every known placeholder restored; a
 *     placeholder whose index is not in `strs` is left untouched.
 */
function UnHideStrings(text){
    // A replacer function is used so "$&", "$1" etc. inside a literal are
    // inserted verbatim instead of being read as replacement patterns.
    return text.replace(/\x07(\d+)\x07/g, function(placeholder, idx){
        const literal = strs[Number(idx)];
        if(literal === undefined){
            return placeholder;
        }
        const inner = literal.slice(1,-1)
            .replace(/\\/g,"\\\\")
            .replace(/""/g,'\\"');
        return '"' + inner + '"';
    });
}

The proper way to parse source code for any programming language is to use a parser . Regular expressions are a useful part of (some) parsers, but a parser is a different sort of thing. There is quite a body of research and techniques in the Computer Science literature on the subject of parsing, and it's a fascinating pursuit to study.

"Converting" a bunch of Visual Basic code to Javascript is a project that seems inherently fraught with peril and mystery. A Visual Basic parser will be just the first significant hurdle to conquer. After that, you'll need to figure out how to semantically represent the Visual Basic operations in Javascript. Depending on the original context of the code, that could be somewhat weird. (You don't mention anything about where this code all runs.)

As enriching a learning experience as this might be, it's not unlikely that translating the code by hand will (in the end) take less time and produce better results. That's particularly true if you're just now finding out that there is such a thing as a "parser".

Nice job. Sounds like you did something that might not be perfect, but it did the job.

I'd recommend looking into parsers and grammars if you want to make it more sophisticated. There are lots of parser generators that would be able to help you. You'd have to come up with a grammar for the source language, generate the lexer/parser, and then use that to generate an abstract syntax tree (AST). Once you have that, you can walk the AST and ask it to emit any code you want.

It's doable but, as Oded says, it's not trivial.

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM