簡體   English   中英

將字符串拆分為標記,然后按字符串中出現的順序顯示它們

[英]Splitting up a string into tokens, then displaying them in order of appearance in the string

我正在編寫一個程序來讀取一個字符串,其中包含文本形式的代碼。 該程序掃描輸入搜索關鍵字標記並按出現順序打印它們。 但是它錯過了getKeyword()方法中定義的關鍵字。 出場順序也出乎意料。

提前為代碼道歉,我是編程新手。

public class Testing{
enum OperatorToken {

//這里的代幣 go

}


public static String inputString = "public class HelloWorld";



private static void stringOutput() {
    String[] lines = inputString.split("\\r?\\n");
    for (int i = 0; i < lines.length; i++)
    {
        String line = lines[i];
        for (int j = 0; j < line.length(); j++)
        {
            TokenType tokenTypeChar = getOP(line.charAt(j));
            TokenType tokenTypeSymbol = getSymbol(line.charAt(j));

            if (tokenTypeChar != null){
                System.out.printf("Line %d: %s, %s", i + 1, tokenTypeChar.name(), line.charAt(j));
                System.out.println(" ");
            }else if (tokenTypeSymbol != null){
                System.out.printf("Line %d: %s, %s", i + 1, tokenTypeSymbol.name(), line.charAt(j));
                System.out.println(" ");
            }
        }

        TokenType tokenTypeString = getOP(line);
        TokenType tokenTypeKeyword = getKeyword(line);

        if (tokenTypeKeyword != null)
        {
            System.out.printf("Line %d: %s, %s", i + 1, tokenTypeKeyword.name(), SubString);
            System.out.println(" ");
        }

        else if (tokenTypeString != null)
        {
            System.out.printf("Line %d: %s, %s", i + 1, tokenTypeString.name(), SubString);
            System.out.println(" ");
        }

    }
}

private static TokenType getOP(char ch) {
    switch (ch) {
        case '+':
            return TokenType.OP_ADD;
        case '-':
            return TokenType.OP_SUBTRACT;
        case '/':
            return TokenType.OP_DIVIDE;
        case '*':
            return TokenType.OP_MULTIPLY;
        // more
    }
    return null;
}

public static TokenType getSymbol(char ch)
{

    switch (ch) {
        case '(':
            return TokenType.LEFT_PAREN;
        case ')':
            return TokenType.RIGHT_PAREN;
        case '{':
            return TokenType.LEFT_BRACE;
        case '}':
            return TokenType.RIGHT_BRACE;
        case '[':
            return TokenType.LEFT_BRACKET;
        case ']':
            return TokenType.RIGHT_BRACKET;
        case ';':
            return TokenType.SEMICOLON;
        case ':':
            return TokenType.COLON;
        case ',':
            return TokenType.COMMA;

    }

    return null;

}

public static TokenType getOP(String str)
{

    if (str.contains("<="))
    {
        SubString = "<=";
        return TokenType.OP_LESSEQUAL;
    }

    else if (str.contains(">="))
    {
        SubString = ">=";
        return TokenType.OP_GREATEREQUAL;
    }

    else if (str.contains("=="))
    {
        SubString = "==";
        return TokenType.OP_EQUAL;
    }

    else if (str.contains("!="))
    {
        SubString = "!=";
        return TokenType.OP_NOTEQUAL;
    }

    else if (str.contains("&&"))
    {
        SubString = "&&";
        return TokenType.OP_AND;
    }

    else if (str.contains("||"))
    {
        SubString = "||";
        return TokenType.OP_OR;
    }

    return null;

}


public static TokenType getKeyword(String str)
{
    if (str.contains("if"))
    {
        SubString = "if";
        return TokenType.KEYWORD_IF;
    }

    else if (str.contains("else"))
    {
        SubString = "else";
        return TokenType.KEYWORD_ELSE;
    }

    else if (str.contains("while"))
    {
        SubString = "while";
        return TokenType.KEYWORD_WHILE;
    }

    else if (str.contains("return"))
    {
        SubString = "return";
        return TokenType.KEYWORD_RETURN;
    }

    else if (str.contains("main"))
    {
        SubString = "main";
        return TokenType.KEYWORD_MAIN;
    }

    else if (str.contains("int"))
    {
        SubString = "int";
        return TokenType.KEYWORD_INT;
    }

    else if (str.contains("double"))
    {
        SubString = "double";
        return TokenType.KEYWORD_DOUBLE;
    }
    else if (str.contains("String"))
    {
        SubString = "String";
        return TokenType.KEYWORD_STRING;
    }

    else if (str.contains("boolean"))
    {
        SubString = "boolean";
        return TokenType.BOOLEAN;
    }

    else if (str.contains("public"))
    {
        SubString = "public";
        return TokenType.KEYWORD_PUBLIC;
    }

    else if (str.contains("class"))
    {
        SubString = "class";
        return TokenType.KEYWORD_CLASS;
    }

    else if (str.contains("void"))
    {
        SubString = "void";
        return TokenType.KEYWORD_VOID;
    }

    else if (str.contains("for"))
    {
        SubString = "for";
        return TokenType.KEYWORD_FOR;
    }

    else if (str.contains("case"))
    {
        SubString = "case";
        return TokenType.KEYWORD_CASE;
    }

    else if (str.contains("static"))
    {
        SubString = "static";
        return TokenType.KEYWORD_STATIC;
    }

    else if (str.contains("break"))
    {
        SubString = "break";
        return TokenType.KEYWORD_BREAK;
    }

    else if (str.contains("continue"))
    {
        SubString = "continue";
        return TokenType.KEYWORD_CONTINUE;
    }

    else if (str.contains("default"))
    {
        SubString = "default";
        return TokenType.KEYWORD_DEFAULT;
    }
    return null;
}

public static void main(String[] args) {

    stringOutput();


    }
}

}

當前 output:

Line 1: LEFT_BRACE, { 
Line 1: KEYWORD_PUBLIC, public 
Line 2: LEFT_PAREN, ( 
Line 2: LEFT_BRACKET, [ 
Line 2: RIGHT_BRACKET, ] 
Line 2: RIGHT_PAREN, ) 
Line 2: LEFT_BRACE, { 
Line 2: KEYWORD_MAIN, main 
Line 3: SEMICOLON, ; 
Line 3: KEYWORD_INT, int 
Line 4: SEMICOLON, ; 
Line 4: KEYWORD_DOUBLE, double 
Line 5: OP_ADD, + 
Line 5: OP_MULTIPLY, * 
Line 5: SEMICOLON, ; 
Line 5: KEYWORD_DOUBLE, double 
Line 6: LEFT_PAREN, ( 
Line 6: COLON, : 
Line 6: OP_ADD, + 
Line 6: RIGHT_PAREN, ) 
Line 6: SEMICOLON, ; 
Line 6: KEYWORD_INT, int 
Line 7: RIGHT_BRACE, } 
Line 8: RIGHT_BRACE, } 

您設置 for 循環的方式,每行只會打印出一個關鍵字。 那是因為您正在循環遍歷程序中的每一行文本並在該循環內調用一次 getKeyword。 但是,一行代碼可能包含多個關鍵字。

例如,程序的第一行有 2 個關鍵字,但是當您將該行傳遞給 getKeyword 方法時,它只會返回一個關鍵字:

getKeyword("public class HelloWorld {")
>> public

您可以從循環遍歷行中每個單詞的內部循環開始:

for (int i = 0; i < lines.length; i++)
{
    String line = lines[i];

    // Parse symbols
    for (int j = 0; j < line.length(); j++)
    {
        TokenType tokenTypeChar = getOP(line.charAt(j));
        TokenType tokenTypeSymbol = getSymbol(line.charAt(j));

        if (tokenTypeChar != null){
            System.out.printf("Line %d: %s, %s", i + 1, tokenTypeChar.name(), line.charAt(j));
            System.out.println(" ");
        } else if (tokenTypeSymbol != null){
            System.out.printf("Line %d: %s, %s", i + 1, tokenTypeSymbol.name(), line.charAt(j));
            System.out.println(" ");
        }
    }

    // Parse keywords in line
    String[] words = line.split("\\s+");
    for (String word : words)
    {
        TokenType tokenTypeKeyword = getKeyword(word);    
        if (tokenTypeKeyword != null) {
            System.out.println("Keyword found: " + tokenTypeKeyword);
        }
    }
}

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM