简体   繁体   中英

text of a nested bullet-point list to HTML

I have this: Example input:

* First item
* Second item
    * Subitem 1
        * sub-subitem!
    * Subitem 3
* Third item

Example output:

<ul>
    <li>First item</li>
    <li>Second item
        <ul>
            <li>Subitem 1
                <ul>
                    <li>sub-subitem!</li>
                </ul>
            </li>
            <li>Subitem 3</li>
        </ul>
    </li>
    <li>Third item</li>
</ul>

I have created a Java class that converts each line to an array of chars and processes every character individually. My problem is knowing when to close the tags. Any ideas?

Here is my code:

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;

/**
 * First attempt at converting a bullet list stored in "NestedText.txt" into
 * HTML. Each line is split into characters; '*' emits "<li>", a tab may emit
 * "<ul>", and every other character is copied to the output buffer.
 *
 * NOTE(review): nothing in this class ever appends "</ul>" for a nested list,
 * so nested lists are never closed.
 */
public class TextToHtml {
    // Collects the generated markup for the whole file.
    StringBuilder itemName = new StringBuilder();
    // Line most recently returned by readLine().
    String sCurrentLine;
    // Set to 1 or 2 when a tab is seen; never reset back to 0.
    int usingUlTAG=0;

/**
 * Reads "NestedText.txt", builds the markup in itemName and prints it to
 * standard output. I/O errors are printed via printStackTrace().
 */
public TextToHtml(){
        BufferedReader br = null; 
        try {
            // NOTE(review): declared outside the loop and never set back to false,
            // so once one '*' has been seen every later line gets "</li>\n".
            boolean closeLItag=false;
            br = new BufferedReader(new FileReader("NestedText.txt"));
            System.out.println("<ul>");
            while ((sCurrentLine = br.readLine()) != null) {
                    char[] item = sCurrentLine.toCharArray();
                    for(int i=0; i<item.length;i++){
                            // Ordinary character: copy it to the output unchanged.
                            if(item[i]!='*' && item[i]!='\n' && item[i]!='\t'){
                                    itemName.append(item[i]); 
                continue;
            }   
            // Any '*' (not only a leading one) opens a list item.
            if(item[i]=='*'){   
                itemName.append("<li>");
                closeLItag=true;
            }
            else if(item[i]=='\t'){ 
                // NOTE(review): item[i+1] is read without a bounds check; a tab as the
                // last character of a line throws ArrayIndexOutOfBoundsException.
                if(item[i+1]=='*'){ 
                    // NOTE(review): without braces only the first append is guarded by
                    // the if; the second append and the assignment always run.
                    if(usingUlTAG<1)
                    itemName.append("\t<ul>\n\t\t");
                    itemName.append("\t\n\t\t");
                    usingUlTAG= 1;
                    continue;
                }
                // A tab followed by another tab opens a further "<ul>" unconditionally.
                if(item[i+1]=='\t'){    
                    itemName.append("\t\t<ul>\n\n\t\t");
                    usingUlTAG=2;
                    continue;
                }
            }
        }
        // The item is closed at the end of its own line, before any sub-items
        // on the following lines have been seen.
        if(closeLItag){
            itemName.append("</li>\n");
        }

    }       
    // NOTE(review): the literal is "/ul>" - the leading '<' is missing.
    System.out.println(itemName+"/ul>");
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // Close the reader if it was opened, reporting any failure to close.
        try {
            if (br != null)br.close();
        } catch (IOException ex) {
            ex.printStackTrace();
        }
    }
}

/** Entry point: runs the conversion by constructing the class. */
public static void main(String[] args) {
    new TextToHtml();   
}
}

You'll have to look ahead to the next line and see if its list level is different from the current item's. Then you can add or close tags based on the difference in level, if any. Here's code that does this:

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;

/**
 * Converts a tab-indented, asterisk-bulleted text list read from
 * "NestedText.txt" into nested HTML {@code <ul>}/{@code <li>} markup and
 * prints the result to standard output.
 *
 * <p>The nesting level of a line is the number of tab characters at its very
 * start. To know whether an item must be closed, left open for a sub-list, or
 * followed by closing tags of enclosing lists, each line is processed together
 * with a one-line look-ahead at the next item.
 *
 * <p>Only the first {@code *} after the indentation is treated as the bullet;
 * asterisks and tabs inside the item text are copied to the output unchanged.
 * Blank (whitespace-only) lines are ignored. Indentation made of spaces is not
 * interpreted as nesting.
 */
public class TextToHtml
{
    /** Character that introduces a list item. */
    private static final char BULLET = '*';

    /** Markup generated so far (everything between the outer list tags). */
    StringBuilder itemName = new StringBuilder();
    /** The item line currently being converted. */
    String sCurrentLine;
    /** A "peek" at the following item line; {@code null} once the input is exhausted. */
    String sNextLine;

    /**
     * Reads "NestedText.txt", converts it and prints the HTML to standard
     * output. I/O failures (including a missing file) are reported with
     * {@code printStackTrace()} rather than propagated.
     */
    public TextToHtml()
    {
        // try-with-resources closes the reader on every exit path.
        try (BufferedReader br = new BufferedReader(new FileReader("NestedText.txt")))
        {
            System.out.println("<ul>");
            sNextLine = readItemLine(br);
            while ((sCurrentLine = sNextLine) != null)
            {
                sNextLine = readItemLine(br);

                // Both levels come from the same helper so they can never disagree.
                int itemLevel = leadingTabs(sCurrentLine);
                // After the last item the level falls back to 0, which closes
                // every list that is still open.
                int nextItemLevel = (sNextLine == null) ? 0 : leadingTabs(sNextLine);

                appendItemStart(sCurrentLine, itemLevel);
                appendTransition(itemLevel, nextItemLevel);
                itemName.append("\n");
            }
            System.out.println(itemName + "</ul>");
        }
        catch (IOException e)
        {
            e.printStackTrace();
        }
    }

    /**
     * Returns the next line that is not blank, or {@code null} at end of input.
     * Skipping blank lines here keeps them from producing stray closing tags.
     */
    private static String readItemLine(BufferedReader reader) throws IOException
    {
        String line;
        while ((line = reader.readLine()) != null)
        {
            if (!line.trim().isEmpty())
            {
                return line;
            }
        }
        return null;
    }

    /** Counts the tab characters at the start of {@code line}; this is its nesting level. */
    private static int leadingTabs(String line)
    {
        int count = 0;
        while (count < line.length() && line.charAt(count) == '\t')
        {
            count++;
        }
        return count;
    }

    /**
     * Appends the output indentation, the opening {@code <li>} (when the line
     * has a bullet) and the item text of one line.
     *
     * @param line  the raw input line
     * @param level number of leading tabs in {@code line}
     */
    private void appendItemStart(String line, int level)
    {
        // Two output tabs per nesting level.
        for (int i = 0; i < level; i++)
        {
            itemName.append("\t\t");
        }

        int pos = level;
        // Spaces between the tabs and the bullet are passed through as-is.
        while (pos < line.length() && line.charAt(pos) == ' ')
        {
            itemName.append(' ');
            pos++;
        }

        if (pos < line.length() && line.charAt(pos) == BULLET)
        {
            itemName.append("\t<li>");
            pos++;
            // Drop the single space that conventionally follows the bullet;
            // the bounds check covers a line that ends right after '*'.
            if (pos < line.length() && line.charAt(pos) == ' ')
            {
                pos++;
            }
        }

        // Everything else is item text, including any further '*' or tabs.
        itemName.append(line, pos, line.length());
    }

    /**
     * Appends the tags needed to get from an item at {@code itemLevel} to the
     * following item at {@code nextItemLevel}.
     */
    private void appendTransition(int itemLevel, int nextItemLevel)
    {
        if (itemLevel == nextItemLevel)
        {
            // Next is the same level; there are no subitems.
            itemName.append("</li>");
        }
        else if (itemLevel < nextItemLevel)
        {
            // Next is deeper; open one <ul> per level, in case it is more
            // than one level deeper.
            for (int level = itemLevel + 1; level <= nextItemLevel; level++)
            {
                itemName.append("\n");
                for (int j = 0; j < level; j++)
                {
                    itemName.append("\t\t");
                }
                itemName.append("<ul>");

                // Intermediate levels need a placeholder <li>; the next item
                // creates its own <li> at its level.
                if (level != nextItemLevel)
                {
                    itemName.append("\n");
                    for (int j = 0; j < level; j++)
                    {
                        itemName.append("\t\t");
                    }
                    itemName.append("\t<li>");
                }
            }
        }
        else
        {
            // Next is shallower; close this item, then each enclosing list
            // together with the parent item that contained it.
            itemName.append("</li>");
            for (int level = itemLevel - 1; level >= nextItemLevel; level--)
            {
                itemName.append("\n");
                for (int j = 0; j <= level; j++)
                {
                    itemName.append("\t\t");
                }
                itemName.append("</ul>\n");
                for (int j = 0; j < level; j++)
                {
                    itemName.append("\t\t");
                }
                itemName.append("\t</li>");
            }
        }
    }

    /** Entry point: runs the conversion by constructing the class. */
    public static void main(String[] args)
    {
        new TextToHtml();
    }
}

Note that this will only work if the levels are indented with tabs, not spaces.

If, as your current code suggests, the list item lines in the original text all use hard tabs for indentation, then all you need to do is work through the text one line at a time, keeping track of the indentation level (number of tabs) of the previous line. This code doesn't produce nicely indented HTML, but it gets the <ul> and <li> nesting right, which is all an HTML browser really cares about:

import java.io.*;
import java.util.ArrayDeque;
import java.util.Deque;
import java.util.regex.*;

/**
 * Converts the tab-indented bullet list in "NestedText.txt" to HTML and prints
 * it on a single line (no pretty-printing).
 *
 * <p>A line of the form {@code <tabs>* text} starts a new list item whose
 * indent is the number of leading tabs. Any other line is treated as a
 * continuation of the previous item and is appended after a single space.
 */
public class Main {
  /**
   * Group 1: optional item prefix (tabs, '*', space); group 2: the tabs;
   * group 3: the remaining text. DOTALL lets '.' match every character, so
   * the pattern matches any line, even one containing unusual line
   * terminators such as U+2028 that readLine() does not split on.
   */
  private static final Pattern ITEM_PATTERN =
      Pattern.compile("((\\t*)\\* )?(.*)", Pattern.DOTALL);

  /**
   * Reads "NestedText.txt" and prints the generated HTML to standard output.
   *
   * @param args ignored
   * @throws Exception if the file cannot be opened or read
   */
  public static void main(String[] args) throws Exception {
    StringBuilder result = new StringBuilder();
    try (BufferedReader br = new BufferedReader(new FileReader("NestedText.txt"))) {
      // Indent of every <ul> that is currently open, innermost on top. Using
      // the actual indents (not just a counter) keeps the nesting right when
      // the indentation jumps by more than one level in either direction.
      Deque<Integer> openIndents = new ArrayDeque<>();
      String line;
      while ((line = br.readLine()) != null) {
        Matcher m = ITEM_PATTERN.matcher(line);
        m.matches(); // always true for this pattern; required before group()/end()

        if (m.group(1) != null) { // this is a new list item
          int thisIndent = m.end(2); // group 2 starts at 0, so its end is the tab count

          // Leave every list that is deeper than this item, closing its last
          // item and the list itself.
          while (!openIndents.isEmpty() && openIndents.peek() > thisIndent) {
            result.append("</li>");
            result.append("</ul>");
            openIndents.pop();
          }

          if (!openIndents.isEmpty() && openIndents.peek() == thisIndent) {
            // sibling of the previous item at this level
            result.append("</li>");
          } else {
            // first item at a deeper level: start a child list inside the
            // still-open parent item (or the outermost list)
            result.append("<ul>");
            openIndents.push(thisIndent);
          }

          result.append("<li>");
        } else { // this is a continuation of the previous list item
          result.append(" ");
        }
        // append this line's text (not including the indent and *)
        result.append(m.group(3));
      }

      // run out of items, close any outstanding lists
      while (!openIndents.isEmpty()) {
        result.append("</li>");
        result.append("</ul>");
        openIndents.pop();
      }

      System.out.println(result);
    }
  }
}

Here, I treat any line that doesn't start with tabs and a star as a continuation of the previous <li>, i.e.

* This is a very long list
item that continues over several
  lines
* This is a second item
    * this is a child item
  that also continues
   over several lines

is OK.

I have fixed the problem by following Jan Dvorak's advice. The following code works for me, and I am posting it below in case it helps someone else. Thanks for all your contributions.

// Let a Markdown library do the conversion instead of parsing the list by hand.
// NOTE(review): MarkdownProcessor is a third-party class (presumably MarkdownJ);
// confirm the library and that markdown(String) can throw IOException.
MarkdownProcessor m = new MarkdownProcessor();
String html = null;
try {
    // MyString holds the bullet-list text to convert.
    html = m.markdown(MyString);
} catch (IOException e) {
    e.printStackTrace();
}
// If the conversion failed, html is still null and "null" is printed.
System.out.println(html);

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM