簡體   English   中英

將無效的HTML表解析為JSON

[英]Parsing Invalid HTML Table to JSON

我正在嘗試構建一個Chrome擴展程序(目的是:該擴展程序讀取編碼和格式都很糟糕的表格,生成JSON,並用從JSON生成的更乾淨、更漂亮的表格替換原表格)。 該表格和腳本如下所示(它們是可執行的):

 // Naive table-to-JSON conversion from the question: one object per body row, keyed by header cell markup.

/**
 * Copies an array-like collection into a real Array.
 *
 * @param {Object} collection - Array-like value (e.g. the `cells` or `rows` of a table).
 * @returns {Array} A new array holding the same items in the same order.
 */
function arrayify(collection) {
    return [].slice.call(collection);
}

/**
 * Builds a row converter bound to a list of headings.
 *
 * @param {string[]} headings - Key to use for the cell at the same index.
 * @returns {function(Object): Object} Function mapping a table row to an
 *     object whose values are the `innerHTML` of the row's cells.
 */
function factory(headings) {
    return function (row) {
        var record = {};
        arrayify(row.cells).forEach(function (cell, index) {
            // Cells sharing a heading (or lacking one) overwrite each other.
            record[headings[index]] = cell.innerHTML;
        });
        return record;
    };
}

/**
 * Converts a table into an array of row objects.
 *
 * Headings come from the first row of the table head; data rows come from
 * the first table body.
 *
 * @param {Object} table - Table element exposing `tHead` and `tBodies`.
 * @returns {Object[]} One object per body row.
 */
function parseTable(table) {
    var headerCells = arrayify(table.tHead.rows[0].cells);
    var headings = headerCells.map(function (headerCell) {
        return headerCell.innerHTML;
    });
    var bodyRows = arrayify(table.tBodies[0].rows);
    return bodyRows.map(factory(headings));
}

// Run against the first table on the page and dump the result.
var table = document.querySelector("table");
var data = parseTable(table);
console.log(data);
 <!-- Sample input from the question, kept exactly as given: the header cells sit directly inside <thead> without a <tr>, and each section label (the rowspan cell, e.g. "Animal") is alone in its own <tr>, followed by rows whose first cell is a subsection label. -->
 <!DOCTYPE html> <html> <head> </head> <body> <table> <thead> <th class="col-sm-2"></th> <th class="col-sm-3"></th> <th class="col-sm-1">Eats Grass</th> <th class="col-sm-1">Eats Fish</th> <th class="col-sm-1">Eats Meat</th> <th class="col-sm-1">Drinks Wine</th> </thead> <tbody> <tr> <td class="td-dark" rowspan="2">Animal</td> </tr> <tr> <td class="td-dark">Cow</td> <td class="td-dark"> <input checked="checked" disabled="disabled" type="checkbox" /> </td> <td class="td-dark"></td> <td class="td-dark"></td> <td class="td-dark"></td> </tr> <tr> <td class="" rowspan="9">Mammal</td> </tr> <tr> <td class="">Whale</td> <td class=""> <input checked="checked" disabled="disabled" type="checkbox" /> </td> <td class=""> <input checked="checked" disabled="disabled" type="checkbox" /> </td> <td class=""></td> <td class=""></td> </tr> <tr> <td class="">Area 51 Alien</td> <td class=""> <input checked="checked" disabled="disabled" type="checkbox" /> </td> <td class=""> <input checked="checked" disabled="disabled" type="checkbox" /> </td> <td class=""></td> <td class=""> <input checked="checked" disabled="disabled" type="checkbox" /> </td> </tr> </tbody> </table> </body> </html> 

如您所見,問題是控制台JSON根本不是我希望得到的。 我想要的是:

+-+
+-+
 |
 |
 +------+Animal
 |             +
 |             |
 |             |
 |             |
 |             +----+ Cow
 |                     +
 |                     +-----+ Eats Grass
 +-------+Mammal
               +----+ Whale
               |       +
               |       |
               |       +---+  Eats Grass
               |       |
               |       +---+  Eats Fish
               |
               |
               |
               |
               |
               |
               +-----+ Area 51 Alien
                           +
                           |
                           +-----+
                           |
                           +-----+

表格很大,大約有100列,有幾個sections (例如Animal ),每個section可以有多個、一個或沒有subsections (例如Cow、Whale )。 如何才能做到這一點? 我知道這個表格很粗糙,但是更改它是不可能的,因為它超出了我的控制範圍。

編輯:表格的格式是錯誤的——這正是挑戰所在! 無論如何我都無法修改這個表格。 因此,我試圖編寫一個插件來正確顯示它。

表格的較長示例:http://pasted.co/5d779888

好的,您可能需要添加一些小調整,但是我得到了一些東西:

/**
 * Collects, for every subsection of a section/subsection table, the headings
 * of the columns that carry a tick.
 *
 * Cells are visited in document order and classified by their markup:
 *   - a cell containing "<input" is a tick in the current column;
 *   - an empty or whitespace-only cell is an unticked column;
 *   - any other cell with a `rowspan` attribute starts a new section;
 *   - any other cell starts a new subsection of the current section.
 *
 * NOTE: any <input> counts as a tick, whether or not it is checked.
 *
 * @param {Object} root - Document or element exposing `querySelectorAll`.
 * @param {number} [labelColumns=2] - Number of leading header cells that
 *     label the section/subsection columns instead of a trait column.
 * @returns {Object} Plain object mapping section name -> subsection name ->
 *     array of heading texts, so it survives `JSON.stringify`.
 */
function parseTableSections(root, labelColumns) {
    var headerOffset = labelColumns === undefined ? 2 : labelColumns;
    var cells = root.querySelectorAll('td');
    var headings = root.querySelectorAll('th');
    var result = {};
    var section = '';
    var subsection = null;
    var column = 0;

    // Indexed loop: for...in over a NodeList also yields "length", "item", etc.
    for (var i = 0; i < cells.length; i++) {
        var cell = cells[i];
        var html = cell.innerHTML;

        if (html.indexOf('<input') > -1) {
            var heading = headings[column + headerOffset];
            // Ignore ticks that have no subsection or no heading to attach to
            // instead of throwing half-way through the table.
            if (subsection !== null && heading) {
                result[section][subsection].push(heading.innerHTML.trim());
            }
            column++;
        } else if (html.trim() === '') {
            // Unticked column: covers both <td></td> and indentation-only cells.
            column++;
        } else if (cell.hasAttribute('rowspan')) {
            section = html.trim();
            result[section] = {};
            // A subsection of the previous section must not leak into this one.
            subsection = null;
        } else {
            subsection = html.trim();
            // A subsection row may appear before any section row.
            if (!Object.prototype.hasOwnProperty.call(result, section)) {
                result[section] = {};
            }
            result[section][subsection] = [];
            column = 0;
        }
    }

    return result;
}

var json_result = {};

// Only touch the DOM when one exists, so the helper can be loaded elsewhere.
if (typeof document !== 'undefined') {
    json_result = parseTableSections(document);
    console.log(json_result);
}

以您的較長表格示例作為輸入,得到的結果如下:

{
    "": {
        "Grass Eater": [
            "Grass Eater"
        ]
    },
    "Animal": {
        "Cow": [
            "Grass Eater",
            "Plant Eater",
            "Singer",
            "Wings"
        ],
        "Platypus": [
            "Grass Eater",
            "Plant Eater",
            "Water Drinker",
            "Singer",
            "Wings"
        ],
        "Horse": [
            "Grass Eater",
            "Plant Eater",
            "Singer",
            "Wings"
        ],
        "Alien": [
            "Grass Eater",
            "Plant Eater",
            "Singer"
        ],
        "Doggo": [
            "Grass Eater",
            "Plant Eater",
            "Singer"
        ],
        "Dragon": [
            "Grass Eater",
            "Plant Eater",
            "Singer"
        ],
        "Horse Cart": [
            "Grass Eater",
            "Plant Eater",
            "Singer"
        ],
        "Alligator": [
            "Grass Eater",
            "Plant Eater"
        ]
    },
    "Mammal": {
        "Cow": [
            "Grass Eater",
            "Plant Eater",
            "Singer",
            "Dancer",
            "Someone",
            "Wings"
        ],
        "Platypus": [
            "Grass Eater",
            "Plant Eater",
            "Water Drinker",
            "Singer",
            "Dancer",
            "Someone",
            "LSleepyerally",
            "Running",
            "Out",
            "Of",
            "Now",
            "Vegan",
            "Sausage",
            "Cheese",
            "Nugget",
            "Wings",
            "Foodie",
            "Hoodie",
            "Finished",
            ""
        ],
        "Horse": [
            "Grass Eater",
            "Plant Eater",
            "Singer",
            "Dancer",
            "Someone",
            "Wings"
        ],
        "Alien": [
            "Grass Eater",
            "Plant Eater",
            "Singer",
            "Dancer"
        ],
        "Doggo": [
            "Grass Eater",
            "Plant Eater",
            "Singer",
            "Dancer"
        ],
        "Dragon": [
            "Grass Eater",
            "Plant Eater",
            "Dancer"
        ],
        "Horse Cart": [
            "Grass Eater",
            "Plant Eater"
        ],
        "Alligator": [
            "Grass Eater",
            "Plant Eater"
        ]
    },
    "Marsupial": {
        "Cow": [
            "Grass Eater",
            "Plant Eater",
            "Fish Eater",
            "Singer"
        ],
        "Platypus": [
            "Grass Eater",
            "Plant Eater",
            "Fish Eater",
            "Water Drinker",
            "Singer"
        ],
        "Horse": [
            "Grass Eater",
            "Plant Eater",
            "Fish Eater",
            "Singer"
        ],
        "Alien": [
            "Grass Eater",
            "Plant Eater",
            "Fish Eater",
            "Singer"
        ],
        "Doggo": [
            "Grass Eater",
            "Plant Eater",
            "Singer"
        ],
        "Dragon": [
            "Grass Eater"
        ],
        "Horse Cart": [
            "Grass Eater",
            "Plant Eater"
        ],
        "Alligator": [
            "Grass Eater"
        ]
    },
    "Sleepy Mammal": {
        "Cow": [
            "Grass Eater",
            "Fish Eater",
            "Singer",
            "Words",
            "Hoodie"
        ],
        "Platypus": [
            "Grass Eater",
            "Fish Eater",
            "Water Drinker",
            "Singer",
            "Words",
            "Hoodie"
        ],
        "Horse": [
            "Grass Eater",
            "Fish Eater",
            "Singer",
            "Words",
            "Hoodie"
        ],
        "Alien": [
            "Grass Eater",
            "Fish Eater",
            "Singer"
        ],
        "Doggo": [
            "Grass Eater",
            "Fish Eater",
            "Singer",
            "Words",
            "Hoodie"
        ],
        "Dragon": [
            "Grass Eater",
            "Fish Eater",
            "Words",
            "Hoodie"
        ],
        "Horse Cart": [
            "Grass Eater",
            "Plant Eater",
            "Fish Eater"
        ],
        "Alligator": [
            "Grass Eater",
            "Fish Eater"
        ]
    },
    "Squsihy": {
        "Cow": [
            "Grass Eater",
            "Plant Eater",
            "Fish Eater",
            "Singer"
        ],
        "Platypus": [
            "Grass Eater",
            "Plant Eater",
            "Fish Eater",
            "Water Drinker",
            "Singer"
        ],
        "Horse": [
            "Grass Eater",
            "Plant Eater",
            "Fish Eater",
            "Singer"
        ],
        "Alien": [
            "Grass Eater",
            "Plant Eater",
            "Fish Eater",
            "Singer"
        ],
        "Doggo": [
            "Grass Eater",
            "Plant Eater",
            "Fish Eater",
            "Singer"
        ],
        "Dragon": [
            "Grass Eater",
            "Plant Eater",
            "Fish Eater"
        ],
        "Horse Cart": [
            "Grass Eater",
            "Plant Eater",
            "Fish Eater"
        ],
        "Alligator": [
            "Grass Eater",
            "Plant Eater",
            "Fish Eater"
        ]
    },
    "Plushies": {
        "Cow": [
            "Grass Eater",
            "Plant Eater",
            "Singer",
            "Someone",
            "Running",
            "Out",
            "Of",
            "Fake"
        ],
        "Platypus": [
            "Grass Eater",
            "Plant Eater",
            "Water Drinker",
            "Oil Drinker",
            "Milk Drinker",
            "Singer",
            "Someone",
            "LSleepyerally",
            "Running",
            "Out",
            "Of",
            "Fake",
            "Words",
            "Now",
            "Vegan",
            "Sausage",
            "Cheese",
            "Nugget",
            "Wings",
            "Foodie",
            "Hoodie",
            "Finished",
            ""
        ],
        "Horse": [
            "Grass Eater",
            "Plant Eater",
            "Oil Drinker",
            "Milk Drinker",
            "Singer",
            "Someone",
            "LSleepyerally",
            "Running",
            "Out",
            "Of",
            "Fake",
            "Words",
            "Now",
            "Vegan",
            "Sausage",
            "Cheese",
            "Nugget",
            "Wings",
            "Foodie",
            "Hoodie",
            "Finished",
            ""
        ],
        "Alien": [
            "Grass Eater",
            "Plant Eater"
        ]
    }
}

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM