[英]Parsing Invalid HTML Table to JSON
我正在嘗試構建一個Chrome擴展程序:它讀取一個編碼和格式都有問題的表格,生成JSON,再用由該JSON生成的更乾淨、更美觀的表格替換原表格。表格和腳本如下所示(均可直接運行):
/**
 * Copies an array-like collection into a real Array so that Array methods
 * such as map can be used on it.
 */
function arrayify(collection) {
    return [].slice.call(collection);
}

/**
 * Builds a row converter for the given list of heading strings.
 *
 * The returned function takes a table row and returns an object that maps
 * headings[i] to the innerHTML of the row's i-th cell. When two headings are
 * equal, the later cell overwrites the earlier one under the shared key.
 */
function factory(headings) {
    return function (row) {
        var cells = arrayify(row.cells);
        var record = {};
        for (var idx = 0; idx < cells.length; idx++) {
            record[headings[idx]] = cells[idx].innerHTML;
        }
        return record;
    };
}

/**
 * Converts a table element into an array with one object per row of its
 * first tbody, keyed by the innerHTML of the cells in the first thead row.
 */
function parseTable(table) {
    var headerCells = arrayify(table.tHead.rows[0].cells);
    var headings = headerCells.map(function (cell) {
        return cell.innerHTML;
    });
    var bodyRows = arrayify(table.tBodies[0].rows);
    return bodyRows.map(factory(headings));
}

// Parse the first table on the page and print the result.
var table = document.querySelector("table");
var data = parseTable(table);
console.log(data);
<!-- Sample input, deliberately left malformed: the thead holds its th cells directly (no tr), the first two headings are empty, and each section label (a td with rowspan) sits alone in its own row ahead of the data rows. -->
<!DOCTYPE html> <html> <head> </head> <body> <table> <thead> <th class="col-sm-2"></th> <th class="col-sm-3"></th> <th class="col-sm-1">Eats Grass</th> <th class="col-sm-1">Eats Fish</th> <th class="col-sm-1">Eats Meat</th> <th class="col-sm-1">Drinks Wine</th> </thead> <tbody> <tr> <td class="td-dark" rowspan="2">Animal</td> </tr> <tr> <td class="td-dark">Cow</td> <td class="td-dark"> <input checked="checked" disabled="disabled" type="checkbox" /> </td> <td class="td-dark"></td> <td class="td-dark"></td> <td class="td-dark"></td> </tr> <tr> <td class="" rowspan="9">Mammal</td> </tr> <tr> <td class="">Whale</td> <td class=""> <input checked="checked" disabled="disabled" type="checkbox" /> </td> <td class=""> <input checked="checked" disabled="disabled" type="checkbox" /> </td> <td class=""></td> <td class=""></td> </tr> <tr> <td class="">Area 51 Alien</td> <td class=""> <input checked="checked" disabled="disabled" type="checkbox" /> </td> <td class=""> <input checked="checked" disabled="disabled" type="checkbox" /> </td> <td class=""></td> <td class=""> <input checked="checked" disabled="disabled" type="checkbox" /> </td> </tr> </tbody> </table> </body> </html>
如您所見,問題是控制台JSON根本不是我希望得到的。 我想要的是:
+-+
+-+
|
|
+------+Animal
| +
| |
| |
| |
| +----+ Cow
| +
| +-----+ Eats Grass
+-------+Mammal
+----+ Whale
| +
| |
| +---+ Eats Grass
| |
| +---+ Eats Fish
|
|
|
|
|
|
+-----+ Area 51 Alien
+
|
+-----+
|
+-----+
這個表格很大,大約有100列,包含若干個 sections(例如 Animal),每個 section 下可以有多個、一個或零個 subsections(例如 Cow、Whale)。如何才能做到這一點?我知道這個表格很簡陋,但我無法更改它,因為它不在我的控制範圍之內。
編輯:這個表格的格式是錯誤的,而這正是難點所在!無論如何我都無法修正表格本身,因此我想寫一個插件把它正確地顯示出來。
較長的表格示例:http://pasted.co/5d779888
好的,您可能需要添加一些小調整,但是我得到了一些東西:
// Collects the ticked columns of the page's table into a nested lookup:
//   { section: { subsection: [column heading, ...] } }
// Every <td> on the page is visited in document order and classified:
//   - contains an <input>           -> ticked checkbox for the current column
//   - empty, or contains &nbsp;     -> unticked column
//   - has text and a rowspan attr   -> starts a new section
//   - has text, no rowspan          -> starts a new subsection (a table row)
// The string-keyed levels are plain objects rather than arrays, so the result
// can be passed to JSON.stringify without losing its entries.
var json_result = {};
var tds = document.querySelectorAll('td');
var ths = document.querySelectorAll('th');
var current_section = '';
var current_subsection = '';
var current_col = 0;
var cell_html;
var section_map;
var heading;
var j;

// Index loop instead of for...in: for...in over a NodeList also enumerates
// non-element members such as length and item.
for (j = 0; j < tds.length; j++) {
    cell_html = tds[j].innerHTML;

    if (cell_html.indexOf('<input') > -1) {
        // checked box
        // Create any missing level on demand so that a checkbox encountered
        // before a section/subsection label does not throw.
        section_map = json_result[current_section] || (json_result[current_section] = {});
        if (!section_map[current_subsection]) {
            section_map[current_subsection] = [];
        }
        // +2 skips the two leading header cells that sit above the section
        // and subsection label columns.
        heading = ths[current_col + 2];
        if (heading) {
            section_map[current_subsection].push(heading.innerHTML);
        }
        current_col++;
    }
    else if (cell_html === '' || cell_html.indexOf('&nbsp;') > -1) {
        // Blank cell: innerHTML serializes a non-breaking space as the
        // entity &nbsp;, so that is what has to be searched for. Labels may
        // legitimately contain ordinary spaces (e.g. "Horse Cart").
        current_col++;
    }
    else if (tds[j].attributes.rowspan) {
        //section
        current_section = cell_html;
        json_result[current_section] = {};
    }
    else {
        //subsection
        current_subsection = cell_html;
        section_map = json_result[current_section] || (json_result[current_section] = {});
        section_map[current_subsection] = [];
        current_col = 0;
    }
}
console.log(json_result);
以您的長表格示例為例,輸出如下:
{
"": {
"Grass Eater": [
"Grass Eater"
]
},
"Animal": {
"Cow": [
"Grass Eater",
"Plant Eater",
"Singer",
"Wings"
],
"Platypus": [
"Grass Eater",
"Plant Eater",
"Water Drinker",
"Singer",
"Wings"
],
"Horse": [
"Grass Eater",
"Plant Eater",
"Singer",
"Wings"
],
"Alien": [
"Grass Eater",
"Plant Eater",
"Singer"
],
"Doggo": [
"Grass Eater",
"Plant Eater",
"Singer"
],
"Dragon": [
"Grass Eater",
"Plant Eater",
"Singer"
],
"Horse Cart": [
"Grass Eater",
"Plant Eater",
"Singer"
],
"Alligator": [
"Grass Eater",
"Plant Eater"
]
},
"Mammal": {
"Cow": [
"Grass Eater",
"Plant Eater",
"Singer",
"Dancer",
"Someone",
"Wings"
],
"Platypus": [
"Grass Eater",
"Plant Eater",
"Water Drinker",
"Singer",
"Dancer",
"Someone",
"LSleepyerally",
"Running",
"Out",
"Of",
"Now",
"Vegan",
"Sausage",
"Cheese",
"Nugget",
"Wings",
"Foodie",
"Hoodie",
"Finished",
""
],
"Horse": [
"Grass Eater",
"Plant Eater",
"Singer",
"Dancer",
"Someone",
"Wings"
],
"Alien": [
"Grass Eater",
"Plant Eater",
"Singer",
"Dancer"
],
"Doggo": [
"Grass Eater",
"Plant Eater",
"Singer",
"Dancer"
],
"Dragon": [
"Grass Eater",
"Plant Eater",
"Dancer"
],
"Horse Cart": [
"Grass Eater",
"Plant Eater"
],
"Alligator": [
"Grass Eater",
"Plant Eater"
]
},
"Marsupial": {
"Cow": [
"Grass Eater",
"Plant Eater",
"Fish Eater",
"Singer"
],
"Platypus": [
"Grass Eater",
"Plant Eater",
"Fish Eater",
"Water Drinker",
"Singer"
],
"Horse": [
"Grass Eater",
"Plant Eater",
"Fish Eater",
"Singer"
],
"Alien": [
"Grass Eater",
"Plant Eater",
"Fish Eater",
"Singer"
],
"Doggo": [
"Grass Eater",
"Plant Eater",
"Singer"
],
"Dragon": [
"Grass Eater"
],
"Horse Cart": [
"Grass Eater",
"Plant Eater"
],
"Alligator": [
"Grass Eater"
]
},
"Sleepy Mammal": {
"Cow": [
"Grass Eater",
"Fish Eater",
"Singer",
"Words",
"Hoodie"
],
"Platypus": [
"Grass Eater",
"Fish Eater",
"Water Drinker",
"Singer",
"Words",
"Hoodie"
],
"Horse": [
"Grass Eater",
"Fish Eater",
"Singer",
"Words",
"Hoodie"
],
"Alien": [
"Grass Eater",
"Fish Eater",
"Singer"
],
"Doggo": [
"Grass Eater",
"Fish Eater",
"Singer",
"Words",
"Hoodie"
],
"Dragon": [
"Grass Eater",
"Fish Eater",
"Words",
"Hoodie"
],
"Horse Cart": [
"Grass Eater",
"Plant Eater",
"Fish Eater"
],
"Alligator": [
"Grass Eater",
"Fish Eater"
]
},
"Squsihy": {
"Cow": [
"Grass Eater",
"Plant Eater",
"Fish Eater",
"Singer"
],
"Platypus": [
"Grass Eater",
"Plant Eater",
"Fish Eater",
"Water Drinker",
"Singer"
],
"Horse": [
"Grass Eater",
"Plant Eater",
"Fish Eater",
"Singer"
],
"Alien": [
"Grass Eater",
"Plant Eater",
"Fish Eater",
"Singer"
],
"Doggo": [
"Grass Eater",
"Plant Eater",
"Fish Eater",
"Singer"
],
"Dragon": [
"Grass Eater",
"Plant Eater",
"Fish Eater"
],
"Horse Cart": [
"Grass Eater",
"Plant Eater",
"Fish Eater"
],
"Alligator": [
"Grass Eater",
"Plant Eater",
"Fish Eater"
]
},
"Plushies": {
"Cow": [
"Grass Eater",
"Plant Eater",
"Singer",
"Someone",
"Running",
"Out",
"Of",
"Fake"
],
"Platypus": [
"Grass Eater",
"Plant Eater",
"Water Drinker",
"Oil Drinker",
"Milk Drinker",
"Singer",
"Someone",
"LSleepyerally",
"Running",
"Out",
"Of",
"Fake",
"Words",
"Now",
"Vegan",
"Sausage",
"Cheese",
"Nugget",
"Wings",
"Foodie",
"Hoodie",
"Finished",
""
],
"Horse": [
"Grass Eater",
"Plant Eater",
"Oil Drinker",
"Milk Drinker",
"Singer",
"Someone",
"LSleepyerally",
"Running",
"Out",
"Of",
"Fake",
"Words",
"Now",
"Vegan",
"Sausage",
"Cheese",
"Nugget",
"Wings",
"Foodie",
"Hoodie",
"Finished",
""
],
"Alien": [
"Grass Eater",
"Plant Eater"
]
}
}
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.