[英]Extract the data between script tag using java regular expression
我想提取腳本標記之間給定的數據。 我也為此做了一個正則表達式,但是它不適用於此字符串-
<script>
// Arrow images for the slide menu. Each entry is read by buildmenu as
// [CSS class, image path, right padding]; the padding slot is only read
// from the `down` entry.
var arrowimages = {
  down: ['downarrowclass', 'Images/arrow-down.gif', 23],
  right: ['rightarrowclass', 'Images/submenu-pointer.png']
};
/**
 * jQuery-based drop-down slide menu.
 *
 * animateduration: slide-in (`over`) and slide-out (`out`) animation
 * durations, in milliseconds.
 *
 * buildmenu(menuid, arrowsvar): once the document is ready, wires up the
 * menu whose container element has the id `menuid`. `arrowsvar` supplies the
 * arrow images as `down` / `right` arrays of
 * [CSS class, image path, right padding].
 */
var jqueryslidemenu = {
  animateduration: {
    over: 400,
    out: 400
  },

  buildmenu: function (menuid, arrowsvar) {
    jQuery(document).ready(function ($) {
      var $mainmenu = $("#" + menuid + ">ul");
      var $headers = $mainmenu.find("ul").parent();

      $headers.each(function (i) {
        var $header = $(this);
        var $submenu = $(this).find('ul:eq(0)');

        // Cache the sizes on the element itself; the hover handler reads them back.
        this._dimensions = {
          w: this.offsetWidth,
          h: this.offsetHeight,
          subulw: $submenu.outerWidth(),
          subulh: $submenu.outerHeight()
        };

        // A header is "top level" when exactly one <ul> encloses it.
        var isTopLevel = $header.parents("ul").length === 1;
        this.istopheader = isTopLevel;

        // Top-level submenus open below the header, nested ones at the same height.
        var submenuTop = 0;
        if (isTopLevel) {
          submenuTop = this._dimensions.h + "px";
        }
        $submenu.css({
          top: submenuTop
        });

        // Reserve room for the arrow (top level only), then append the arrow image.
        var $anchor = $header.children("a:eq(0)");
        var anchorStyle = {};
        if (isTopLevel) {
          anchorStyle.paddingRight = arrowsvar.down[2];
        }
        var $styledAnchor = $anchor.css(anchorStyle);
        var arrow = isTopLevel ? arrowsvar.down : arrowsvar.right;
        $styledAnchor.append(
          '<img src="' + arrow[1] + '" class="' + arrow[0] + '" style="border:0;top:12px;" />'
        );

        $header.hover(
          function (e) {
            var $dropdown = $(this).children("ul:eq(0)");
            this._offsets = {
              left: $(this).offset().left,
              top: $(this).offset().top
            };

            var size = this._dimensions;
            var leftOffset = this.istopheader ? 0 : size.w;
            var overflowsWindow = this._offsets.left + leftOffset + size.subulw > $(window).width();
            if (overflowsWindow) {
              // Flip the submenu to the other side so it stays inside the window.
              leftOffset = this.istopheader ? -size.subulw + size.w : -size.w;
            }

            // Only animate when 1 or fewer animations are queued.
            if ($dropdown.queue().length <= 1) {
              $dropdown.css({
                left: leftOffset + "px",
                width: size.subulw + 'px'
              }).slideDown(jqueryslidemenu.animateduration.over);
            }
          },
          function (e) {
            $(this).children("ul:eq(0)").slideUp(jqueryslidemenu.animateduration.out);
          }
        ); // end hover

        $header.click(function () {
          $(this).children("ul:eq(0)").hide();
        });
      }); // end $headers.each()

      $mainmenu.find("ul").css({
        display: 'none',
        visibility: 'visible'
      });
    }); // end document.ready
  }
};
// Build the menu for the element with ID "myjquerymenu", passing the arrowimages settings.
jqueryslidemenu.buildmenu("myjquerymenu", arrowimages);
</script>
我的正則表達式是- <script[^>]*>(.*?)</script>
其他常規腳本標簽(例如<script type="text/javascript" src="JsFiles/jquery.min.js"></script>)
可以被我的正則表達式檢索到,但我不知道為什么它不適用於上面那個特定的字符串。
我需要標簽之間的數據來修改該數據,因此也需要分組。 我也嘗試過用jsoup來完成此任務-
// Walk every <script> element selected from the parsed document and rewrite
// quoted resource paths inside it so that they are prefixed with main_url.
Elements scripts = doc.select("script");
for(Element script: scripts) {
System.out.println("src " + script);
// lineread is the element's string form; it is the text that gets rewritten below.
String lineread = script.toString();
String data = script.data();
// Only elements whose data() is non-empty are processed.
if(!data.isEmpty()) {
System.out.println(data);
// Matches a double-quoted literal (content in group 1) or a single-quoted literal (content in group 2).
Pattern p = Pattern.compile("\\\"([^\\\"]*)\\\"|'([^']*)'");
// The matcher scans the string lineread refers to at this point; later
// reassignments of lineread do not change what the matcher iterates over.
Matcher m = p.matcher(lineread);
while(m.find()) {
// Keep only quoted literals that mention one of the listed file extensions.
if(m.group(0).contains(".axd") || m.group(0).contains(".JPG") || m.group(0).contains(".jpg") || m.group(0).contains(".jpeg") || m.group(0).contains(".png") || m.group(0).contains(".js") || m.group(0).contains(".ico") || m.group(0).contains(".gif")) {
System.out.println("m.g(0) " + m.group(0));
// NOTE(review): group(1) is null when the single-quoted alternative matched, so this prints "null" in that case.
System.out.println("m.g(1) " + m.group(1));
String changepath;
// Look for a src= attribute inside the matched literal:
// group 1 = text up to the next '>', group 2 = text between double quotes.
Pattern p1 = Pattern.compile("src=(.*?)>|src=\"(.*?)\"");
Matcher m1 = p1.matcher(m.group(0));
if(m1.find()) {
if(m1.group(0).contains(".axd") || m1.group(0).contains(".JPG") || m1.group(0).contains(".jpg") || m1.group(0).contains(".jpeg") || m1.group(0).contains(".png") || m1.group(0).contains(".js") || m1.group(0).contains(".ico") || m1.group(0).contains(".gif")) {
System.out.println("inner " + m1.group(0));
// NOTE(review): m1.group(1) is null when only the src="..." alternative (group 2) matched;
// presumably that would throw a NullPointerException here - confirm.
changepath = "\"" + main_url + "" + m1.group(1).replace("\"", "").replace("'", "") + "\"";
lineread = lineread.replace(m1.group(1), changepath);
System.out.println("data " + lineread);
}
} else {
// No src= inside the literal: strip the quotes, prefix main_url, and re-quote with double quotes.
changepath = "\"" + main_url + "" + m.group(0).replace("\"", "").replace("'", "") + "\"";
// String.replace substitutes every occurrence of the literal in lineread, not just this match.
lineread = lineread.replace(m.group(0), changepath);
System.out.println("data in src " + lineread);
}
}
}
// Store the rewritten string back into the element as its text.
// NOTE(review): lineread came from script.toString(), which presumably includes the
// <script> tag markup itself - confirm this is what should be written back.
script = script.text(lineread);
System.out.println("final script " + script);
}
}
但是問題是我在腳本中獲取的值(最后一個變量)刪除了腳本標記之間數據中存在的所有空格。
所以基本上我想要一個正則表達式來獲取腳本標記之間的數據,或者想知道在第二種解決方案(jsoup)中如何在不丟失空格的情況下獲取數據。
您可以使用lib js2xml。 https://github.com/redapple/js2xml
import js2xml

# JavaScript source to analyse, kept verbatim inside a triple-quoted string.
jscode = """var arrowimages = {
down: ['downarrowclass', 'Images/arrow-down.gif', 23],
right: ['rightarrowclass', 'Images/submenu-pointer.png']
}
var jqueryslidemenu = {
animateduration: {
over: 400,
out: 400
}, //duration of slide in/ out animation, in milliseconds
buildmenu: function (menuid, arrowsvar) {
jQuery(document).ready(function ($) {
var $mainmenu = $("#" + menuid + ">ul")
var $headers = $mainmenu.find("ul").parent()
$headers.each(function (i) {
var $curobj = $(this)
var $subul = $(this).find('ul:eq(0)')
this._dimensions = {
w: this.offsetWidth,
h: this.offsetHeight,
subulw: $subul.outerWidth(),
subulh: $subul.outerHeight()
}
this.istopheader = $curobj.parents("ul").length == 1 ? true : false
$subul.css({
top: this.istopheader ? this._dimensions.h + "px" : 0
})
$curobj.children("a:eq(0)").css(this.istopheader ? {
paddingRight: arrowsvar.down[2]
} : {}).append(
'<img src="' + (this.istopheader ? arrowsvar.down[1] : arrowsvar.right[1]) + '" class="' + (this.istopheader ? arrowsvar.down[0] : arrowsvar.right[0]) + '" style="border:0;top:12px;" />'
)
$curobj.hover(
function (e) {
var $targetul = $(this).children("ul:eq(0)")
this._offsets = {
left: $(this).offset().left,
top: $(this).offset().top
}
var menuleft = this.istopheader ? 0 : this._dimensions.w
menuleft = (this._offsets.left + menuleft + this._dimensions.subulw > $(window).width()) ? (this.istopheader ? -this._dimensions.subulw + this._dimensions.w : -this._dimensions.w) : menuleft
if($targetul.queue().length <= 1) //if 1 or less queued animations
$targetul.css({
left: menuleft + "px",
width: this._dimensions.subulw + 'px'
}).slideDown(jqueryslidemenu.animateduration.over)
},
function (e) {
var $targetul = $(this).children("ul:eq(0)")
$targetul.slideUp(jqueryslidemenu.animateduration.out)
}
) //end hover
$curobj.click(function () {
$(this).children("ul:eq(0)").hide()
})
}) //end $headers.each()
$mainmenu.find("ul").css({
display: 'none',
visibility: 'visible'
})
}) //end document.ready
}
}
//build menu with ID="myslidemenu" on page:
jqueryslidemenu.buildmenu("myjquerymenu", arrowimages)"""

# Parse the JavaScript source with js2xml.
parsed = js2xml.parse(jscode)
# Call print() as a function: the Python 2 statement form `print x` is a
# SyntaxError on Python 3, while this form works on both.
print(js2xml.pretty_print(parsed))
輸出示例:
<program>
<var name="arrowimages">
<object>
<property name="down">
<array>
<string>downarrowclass</string>
<string>Images/arrow-down.gif</string>
<number value="23"/>
</array>
</property>
<property name="right">
<array>
<string>rightarrowclass</string>
<string>Images/submenu-pointer.png</string>
</array>
</property>
</object>
</var>
然后,您可以解析XML。 (使用xpath更容易)
納爾遜。
不要使用data()方法,請嘗試改用html()方法。 也就是把String data = script.data();這一行改成String data = script.html();。
另一方面,如果要更改腳本標記的src屬性,則可以簡單地讓Jsoup為您找到它們:
// Let Jsoup select only the <script> elements that carry a src attribute.
Elements scripts = doc.select("script[src]");
for (Element scriptTag : scripts) {
    String originalSrc = scriptTag.attr("src");
    // Leave every src that does not mention ".axd" untouched.
    if (!originalSrc.contains(".axd")) {
        continue;
    }
    // Prefix the matching path with main_url.
    scriptTag.attr("src", main_url + originalSrc);
}
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.