[英]Regex in Java PHP like output
So here is the plan. 所以这是计划。
In PHP very simple to make, but it seems to me impossible with native Java. 用PHP制作起来非常简单,但是在我看来,使用本机Java是不可能的。
How it would look like in PHP: 在PHP中的样子:
<?php
//the html code, maybe multilined
$htmlCode = file_get_contents("https://stackoverflow.com");
$replacement = Array("\n", "\cr", "\r", "\c", "\t");
//to make it single lined like a long string
$htmlCode = str_replace($replacement, "", $htmlCode);
//Regex part
$pattern = '#<a href="(.*)">.*</a>#siU';
preg_match_all($pattern, $htmlCode, $results, PREG_SET_ORDER);
//Print it out
echo "<pre>Results: ".print_r($results,true)."</pre>";
?>
Results would look like: 结果如下:
Array = ( [0] = " http://abc.net ", [1] = " http://test.com ", ... );
Array =([0] =“ http://abc.net ”,[1] =“ http://test.com ”,...);
How can I do the same in Java? 如何在Java中做同样的事情?
(I already tried it my own with a way I thought how it could work, but it I think it's to overblown. Did it with JerichoHTMLParser.) (我已经以一种自己认为可行的方式尝试了它,但是我认为它已经被夸大了。使用JerichoHTMLParser做到了。)
Init.java 初始化程序
package test.java.regex;
import java.awt.*;
import java.awt.event.*;
import javax.swing.*;
import net.htmlparser.jericho.*;
import java.net.*;
import java.io.*;
import java.util.*;
import java.util.regex.*;
public class init extends JFrame {
private JLabel lblKeyword;
private JTextField keyword;
private JButton exec;
private JScrollPane sp;
private JTextArea output;
private Pattern pattern;
private Matcher matcher;
private static final String thePATTERN = "<a href=\"(http://[^\"]+)\".*</a>";
public init(){
this.pattern = Pattern.compile(thePATTERN);
this.setTitle("HTML Element Link Position");
this.setResizable(true);
this.setLayout(new GridBagLayout());
GridBagConstraints gbc;
this.lblKeyword = new JLabel("Search for:");
gbc=makeGBC(0, 0, 1, 1);
gbc.anchor = GridBagConstraints.WEST;
this.add(this.lblKeyword,gbc);
this.keyword = new JTextField();
this.keyword.setText("test");
gbc=makeGBC(0, 1, 1, 1);
gbc.fill=GridBagConstraints.HORIZONTAL;
this.add(this.keyword, gbc);
this.exec = new JButton("Execute");
gbc=makeGBC(0, 2, 0, 1);
gbc.anchor = GridBagConstraints.EAST;
gbc.fill=GridBagConstraints.BOTH;
gbc.weightx=1.0;
gbc.weighty=0.1;
this.add(this.exec, gbc);
this.output = new JTextArea();
gbc=makeGBC(0, 3, 1, 2);
gbc.fill=GridBagConstraints.BOTH;
gbc.weightx=1.0;
gbc.weighty=1.0;
this.add(this.output, gbc);
this.sp = new JScrollPane(this.output);
this.sp.setPreferredSize(new Dimension(500,100));
gbc = makeGBC(0, 3, 1, 1);
gbc.fill=GridBagConstraints.BOTH;
gbc.weightx=1.0;
gbc.weighty=1.0;
this.add(this.sp, gbc);
this.exec.addActionListener(
new ActionListener() {
public void actionPerformed(ActionEvent e) {
String pos = getPosition();
System.out.println("Position: "+pos);
//output.setText( pos );
}
});
this.addWindowListener(new WindowAdapter() {
public void windowClosing(WindowEvent e) {
System.exit(0);
}
});
this.setPreferredSize(new Dimension(700,600));
this.pack();
this.setLocation
(
(Toolkit.getDefaultToolkit().getScreenSize().width-this.getWidth())/2,
(Toolkit.getDefaultToolkit().getScreenSize().height-this.getHeight())/2
);
this.setVisible(true);
}
private String getPosition()
{
String urlString = "";
String result="";
if( !this.keyword.getText().isEmpty() )
{
URL url;
URLConnection uc;
StringBuilder parsedContentFromUrl = new StringBuilder();
urlString = "http://stackoverflow.com";
MicrosoftConditionalCommentTagTypes.register();
PHPTagTypes.register();
PHPTagTypes.PHP_SHORT.deregister();
MasonTagTypes.register();
try{
url = new URL( urlString );
uc = url.openConnection();
uc.addRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:5.0) Gecko/20100101 Firefox/5.0");
uc.setRequestProperty("Content-Type", "application/x-www-form-urlencoded");
uc.connect();
uc.getInputStream();
BufferedInputStream in = new BufferedInputStream(uc.getInputStream());
int ch;
while ((ch = in.read()) != -1) {
parsedContentFromUrl.append((char) ch);
}
Source source = new Source( parsedContentFromUrl );
result = source.toString();
this.output.setText( result );
matcher = this.pattern.matcher(result);
boolean matches = matcher.matches();
if( matches ){
this.output.setText( "found" );
}
// BufferedReader inl=null;
// inl = new BufferedReader( new InputStreamReader(uc.getInputStream()) );
// StringBuffer sb = new StringBuffer("");
// String line = "";
// //String NL = System.getProperty("line.separator");
// while ((line = inl.readLine()) != null) {
// sb.append(line);
// }
// inl.close();
// result = sb.toString();
// BufferedInputStream in1 = new BufferedInputStream(uc.getInputStream());
// ByteArrayOutputStream baos = new ByteArrayOutputStream();
// int ch1;
// while ((ch1 = in1.read()) != -1) {
// baos.write((byte)ch1);
// }
// baos.close();
// String st = new String(baos.toByteArray(), "UTF-8");
//result = source.getRenderer().toString();
}
catch (Exception e) {
result = e.toString();
}
//
return result;
}else{
return "No keyword given";
}
}
private GridBagConstraints makeGBC(int gx, int gy, int gw, int gh)
{
GridBagConstraints gbc=new GridBagConstraints();
gbc.gridx=gx;
gbc.gridy=gy;
gbc.gridwidth=gw;
gbc.gridheight=gh;
gbc.fill=GridBagConstraints.NONE;
gbc.weightx=0;
gbc.weighty=0;
gbc.anchor=GridBagConstraints.CENTER;
gbc.insets=new Insets(2,2,2,2);
return gbc;
}
public static void main(String[] args) {
try {
for (LookAndFeelInfo laf : UIManager.getInstalledLookAndFeels()) {
if ("Nimbus".equals(laf.getName())) {
UIManager.setLookAndFeel(laf.getClassName());
break;
}
}
} catch (Exception e) {
// If Nimbus is not available, you can set the GUI to another look and feel.
}
new init();
}
}
Thanks in advance for any help. 在此先感谢您的帮助。 :)
:)
Yes it may be done shorter, but let's start with that code, as it is almost there. 是的,它可以做的更短一些,但是让我们从该代码开始,因为它已经差不多了。
uc.getInputStream();
Can be removed of course, as it is fetched & used the next line. 当然可以删除,因为它已提取并用于下一行。
boolean matches = matcher.matches();
if (matches) {
this.output.setText( "found" );
}
has an error: in java .match
requires a match for the entire string. 有一个错误:在java
.match
需要对整个字符串进行匹配。
while (matcher.find()) {
output.setText(matcher.group(1));
// Or something different, as this keeps only the last one found.
}
And I am not ackquainted with Source. 我不了解Source。
One could read all in a ByteArrayOutputStream as you did too. 您也可以像这样在ByteArrayOutputStream中读取所有内容。
String st = new String(baos.toByteArray(), StandardCharsets.ISO_8859_1);
You could use ISO-8859-1 as this would not encoding errors which UTF-8 might encounter when the page is not in UTF-8. 您可以使用ISO-8859-1,因为这不会编码当页面不在 UTF-8中时UTF-8可能遇到的错误。
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.