简体   繁体   中英

parse image in dom parser using jsoup in android

I am trying to get the RSS feed of this website:

http://www.phonearena.com/feed

Here is my DOMParser class:

/**
 * Downloads an RSS document and converts its {@code <item>} elements into an {@link RSSFeed}.
 *
 * <p>For every item, the {@code title}, {@code description} and {@code pubDate} children are
 * copied onto an {@link RSSItem}. The description text is additionally parsed as HTML with
 * jsoup so that the {@code src} attribute of an {@code <img>} inside it can be stored as the
 * item's image.
 */
public class DOMParser {
    // Every call to parseXml appends to this one feed instance.
    private final RSSFeed _feed = new RSSFeed();

    /**
     * Fetches the RSS document at the given address and adds its items to the feed.
     *
     * <p>Parsing is best-effort: a malformed URL, a network failure or a parse error is
     * reported via {@code printStackTrace()} and the feed is returned with whatever items
     * were collected before the failure.
     *
     * @param xml the URL of the RSS document (an address, not the XML text itself)
     * @return the feed held by this parser, including any items added by this call
     */
    public RSSFeed parseXml(String xml) {
        URL url;
        try {
            url = new URL(xml);
        } catch (MalformedURLException e1) {
            e1.printStackTrace();
            // Without a valid URL there is nothing to download; avoid dereferencing null below.
            return _feed;
        }

        java.io.InputStream in = null;
        try {
            // NOTE(review): the factory is used with default settings on remote input, so
            // DOCTYPE / external-entity processing is not restricted here. Hardening features
            // differ between parser implementations; confirm what the target platform supports.
            DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
            DocumentBuilder db = dbf.newDocumentBuilder();

            in = url.openStream();
            Document doc = db.parse(new InputSource(in));
            doc.getDocumentElement().normalize();

            NodeList items = doc.getElementsByTagName("item");
            int length = items.getLength();
            for (int i = 0; i < length; i++) {
                _feed.addItem(parseItem(items.item(i)));
            }
        } catch (Exception e) {
            // Keep the best-effort contract (return what was parsed) but do not hide the cause.
            e.printStackTrace();
        } finally {
            if (in != null) {
                try {
                    in.close();
                } catch (java.io.IOException ignored) {
                    // Closing failed after the data was already consumed; nothing to recover.
                }
            }
        }

        return _feed;
    }

    /** Builds one RSSItem from the child nodes of a single {@code <item>} node. */
    private RSSItem parseItem(Node itemNode) {
        RSSItem item = new RSSItem();
        NodeList children = itemNode.getChildNodes();
        int count = children.getLength();

        for (int j = 0; j < count; j++) {
            Node child = children.item(j);
            String value = firstChildValue(child);
            if (value == null) {
                // Whitespace text nodes between elements and empty elements carry no value.
                continue;
            }

            String nodeName = child.getNodeName();
            if ("title".equals(nodeName)) {
                item.setTitle(value);
            } else if ("description".equals(nodeName)) {
                item.setDescription(value);

                // Parse the HTML description to get the image URL.
                org.jsoup.nodes.Document docHtml = Jsoup.parse(value);
                Elements imgEle = docHtml.select("img");
                item.setImage(imgEle.attr("src"));
            } else if ("pubDate".equals(nodeName)) {
                // Drop the UTC offset suffix so only the date and time remain.
                item.setDate(value.replace(" +0000", ""));
            }
        }

        return item;
    }

    /** Returns the value of the node's first child, or null when the node has no children. */
    private static String firstChildValue(Node node) {
        Node first = node.getFirstChild();
        if (first == null) {
            return null;
        }
        return first.getNodeValue();
    }
}

Everything works fine except the image, which I am trying to extract with jsoup.

Can anybody tell me what I am doing wrong or missing?

The variable theString needs to be unescaped before passing it to Jsoup.

// Replacement for the "description" branch of the if/else chain in parseXml:
// stores the raw description, then extracts the image URL from it.
else if ("description".equals(nodeName)) {
    _item.setDescription(theString);

    // Unescape, then parse the HTML description to get the image URL.
    // Steps, innermost first: parse theString with jsoup's XML parser (empty base URI),
    // serialize it back to markup, unescape the entities in that markup, and finally
    // parse the unescaped text as HTML so that "img" can be selected.
    // The trailing "//" markers only keep the call chain split one argument per line.
    Element imgEle = Jsoup.parse( //
            Parser.unescapeEntities( //
                  Parser.xmlParser().parseInput(theString, "").outerHtml(), //
                  true //
            )) //
            .select("img").first();

    // Only set the image when an <img> was selected; otherwise the item's image is left untouched.
    if (imgEle != null) {
        _item.setImage(imgEle.attr("src"));
    }
}

The technical post webpages of this site are published under the CC BY-SA 4.0 license. If you need to reprint, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM