繁体   English   中英

Android WebView无法返回所需的HTML

[英]Android WebView not returning desired HTML

所以快速概述我正在做的事情我正在使用Android Webview来渲染JavaScript,然后从javascript中读取HTML来解析它。

我目前在从名为Sport Chek的网站上检索HTML时遇到问题。

以下是我的SportChekSearch类的代码:

public class SportChekSearch extends SearchQuery{

public Elements finalDoc;
private ArrayList<Item> processed;
private final Handler uiHandler = new Handler();
public int status = 0;

//This basically is just so that the class knows which Activity we're working with
private Context c;

protected class JSHtmlInterface {
    @android.webkit.JavascriptInterface
    public void showHTML(String html) {
        final String htmlContent = html;

        uiHandler.post(
                new Runnable() {
                    @Override
                    public void run() {
                        Document doc = Jsoup.parse(htmlContent);
                    }
                }
        );
    }
}

/**
 * Constructor method
 * @param context The context taken from the webview (So that the asynctask can show progress)
 */
public SportChekSearch(Context context, String query) {

    final Context c = context;

    try {
        final WebView browser = new WebView(c);
        browser.setVisibility(View.INVISIBLE);
        browser.setLayerType(View.LAYER_TYPE_NONE, null);
        browser.getSettings().setJavaScriptEnabled(true);
        browser.getSettings().setBlockNetworkImage(true);
        browser.getSettings().setDomStorageEnabled(true);
        browser.getSettings().setCacheMode(WebSettings.LOAD_NO_CACHE);
        browser.getSettings().setLoadsImagesAutomatically(false);
        browser.getSettings().setGeolocationEnabled(false);
        browser.getSettings().setSupportZoom(false);
        browser.getSettings().setUserAgentString("Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36");
        browser.addJavascriptInterface(new JSHtmlInterface(), "JSBridge");

        browser.setWebViewClient(
                new WebViewClient() {

                    @Override
                    public void onPageStarted(WebView view, String url, Bitmap favicon) {
                        super.onPageStarted(view, url, favicon);
                    }

                    @Override
                    public void onPageFinished(WebView view, String url) {
                        browser.loadUrl("javascript:window.JSBridge.showHTML('<html>'+document.getElementsByTagName('html')[0].innerHTML+'</html>');");
                    }
                }
        );


            browser.loadUrl("https://www.sportchek.ca/search.html#q=" + query.replaceAll(" ", "+") + "&lastVisibleProductNumber=3");
            browser.loadUrl(browser.getUrl());
            final String link = browser.getUrl();
            new fetcher(c).execute(link);



    }
    catch(Exception e){
        e.printStackTrace();
    }

    //Get the link from the WebView, and save it in a final string so it can be accessed from worker thread


}

/**
 * This subclass is a worker thread meaning it does work in the background while the user interface is doing something else
 * This is done to prevent "lag".
 * To call this class you must write fetcher(Context c).execute(The link you want to connect to)
 *
 */
class fetcher extends AsyncTask<String, Void, Elements> {

    Context mContext;
    ProgressDialog pdialog;

    public fetcher(Context context) {
        mContext = context;
    }

    @Override
    protected void onPreExecute() {
        super.onPreExecute();
        pdialog = new ProgressDialog(mContext);
        pdialog.setTitle(R.string.finding_results);
        pdialog.setCancelable(false);
        pdialog.show();
    }

    //This return elements because the postExecute() method needs an Elements object to parse its results
    @Override
    protected Elements doInBackground(String... strings) {

        //You can pass in multiple strings, so this line just says to use the first string
        String link = strings[0];

        //For Debug Purposes, Do NOT Remove - **Important**
        System.out.println("Connecting to: " + link);

        try {
            doc = Jsoup.connect(link)
                    .ignoreContentType(true)
                    .userAgent("Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36")
                    .timeout(10000)
                    .get();


            finalDoc = doc.select("body section.product-grid-wrapper");

            System.out.println(finalDoc.toString());



        } catch (IOException e) {
            e.printStackTrace();
        }

        return finalDoc;
    }


    @Override
    protected void onPostExecute(Elements result) {


        //This line clears the list of info in the Search activity
        //I should probably be using a getter method but adapter is a static variable so it shouldn't matter


        //parse seperates document into elements
        //crunch results formats those elements into item objects
        //I am saving the result of this to an ArrayList<Item> called "processed"
        processed = crunchResults(result);

        //For debug purposes, do NOT remove - **Important**
        System.out.println(processed.size() + " results have been crunched by Sport Chek.");

        //Adds all of the processed results to the list of info in Search activity
        ClothingSearch.adapter.addAll(processed);


        //For debug purposes, do NOt remove - **Important
        System.out.println("Adapter has been notified by Sport Chek.");

        //Closes the progress dialog called pdialog assigned to the AsyncTask

        pdialog.dismiss();

        ClothingSearch.adapter.notifyDataSetChanged();
        SearchQueueHandler.makeRequest(mContext, processed, SearchQueueHandler.CLOTHING_SEARCH);




    }
}



    public ArrayList<Item> crunchResults(Elements e){

    ArrayList<Item> results = new ArrayList<Item>();

    try {

        for (int i = 0; i < e.size(); i++) {

            Element ele = e.get(i);


            String link = "https://www.sportchek.ca" + ele.select(" a.product-grid__link").attr("href");
            System.out.println("https://www.sportchek.ca" + ele.select(" a.product-grid__link").attr("href"));
            String title = ele.select(" span.product-title-text").text();

            String pricestring = ele.select(" span.product-price__wrap").text();
            price = Double.parseDouble(pricestring.substring(pricestring.lastIndexOf("$")));
            System.out.println(pricestring);

            //*******************************************

            String store = "Sport Chek";



                //Adds the formatted item to an ArrayList of items
                results.add(new Item(title, store, price, link));


            //Prints the object's to String to console
            //For debug purposes, do NOT remove - **Important
            System.out.println(results.get(i).toString());
        }
    } catch (Exception a){
        a.printStackTrace();
    }

    return results;
}

public int getStatus(){
    return status;
}

}

两个相关的方法是我的AsyncTask中的doInBackground和crunchResults方法。

以下是在实际网站上使用Ctrl + Shift + I得到的结果(Desired Result):

期望的结果

但是当运行上面的代码并在这里使用println时,我得到的结果是标签部分class =“product-grid-wrapper”:

<section class="product-grid-wrapper"> 
<ul data-module-type="SearchProductGrid" class="product-grid__list product-grid__list_quickview"> 
<!-- #product-grid__item-template --> 
</ul>
</section>

任何人都可以帮我弄清楚为什么我没有得到我想要的结果?

所有帮助表示赞赏

编辑:对于收集println数据的特定搜索,链接是https://www.sportchek.ca/search.html#q=men+coat&lastVisibleProductNumber=3

看起来你实际得到的是服务器发送的实际html,并且你的'期望结果'是JavaScript在JavaScript运行后的样子。

如果我在Chrome中使用“查看源代码”,那么我的“实际”就是我所看到的,而如果我使用Chrome的DOM检查器,则会看到“理想的结果”。

在进一步检查时,我发现你实际上并没有从浏览器中获取HTML,而是(间接地)使用JSoup的Connection对象来直接获取HTML。 不幸的是,这不会运行Javascript。

相反,您将不得不在JavaScript运行后从WebView获取HTML。 有关可能的方法,请参阅如何从WebView获取网页内容?

然后,您将从中获得的HTML提供给JSoup

Jsoup.parse(html);

暂无
暂无

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM