简体   繁体   中英

Unable to download a zip file using HttpWebRequest in C# code, although it downloads fine through a browser

I want to download a zip file from the website https://eqrreportviewer.ferc.gov/ . To download it manually, you first click the Filing Inquiries tab, select SubmissionsBydate in the reportType dropdown, select CSV in the export dropdown, and then click the Submit button; the zip file is then downloaded. I want to automate this process. I have written C# code that reproduces the request captured from the browser, including its headers, and sends it to the site, but I am not able to download the file through code.

This is the code that I have written:

// Text of the most recently fetched page: assigned by GetLandingPage() (landing page body)
// and again by PostToPageForSubmitBtn() (body of the POST response).
public static string PageSourceCode { get; set; }

// Cookie value used to tie the POST to the same ASP.NET session: taken from the landing
// page's Set-Cookie response header (passed through StripCookie) and sent back as the
// Cookie request header.
public static string SessionID { get; set; }

// The request body posted to the page on subsequent calls (output of FormatPostString).
public static string PostBackValue { get; set; }

// URL used for both the initial GET and the follow-up POST.
public static string AcquisitionURL = "https://eqrreportviewer.ferc.gov";
// Program entry point: runs the acquisition sequence once. Command-line arguments are not used.
static void Main(string[] args) => Acquire();

// Runs the three acquisition steps in order. The order matters: each step consumes
// static state written by the previous one.
private static void Acquire()
{
    // 1. GET the landing page; fills PageSourceCode and SessionID.
    GetLandingPage();
    // 2. Build the form post body from PageSourceCode; fills PostBackValue.
    PopulatePostBackValueForSubmitBtn();
    // 3. POST PostBackValue with the session cookie and save the response to disk.
    PostToPageForSubmitBtn();
}

// Fetches the landing page with a browser-like GET request and captures the state the
// follow-up POST needs.
//
// Side effects:
//   PageSourceCode - set to the decoded text of the landing page.
//   SessionID      - set to StripCookie(value of the Set-Cookie response header).
//
// Exceptions raised by the request (e.g. WebException) are not caught here.
private static void GetLandingPage()
{
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(AcquisitionURL);
    request.Method = WebRequestMethods.Http.Get;
    request.Host = "eqrreportviewer.ferc.gov";
    // "Connection" is a restricted header on HttpWebRequest; keep-alive is requested via the property.
    request.KeepAlive = true;
    request.UserAgent = "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36";
    request.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9";

    // Let the framework negotiate and transparently decode gzip/deflate. Adding a manual
    // "Accept-Encoding: gzip, deflate, br" header without this leaves the body compressed,
    // so the StreamReader below would return garbage instead of HTML ("br" cannot be
    // decoded by HttpWebRequest at all, so it is no longer advertised).
    request.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate;

    request.Headers.Add("Cache-Control", "max-age=0");
    request.Headers.Add("Accept-Language", "en-US,en;q=0.9");
    request.Headers.Add("Sec-Fetch-Dest", "document");
    request.Headers.Add("Sec-Fetch-Mode", "navigate");
    request.Headers.Add("Sec-Fetch-Site", "none");
    request.Headers.Add("Sec-Fetch-User", "?1");
    request.Headers.Add("Upgrade-Insecure-Requests", "1");

    string landingPageHtml;
    string setCookieHeader;

    using (WebResponse response = request.GetResponse())
    {
        setCookieHeader = response.Headers["Set-Cookie"];

        using (Stream body = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(body))
        {
            landingPageHtml = reader.ReadToEnd();
        }
    }

    SessionID = StripCookie(setCookieHeader);
    // Keep the page source so the form fields can be extracted from it later.
    PageSourceCode = landingPageHtml;
}

// Builds the form post body for the "Submit" button on the Filing Inquiries tab and stores
// it in PostBackValue.
//
// Starts from the fields GetFormFields extracts from PageSourceCode, overrides/adds the
// values below, then serialises them with FormatPostString. Does nothing (PostBackValue is
// left unchanged) when PageSourceCode is null or empty.
private static void PopulatePostBackValueForSubmitBtn()
{
    if (String.IsNullOrEmpty(PageSourceCode))
    {
        return;
    }

    // Common prefixes of the ASP.NET control names; concatenated with the control id below
    // they produce exactly the same keys as the fully spelled-out names.
    const string summaryReports = "TabContainerReportViewer$TabPanelReporting$TabContainerReports$TabPanelSummaryReports$";
    const string filingInquiries = "TabContainerReportViewer$TabPanelReporting$TabContainerReports$TabPanelFilingInquiries$";
    const string selectiveFilings = "TabContainerReportViewer$TabPanelDownloads$TabContainerDownloads$TabPanelSelectiveFilings$";

    // Dates go over the wire, so format them with a fixed pattern instead of the machine's
    // current culture (ToShortDateString() yields e.g. "05.04.2020" on a de-DE machine).
    // "M/d/yyyy" is what ToShortDateString() produces under en-US.
    // Sample the clock once so both ends of the range are derived from the same day.
    const string dateFormat = "M/d/yyyy";
    System.DateTime today = System.DateTime.Now.Date;
    string fromDate = today.AddDays(-30).ToString(dateFormat, System.Globalization.CultureInfo.InvariantCulture);
    string toDate = today.ToString(dateFormat, System.Globalization.CultureInfo.InvariantCulture);

    // get fields from landing page
    Dictionary<string, string> formFields = GetFormFields(PageSourceCode);

    formFields[summaryReports + "ddlReportTypeSum"] = "0";
    formFields[summaryReports + "ddlReportPeriodSum"] = "650";
    formFields[summaryReports + "ListSearchExtender1_ClientState"] = String.Empty;

    // NOTE(review): "4" / "2" are presumably the option values for "Submissions by date"
    // and "CSV" as captured from the browser request - confirm against the live page.
    formFields[filingInquiries + "ddlReportType"] = "4";
    formFields[filingInquiries + "txtFromSubmissionDate"] = fromDate;
    formFields[filingInquiries + "txtToSubmissionDate"] = toDate;
    formFields[filingInquiries + "ddlExport"] = "2";
    formFields[filingInquiries + "btnSubmitOptional"] = "Submit";

    formFields[selectiveFilings + "txtCID"] = String.Empty;
    formFields[selectiveFilings + "txtFilingOrg"] = String.Empty;
    formFields[selectiveFilings + "ddlQuarter"] = "Pick";
    formFields[selectiveFilings + "ddlDownloadType"] = "CSV";
    formFields[selectiveFilings + "txtName"] = String.Empty;
    formFields[selectiveFilings + "txtEmail"] = String.Empty;

    formFields["__EVENTTARGET"] = String.Empty;
    formFields["__EVENTARGUMENT"] = String.Empty;
    formFields["__LASTFOCUS"] = String.Empty;
    formFields["__AjaxControlToolkitCalendarCssLoaded"] = String.Empty;

    // Client-side tab state; index 1 on the inner reports container corresponds to the
    // second tab being active in that container.
    formFields["TabContainerReportViewer_ClientState"] = "{\"ActiveTabIndex\" : 0,\"TabState\": [true,true]}";
    formFields["TabContainerReportViewer_TabPanelReporting_TabContainerReports_ClientState"] = "{\"ActiveTabIndex\" : 1,\"TabState\": [true,true]}";
    formFields["TabContainerReportViewer_TabPanelDownloads_TabContainerDownloads_ClientState"] = "{\"ActiveTabIndex\" : 0,\"TabState\": [true,true]}";

    // ViewState, ViewStateGenerator and ViewStateEncrypted are defined outside this block.
    formFields["__VIEWSTATE"] = ViewState;
    formFields["__VIEWSTATEGENERATOR"] = ViewStateGenerator;
    formFields["__VIEWSTATEENCRYPTED"] = ViewStateEncrypted;

    PostBackValue = FormatPostString(formFields);
}

// Posts PostBackValue to the site with the captured session cookie and saves the response.
//
// Side effects:
//   - Writes the raw response bytes to C:\Test\test.csv (via WriteFile).
//   - Sets PageSourceCode to the response body decoded as text.
//
// Throws InvalidOperationException when PostBackValue has not been populated; exceptions
// raised by the request or the file write are not caught here.
private static void PostToPageForSubmitBtn()
{
    if (PostBackValue == null)
    {
        throw new InvalidOperationException("PostBackValue has not been populated; nothing to post.");
    }

    // Content-Length must be the number of BYTES sent, not the number of characters.
    byte[] requestBody = System.Text.Encoding.UTF8.GetBytes(PostBackValue);

    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(AcquisitionURL);
    request.Method = WebRequestMethods.Http.Post;
    request.ContentType = "application/x-www-form-urlencoded";
    request.ContentLength = requestBody.Length;
    request.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9";
    request.KeepAlive = true;
    request.Host = "eqrreportviewer.ferc.gov";
    request.UserAgent = "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36";
    request.Referer = "https://eqrreportviewer.ferc.gov/";

    // Let the framework negotiate and transparently decode gzip/deflate instead of sending
    // a manual Accept-Encoding header: without this a compressed response would be written
    // to disk (and decoded as text) still compressed.
    request.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate;

    request.Headers.Add("Cache-Control", "max-age=0");
    request.Headers.Add("Origin", "https://eqrreportviewer.ferc.gov");
    request.Headers.Add("Accept-Language", "en-US,en;q=0.9");
    request.Headers.Add("Sec-Fetch-Dest", "document");
    request.Headers.Add("Sec-Fetch-Site", "same-origin");
    request.Headers.Add("Sec-Fetch-Mode", "navigate");
    request.Headers.Add("Sec-Fetch-User", "?1");
    request.Headers.Add("Upgrade-Insecure-Requests", "1");

    // Pass in the ASP.NET Session ID
    if (!String.IsNullOrEmpty(SessionID))
    {
        request.Headers.Add("Cookie", SessionID);
    }

    request.ServicePoint.Expect100Continue = false;

    // Post the arguments
    using (Stream requestStream = request.GetRequestStream())
    {
        requestStream.Write(requestBody, 0, requestBody.Length);
    }

    // A network response stream can only be read once. Buffer it so the same bytes can be
    // both decoded into PageSourceCode and written to disk; previously the stream was
    // drained by ReadToEnd() first, so the file written afterwards was always empty.
    byte[] payload;
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    using (Stream responseStream = response.GetResponseStream())
    using (MemoryStream buffer = new MemoryStream())
    {
        responseStream.CopyTo(buffer);
        payload = buffer.ToArray();
    }

    using (StreamReader reader = new StreamReader(new MemoryStream(payload)))
    {
        PageSourceCode = reader.ReadToEnd();
    }

    // WriteFile is defined outside this block; it is handed a fresh, unread stream over
    // the raw payload so the file receives the response bytes unmodified.
    using (Stream payloadStream = new MemoryStream(payload))
    using (FileStream file = new FileStream(@"C:\Test\test.csv", FileMode.Create, FileAccess.Write))
    {
        WriteFile(payloadStream, file);
    }
}

Can anyone let me know if I am doing something wrong? Right now all the values are hard-coded, but once it works I can organize that properly.
Also, I don't get the Content-Disposition response header in the response I receive, although that header is present when the request is made from the Chrome browser.

What should I do differently in the code, or what am I missing? Any help or suggestion would be a great help in moving forward with this issue.

I was not able to do this using C#.
In the end, I used Python in combination with Selenium and the Chrome web driver to get the task done.

# Drives headless Chrome through the FERC EQR report viewer to export the
# "Submissions by date" filing inquiry as CSV, then waits for the downloaded
# file to land in DOWNLOAD_DIR before shutting the browser down.
import os
import time

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

DOWNLOAD_DIR = "/databricks/driver"
DOWNLOAD_TIMEOUT_SECONDS = 120
# Chrome writes in-progress downloads under these temporary suffixes.
PARTIAL_SUFFIXES = (".crdownload", ".tmp")

options = webdriver.ChromeOptions()
options.add_argument("--headless")
options.add_argument("--disable-extensions")
options.add_argument("--disable-dev-shm-usage")
options.add_argument("--no-sandbox")
options.add_experimental_option("prefs", {"download.default_directory": DOWNLOAD_DIR})

# `options=` replaces the deprecated `chrome_options=` keyword.
driver = webdriver.Chrome(options=options)
try:
    # implicitly_wait() does NOT pause the script; it only sets how long element
    # lookups keep retrying. Setting it once is enough.
    driver.implicitly_wait(15)

    url = "https://eqrreportviewer.ferc.gov/"
    driver.get(url)

    # Filing Inquiries
    driver.find_element(By.XPATH, '//*[@id="__tab_TabContainerReportViewer_TabPanelReporting_TabContainerReports_TabPanelFilingInquiries"]').click()
    # Submission by Date
    driver.find_element(By.XPATH, '//*[@id="TabContainerReportViewer_TabPanelReporting_TabContainerReports_TabPanelFilingInquiries_ddlReportType"]/option[5]').click()
    # CSV
    driver.find_element(By.XPATH, '//*[@id="TabContainerReportViewer_TabPanelReporting_TabContainerReports_TabPanelFilingInquiries_ddlExport"]/option[2]').click()

    # Submit
    element = WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.XPATH, '//*[@id="TabContainerReportViewer_TabPanelReporting_TabContainerReports_TabPanelFilingInquiries_btnSubmitOptional"]')))

    # Snapshot the directory so the new download can be told apart from existing files.
    files_before = set(os.listdir(DOWNLOAD_DIR))
    element.click()

    # Poll until a new, fully written file appears; quitting the driver earlier
    # would abort the download.
    deadline = time.time() + DOWNLOAD_TIMEOUT_SECONDS
    while time.time() < deadline:
        new_files = set(os.listdir(DOWNLOAD_DIR)) - files_before
        if new_files and not any(name.endswith(PARTIAL_SUFFIXES) for name in new_files):
            break
        time.sleep(1)
    else:
        raise TimeoutError(
            "Download did not complete within %d seconds" % DOWNLOAD_TIMEOUT_SECONDS)
finally:
    # Always release the browser, even when a step above fails.
    driver.quit()

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM