簡體   English   中英

使用 R 的 httr 包發送 POST 請求

[英]POST request with httr package using R

我想使用 httr 向以下站點發送 POST 請求並獲取其輸出:

http://www.e-grunt.ba

當您單擊“ZK Ulošci”時,您可以看到提交表單。

我想向該表單發送 POST 請求並獲取輸出。例如,您可以從下拉選單中選擇任意一項,在“Broj Uloška”字段中輸入 1,然後單擊“Traži”。

這是我的嘗試:

library(httr)
library(tidyverse)
library(rvest)

    # First attempt: submit the whole search form in a single form-encoded POST.
    # The field values are the hard-coded sample values of this attempt; the
    # reCAPTCHA token is a placeholder.
    output <- httr::POST(
      url = "http://www.e-grunt.ba/home.jsf",
      httr::add_headers(Referer = "http://www.e-grunt.ba/"),
      httr::verbose(),
      encode = "form",
      body = list(
        "form:court_focus" = "440",
        "form:cuTransferLast" = "17.07.2019",
        "form:municipality_input" = "4400000001",
        "form:mpart_focus" = "44000087",
        "form:folder" = 1,
        "recaptcha-token" = "some token",
        "submit" = "form:j_idt61"
      )
    )

但這只是返回主頁的內容。

我知道使用 (R)Selenium 更容易,但如果可能的話,我想使用 httr 的 POST 來完成。

我找到了抓取這個站點的方法(原文稱其為 ASP.net 站點;但從 home.jsf 路徑和 javax.faces.* 參數來看,它實際上是一個 JSF 站點)。如果有人需要類似的東西,我將提供代碼:

#' Open a browsing session on e-grunt.ba and replay the first partial-ajax POST.
#'
#' Loads the landing page, reads the JSF view-state token from it, and sends a
#' partial-ajax POST for component `j_idt8:j_idt15` that re-renders `content`
#' (presumably the "ZK Ulošci" menu click described above; confirm against the
#' live page).
#'
#' @return The session object returned by `rvest:::request_POST()`, with the
#'   view-state token attached as attribute `"viewState"`.
start_session <- function() {
  p <- html_session(
    "http://www.e-grunt.ba",
    user_agent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36")
  )

  # Look the hidden field up by its name instead of relying on it being the
  # second <input> of the page, which breaks as soon as the markup changes.
  view_state <- p %>%
    html_nodes("input[name='javax.faces.ViewState']") %>%
    html_attr("value")
  view_state <- view_state[!is.na(view_state)]
  if (length(view_state) == 0) {
    stop(
      "No 'javax.faces.ViewState' input found on the landing page.",
      call. = FALSE
    )
  }
  view_state <- view_state[[1]]

  p <- rvest:::request_POST(
    p,
    "http://www.e-grunt.ba/home.jsf",
    add_headers(
      "Referer" = "http://www.e-grunt.ba"
    ),
    body = list(
      "javax.faces.partial.ajax" = "true",
      "javax.faces.source" = "j_idt8:j_idt15",
      "javax.faces.partial.execute" = "@all",
      "javax.faces.partial.render" = "content",
      "j_idt8:j_idt15" = "j_idt8:j_idt15",
      "j_idt8" = "j_idt8",
      "javax.faces.ViewState" = view_state
    ),
    encode = "form"
  )

  # Keep the token on the session so later requests can read it back with
  # attributes(p)$viewState.
  attr(p, "viewState") <- view_state
  p
}

# EXTRACT METADATA --------------------------------------------------------

# Open the session once; `p` is passed to and reassigned by the requests below.
p <- start_session()
#' Extract (value, text) pairs from the children of the nodes matching `css`.
#'
#' @param html Anything `read_html()` accepts (the callers pass the raw
#'   response content).
#' @param css CSS selector for the parent node(s) whose children are read.
#' @param cnames Character vector of length 2: names for the value column and
#'   the text column of the result.
#' @return A data frame with one row per child node, excluding children whose
#'   `value` attribute is -1. Zero rows if nothing matches.
name_value_pairs <- function(html, css, cnames) {
  # Parse the document once and reuse the node set for both columns.
  children <- read_html(html) %>%
    html_nodes(css) %>%
    html_children()

  df <- data.frame(
    x = html_attr(children, "value"),
    y = html_text(children),
    stringsAsFactors = FALSE
  )

  # Drop only the -1 entries. A child without a `value` attribute has an NA
  # value; comparing it directly would put NA into the row index and turn the
  # whole row (including its text) into NAs, so keep such rows explicitly.
  keep <- is.na(df[[1]]) | df[[1]] != "-1"
  df <- df[keep, , drop = FALSE]

  colnames(df) <- cnames
  df
}
# Court ids and names, read from the children of the `form:court_input` element
# in the response held by the session.
courts <- name_value_pairs(
  p$response$content,
  css = '[id="form:court_input"]',
  cnames = c("court_id", "court")
)

#' Send the partial-ajax "change" event for the court field.
#'
#' Asks the server to re-render the municipality, `mpart` and
#' `cuTransferLast` parts of the form for the given court.
#' Note: the name `metadata_post` is reassigned further down in this file to a
#' function with a different signature.
#'
#' @param session_zk Session object to send the request through.
#' @param view_state Value sent as `javax.faces.ViewState`.
#' @param id Value sent as `form:court_input`.
#' @return The session object returned by `rvest:::request_POST()`.
metadata_post <- function(session_zk, view_state, id) {
  form_fields <- list(
    "javax.faces.partial.ajax" = "true",
    "javax.faces.source" = "form:court",
    "javax.faces.partial.execute" = "form:court",
    "javax.faces.partial.render" = "msgs msgsBottom form:municipality form:mpart form:cuTransferLast",
    "javax.faces.behavior.event" = "change",
    "javax.faces.partial.event" = "change",
    "form" = "form",
    "g-recaptcha-response" = "",
    "form:court_focus" = "",
    "form:court_input" = id,
    "form:cuTransferLast" = "",
    "form:municipality_focus" = "",
    "form:mpart_focus" = "",
    "form:folder" = "",
    "form:parcel" = "",
    "form:parcelSub" = "",
    "javax.faces.ViewState" = view_state
  )

  rvest:::request_POST(
    session_zk,
    "http://www.e-grunt.ba/home.jsf",
    add_headers("Referer" = "http://www.e-grunt.ba"),
    body = form_fields,
    encode = "form"
  )
}

#' Send the partial-ajax "change" event for the municipality field.
#'
#' Asks the server to re-render the `form:mpart` part of the form for the
#' given court and municipality.
#'
#' @param session_zk Session object to send the request through.
#' @param view_state Value sent as `javax.faces.ViewState`.
#' @param id Value sent as `form:court_input`.
#' @param muni_id Value sent as `form:municipality_input`.
#' @return The session object returned by `rvest:::request_POST()`.
muni_post <- function(session_zk, view_state, id, muni_id) {
  form_fields <- list(
    "javax.faces.partial.ajax" = "true",
    "javax.faces.source" = "form:municipality",
    "javax.faces.partial.execute" = "form:municipality",
    "javax.faces.partial.render" = "msgs msgsBottom form:mpart",
    "javax.faces.behavior.event" = "change",
    "javax.faces.partial.event" = "change",
    "form" = "form",
    "g-recaptcha-response" = "",
    "form:court_focus" = "",
    "form:court_input" = id,
    "form:cuTransferLast" = "",
    "form:municipality_focus" = "",
    "form:municipality_input" = muni_id,
    "form:mpart_focus" = "",
    "form:folder" = "",
    "form:parcel" = "",
    "form:parcelSub" = "",
    "javax.faces.ViewState" = view_state
  )

  rvest:::request_POST(
    session_zk,
    "http://www.e-grunt.ba/home.jsf",
    add_headers("Referer" = "http://www.e-grunt.ba"),
    body = form_fields,
    encode = "form"
  )
}


# Build the court -> municipality -> ko lookup table.
# For every court, post the court "change" event and read the municipality
# list; when there are several municipalities, post a "change" event for each
# one and read its ko list. The session `p` is threaded through every request.
# Plain for loops are kept on purpose: the example further down reads the
# values of `i` and `j` left behind by these loops.
metadata_i <- vector("list", nrow(courts))
for (i in seq_along(courts$court_id)) {
  print(i)
  p <- metadata_post(p, attributes(p)$viewState, courts$court_id[i])
  muni <- name_value_pairs(p$response$content, css = '[id="form:municipality_input"]', cnames = c("muni_id", "muni"))

  if (nrow(muni) > 1) {
    muni_ko <- vector("list", nrow(muni))
    for (j in seq_along(muni$muni_id)) {
      # print(j)
      p <- muni_post(p, attributes(p)$viewState, courts$court_id[i], muni$muni_id[j])
      ko <- name_value_pairs(p$response$content, css = '[id="form:mpart_input"]', cnames = c("ko_id", "ko"))
      if (nrow(ko) == 0) {
        # Keep the municipality in the table even when it has no ko entries.
        ko <- data.frame(ko_id = NA, ko = NA, stringsAsFactors = FALSE)
      }
      # The single municipality row is recycled across all ko rows.
      muni_ko[[j]] <- cbind.data.frame(muni[j, ], ko, stringsAsFactors = FALSE)
    }
    metadata_i[[i]] <- cbind.data.frame(courts[i, ], do.call(rbind, muni_ko), stringsAsFactors = FALSE)
  } else {
    # Zero or one municipality: the ko list is read from the same response as
    # the municipality list.
    ko <- name_value_pairs(p$response$content, css = '[id="form:mpart_input"]', cnames = c("ko_id", "ko"))
    # cbind.data.frame() cannot combine a 1-row frame with a 0-row frame, so
    # substitute an NA row for an empty result, as the branch above does.
    if (nrow(muni) == 0) {
      muni <- data.frame(muni_id = NA, muni = NA, stringsAsFactors = FALSE)
    }
    if (nrow(ko) == 0) {
      ko <- data.frame(ko_id = NA, ko = NA, stringsAsFactors = FALSE)
    }
    meta <- cbind.data.frame(courts[i, ], muni, stringsAsFactors = FALSE)
    metadata_i[[i]] <- cbind.data.frame(meta, ko, stringsAsFactors = FALSE)
  }
}
metadata <- do.call(rbind, metadata_i)

#' Submit the filled-in search form (non-ajax POST).
#'
#' This definition replaces the earlier three-argument `metadata_post`; after
#' this point only this version is available under that name.
#'
#' @param session_zk Session object to send the request through.
#' @param view_state Value sent as `javax.faces.ViewState`.
#' @param recaptcha Value sent as `g-recaptcha-response`.
#' @param court Value sent as `form:court_input`.
#' @param date Value sent as `form:cuTransferLast`; defaults to the date four
#'   days before today, formatted as dd.mm.yyyy.
#' @param muni Value sent as `form:municipality_input`.
#' @param ko Value sent as `form:mpart_input`.
#' @param zk Value sent as `form:folder`.
#' @return The session object returned by `rvest:::request_POST()`.
metadata_post <- function(session_zk, view_state, recaptcha, court,
                          date = as.character(format.Date(Sys.Date() - 4, "%d.%m.%Y")),
                          muni, ko, zk
) {
  form_fields <- list(
    "form" = "form",
    "g-recaptcha-response" = recaptcha,
    "form:court_focus" = "",
    "form:court_input" = court,
    "form:cuTransferLast" = date,
    "form:municipality_focus" = "",
    "form:municipality_input" = muni,
    "form:mpart_focus" = "",
    "form:mpart_input" = ko,
    "form:folder" = zk,
    "form:parcel" = "",
    "form:parcelSub" = "",
    "form:j_idt61" = "",
    "javax.faces.ViewState" = view_state
  )

  rvest:::request_POST(
    session_zk,
    "http://www.e-grunt.ba/home.jsf",
    add_headers("Referer" = "http://www.e-grunt.ba"),
    body = form_fields,
    encode = "form"
  )
}

# example
# NOTE(review): break_captcha() is not defined in this file; presumably it
# returns a solved reCAPTCHA response token. Confirm before running.
result <- break_captcha()
# Submit one search through the existing session. `i` and `j` are whatever
# values the metadata loops above left behind; `i` is used here as a row index
# into `metadata` and `j` as the folder number (`zk`).
# NOTE(review): `i` was a court index in the loop, not a `metadata` row number.
# Verify that this is the intended row.
p <- metadata_post(session_zk = p, view_state = attributes(p)$viewState, 
                   recaptcha = result, court = metadata$court_id[i],
                   muni = metadata$muni_id[i], ko =  metadata$ko_id[i], zk = j)

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM