簡體   English   中英

谷歌趨勢非官方 api 調整

[英]Google Trends unofficial api tweaking

我是 python 的新手,我正在使用以下代碼為 Google 趨勢中的關鍵字收集“相關查詢”。

 #!/usr/bin/python
"""Unofficial Google Trends client that downloads widget reports as CSV text.

Run as a script:

    ./pytrends.py coat,jacket time="[[2017,1,1],[2018,1,1]]" title="Interest over time,Interest by region"
"""
import http.client as httplib  # NOTE(review): unused in this script; kept from the original.
import urllib.parse
import urllib.request
import json
import sys
import ast


class pytrends:
    """Cookie-carrying session against the Google Trends explore/widget endpoints."""

    def __init__(self):
        """Fetch the Trends landing page cookies and build an opener that reuses them."""
        # Imported here so the pure helpers below (encode_time, encode_params,
        # get_params) stay importable when the third-party package is absent.
        import requests

        self.cj = requests.get("https://trends.google.com/").cookies
        self.opener = urllib.request.build_opener(
            urllib.request.HTTPCookieProcessor(self.cj)
        )
        self.opener.addheaders = [
            # NOTE(review): the standard HTTP header is spelled "Referer";
            # the original spelling is kept to avoid changing what is sent.
            ("Referrer", "https://trends.google.com/trends/explore"),
            ('User-Agent', 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.21 (KHTML, like Gecko) Chrome/19.0.1042.0 Safari/535.21'),
            ("Accept", "text/plain"),
        ]
        # Widget title -> CSV endpoint; the encoded query string is appended after "?".
        self.api = {
            "Interest over time": 'https://trends.google.com/trends/api/widgetdata/multiline/csv?',
            "Interest by region": 'https://trends.google.com/trends/api/widgetdata/comparedgeo/csv?',
            "Related topics": 'https://trends.google.com/trends/api/widgetdata/relatedsearches/csv?',
            "Related queries": 'https://trends.google.com/trends/api/widgetdata/relatedsearches/csv?',
        }
        self.widgets = None
        # [keywords, time] of the last explore call; lets get_params skip a refetch.
        self.widget_params = []

    def _fetch(self, url):
        """Return the body of *url* decoded as UTF-8, closing the response afterwards."""
        with self.opener.open(url) as response:
            return response.read().decode('utf8')

    def encode_time(self, *args):
        """Format a date (3 ints) or a date-time (6 ints); any other arity gives "all"."""
        if len(args) == 3:
            return "%04d-%02d-%02d" % args
        if len(args) == 6:
            return "%04d-%02d-%02dT%02d\\:%02d\\:%02d" % args
        return "all"

    def encode_params(self, params, page):
        """Return *params* as a URL query string for the given *page*.

        ``params["req"]`` is serialised as compact JSON first. For
        ``page == "explore"`` the escaped ``:`` and ``,`` are restored; for
        ``page == "csv"`` ``+`` is replaced by ``%20``. The caller's dict is
        left unmodified.

        Raises:
            KeyError: if *params* has no ``"req"`` entry.
        """
        query = dict(params)  # shallow copy: do not overwrite the caller's "req"
        query["req"] = json.dumps(params["req"], separators=(',', ':'))
        encoded = urllib.parse.urlencode(query)
        if page == "explore":
            encoded = encoded.replace('%3A', ':').replace('%2C', ',')
        elif page == "csv":
            encoded = encoded.replace("+", "%20")
        return encoded

    def get_widgets(self, keywords, time="all"):
        """Query the explore endpoint for *keywords* and cache the returned widgets.

        *time* is either a ``(start, end)`` pair of 3- or 6-int sequences or a
        string.
        """
        # NOTE(review): every non-list/tuple value of `time` is sent as "all",
        # so string ranges such as "today+5-y" are not forwarded. Confirm
        # whether that is intended before changing it.
        if isinstance(time, (list, tuple)):
            time_range = self.encode_time(*time[0]) + " " + self.encode_time(*time[1])
        else:
            time_range = "all"
        params = {
            "hl": "en-US",
            "tz": 240,
            "req": {
                "comparisonItem": [
                    {"keyword": keyword, "geo": "", "time": time_range}
                    for keyword in keywords
                ],
                "category": 0,
                "property": "",
            },
        }
        data = self._fetch(
            "https://trends.google.com/trends/api/explore?"
            + self.encode_params(params, "explore")
        )
        # Skip whatever precedes the first "{" so the rest parses as JSON.
        data = json.loads(data[data.find("{"):])
        self.widgets = data["widgets"]
        self.widget_params = [keywords, time]

    def get_params(self, keywords, title="Interest over time", time="all"):
        """Return the request parameters of the widget named *title*, or ``{}`` if absent.

        Widgets are refetched only when *keywords* / *time* differ from the
        cached query.
        """
        if not self.widget_params or self.widget_params != [keywords, time]:
            self.get_widgets(keywords, time)
        for widget in self.widgets:
            if widget["title"] == title:
                # if title in ["Related queries"]:
                #     widget["request"]["restriction"]["geo"]["country"] = "US"
                return {
                    "req": widget["request"],
                    "token": widget["token"],
                    "tz": 240,
                }
        return dict()

    def download_report(self, keywords, title="Interest over time", time="all"):
        """Download the CSV report of widget *title* for *keywords* and return it as text.

        Raises:
            KeyError: if *title* is not a known report, or no widget with that
                title was returned for this query.
        """
        endpoint = self.api[title]
        params = self.get_params(keywords, title, time)
        if not params:
            # Same exception type encode_params would raise, with a clearer message.
            raise KeyError("no widget titled %r was returned for this query" % title)
        return self._fetch(endpoint + self.encode_params(params, "csv"))


def main(argv=None):
    """Command-line entry point: ``pytrends.py KEYWORDS [title=...] [time=...]``."""
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        sys.exit('usage: pytrends.py word,word [title="A,B"] [time=all|"[[y,m,d],[y,m,d]]"]')
    keywords = argv[1].split(",")
    titles = ["Interest over time"]
    time = "all"
    for raw in argv[2:]:
        # Split on the first "=" only so values may themselves contain "=".
        key, _, value = raw.partition("=")
        if key == "title":
            titles = value.split(",")
        elif key == "time":
            # A bracketed value is a literal [[start], [end]] list; parse it safely.
            time = ast.literal_eval(value) if value.startswith("[") else value
    trends = pytrends()
    for title in titles:
        print(trends.download_report(keywords, title, time))


if __name__ == "__main__":
    main()

Examples:
        ./pytrends.py coat,jacket time="[[2017,1,1],[2018,1,1]]" title="Interest over time,Interest by region"
        ./pytrends.py blockchain time="today+5-y"
        ./pytrends.py Google,Microsoft,Apple title="Related queries"

        keywords: comma separated list
            word,word,word

        title: comma separated list
        "Interest over time,Interest by region,Related topics,Related queries"

        time: pick one of [
            "all",
            "now+%d-H" % hours,
            "now+%d-d" % days,
            "today+%d-m" % months,
            "today+%d-y" % years,
            [
                [year, month, day] # start time
                [year, month, day] # end time
            ],
            [
                [year, month, day, hour, minute, second] # start time
                [year, month, day, hour, minute, second] # end time
            ]
        ]


我需要調整此代碼,以便能夠為某個類別收集“相關查詢”(請參見此處)。

我在谷歌趨勢上找到了這個嵌入代碼:

<script type="text/javascript" src="https://ssl.gstatic.com/trends_nrtr/2152_RC04/embed_loader.js"></script>
<script type="text/javascript">
  // Related-queries widget for category 3 over 2019-01-01..2019-12-31; no keyword is given.
  // Each string literal must stay on one line: a line break inside the quotes is a syntax error.
  trends.embed.renderExploreWidget(
    "RELATED_QUERIES",
    {"comparisonItem":[{"geo":"","time":"2019-01-01 2019-12-31"}],"category":3,"property":""},
    {"exploreQuery":"cat=3&date=2019-01-01%202019-12-31","guestPath":"https://trends.google.com:443/trends/embed/"}
  );
</script>

對於關鍵字,嵌入代碼是:

<script type="text/javascript" src="https://ssl.gstatic.com/trends_nrtr/2152_RC04/embed_loader.js"></script>
<script type="text/javascript">
  // Geo-map widget for the keyword "endgame" over 2019-01-01..2019-12-31, category 0.
  // Each string literal must stay on one line: a line break inside the quotes is a syntax error.
  trends.embed.renderExploreWidget(
    "GEO_MAP",
    {"comparisonItem":[{"keyword":"endgame","geo":"","time":"2019-01-01 2019-12-31"}],"category":0,"property":""},
    {"exploreQuery":"q=endgame&date=2019-01-01%202019-12-31","guestPath":"https://trends.google.com:443/trends/embed/"}
  );
</script>

因此,我可以給它一個類別並獲取當時的所有相關查詢,而不是給一個關鍵字。

首先,我會嘗試傳遞一個空字串 ( '' ) 作為關鍵字。 如果這樣行不通,那麼下面的做法會在組裝查詢時完全省略該參數。 這樣產生的查詢應該與您找到的嵌入式小部件查詢一致。

我將首先在get_widgets中顯示現有的查詢格式。

# ...
# Existing explore request built in get_widgets: one comparison item per keyword.
params = {
    "hl": "en-US",
    "tz": 240,
    "req": {
        "comparisonItem": [
            {
                "keyword": keyword,
                "geo": "",
                # A (start, end) pair becomes "<start> <end>"; any other value is sent as "all".
                "time": (
                    self.encode_time(*(time[0])) + " " + self.encode_time(*(time[1]))
                    if isinstance(time, (list, tuple))
                    else "all"
                ),
            }
            for keyword in keywords
        ],
        "category": 0,
        "property": "",
    },  # closes "req"
}  # closes params (this brace was missing)
# ...

如果您不想要它,您可以跳過keyword參數。 您還需要指定類別。

# Encode the time window once: a (start, end) pair becomes "<start> <end>",
# any other value is sent as "all".
if isinstance(time, (list, tuple)):
    encoded_time = self.encode_time(*(time[0])) + " " + self.encode_time(*(time[1]))
else:
    encoded_time = "all"

if not keywords:
    # No keyword: send a single comparison item without the "keyword" key,
    # matching the shape of the category-only embed query.
    comparison_item = [{'geo': '', 'time': encoded_time}]
else:
    comparison_item = [{'keyword': kw, 'geo': '', 'time': encoded_time} for kw in keywords]

params = {
    "hl": "en-US",
    "tz": 240,
    "req": {
        "comparisonItem": comparison_item,
        # `category` is the numeric category id to query (3 in the embed
        # snippet); it must be supplied by the surrounding code, e.g. as a
        # new parameter of get_widgets.
        "category": category,
        "property": "",
    },
}

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM