简体   繁体   中英

Can you continue iterating through a list after it has returned a value?

I am using selenium to scrape information from yahoo finance ( i know requests and bs4 is better but I wanted to do it this way.) I created a looping function to retrieve the stock ticker symbols from a list, but after it returns the first symbol it just resets and returns the first symbol again on the next loop iteration. I know that return is supposed to break the loop but I don't know any other way to return the value I'm looping for. I was wondering if I could ask you great and powerful oracles for help!!

The function is named miningLoop. Please let me know if you have any questions about this or anything?! I know that i could just put the function inside of the main and loop it there but can I do it from a function outside the main like this?

class miningCompanies():
    def prime(self):
        self.miningscraper()

    def miningscraper(self):
        while True:
            self.driver = webdriver.Chrome('/Users/admin/Downloads/Chromedriver')
            self.driver.get('https://finance.yahoo.com/quote/' + self.val + '?p=' + self.val + '&.tsrc=fin-srch')
            self.driver.implicitly_wait(5)
            time.sleep(5)
            self.scrapeData()
            time.sleep(5)

    def miningLoop(self):


        companies = ["XOM", "PM", "KXPLF", "PLG", "PLM", 'NCPCF', 'SBUM', 'OTMN', 'SLSR', 'NXXGF',
                     'CAHPF', 'FGOVF', 'ISVLF', 'SGRCF', 'KTGDF', 'KOREF', 'AHELF', 'DOLLF', 'DMEHF', 'TLRS', 'SSVFF',
                     'NEVDF', 'NMTLF', 'AAMMF', 'FFMGF', 'GLDLF', 'ELYGF', 'WLBMF', 'LRTNF', 'ANXGF', 'GPL', 'NAK',
                     'MLRKF', 'RVSDF', 'MUX', 'AUY', 'NGD', 'SLVRF', 'GORO', 'GOLD', 'EGO', 'SLVTF', 'ASX:AZS', 'AAGFF',
                     'ORAGF', 'SILV', 'CCJ', 'UEC', 'DGC.TO', 'OR', 'KGC', 'KOOYF', 'SVM', 'KL', 'JAG', 'TUO', 'BNCH', 'DV', 'ABRA', 'SDR', 'GGI', 'AUY', 'HMY', 'NST',
                     'BTG', 'EVN', 'SAR', 'EDV', 'EQX', 'NG', 'HL', 'PVG', 'SAND', 'CDE', 'MDKA', 'SSRM', 'SILV',
                     'USAS', 'AXU', 'GBR', 'AR', 'HGM', 'GOR', 'WGX', 'LUG', 'HOC', 'POG', 'TXG', 'MAG', 'FSM', 'CEY',
                     'RRL', 'SLR', 'AG', 'AAU', 'TGLDF', 'KLR']

        list = len(companies)
        for x in range(list):
            val = companies[x]
            x += 1


    def scrapeData(self):
        self.driver.implicitly_wait(10)
        time.sleep(2)
        self.PreviousClose = self.driver.find_element_by_xpath('//*[@id="quote-summary"]/div[1]/table/tbody/tr[1]/td[2]/span')
        self.EPS = self.driver.find_element_by_xpath('//*[@id="quote-summary"]/div[2]/table/tbody/tr[4]/td[2]/span')
        self.Market_Cap = self.driver.find_element_by_xpath('//*[@id="quote-summary"]/div[2]/table/tbody/tr[1]/td[2]/span')
        self.PE_Ratio = self.driver.find_element_by_xpath('//*[@id="quote-summary"]/div[2]/table/tbody/tr[3]/td[2]/span')
        self.Dividend = self.driver.find_element_by_xpath('//*[@id="quote-summary"]/div[2]/table/tbody/tr[6]/td[2]')
        self.apiDrive1()
        self.driver.implicitly_wait(10)
        time.sleep(3)
        statistics = self.driver.find_element_by_xpath('//*[@id="quote-nav"]/ul/li[5]/a/span')
        statistics.click()
        self.driver.implicitly_wait(10)
        time.sleep(3)
        self.shareShort = self.driver.find_element_by_xpath('//*[@id="Col1-0-KeyStatistics-Proxy"]/section/div[3]/div[2]/div/div[2]/div/div/table/tbody/tr[7]/td[2]')
        self.institutions = self.driver.find_element_by_xpath('//*[@id="Col1-0-KeyStatistics-Proxy"]/section/div[3]/div[2]/div/div[2]/div/div/table/tbody/tr[6]/td[2]')
        self.revenue = self.driver.find_element_by_xpath('//*[@id="Col1-0-KeyStatistics-Proxy"]/section/div[3]/div[3]/div/div[4]/div/div/table/tbody/tr[1]/td[2]')
        self.high = self.driver.find_element_by_xpath('//*[@id="Col1-0-KeyStatistics-Proxy"]/section/div[3]/div[2]/div/div[1]/div/div/table/tbody/tr[4]/td[2]')
        self.low = self.driver.find_element_by_xpath('//*[@id="Col1-0-KeyStatistics-Proxy"]/section/div[3]/div[2]/div/div[1]/div/div/table/tbody/tr[5]/td[2]')
        self.insiders = self.driver.find_element_by_xpath('//*[@id="Col1-0-KeyStatistics-Proxy"]/section/div[3]/div[2]/div/div[2]/div/div/table/tbody/tr[5]/td[2]')
        self.apiDrive2()
        #self.financial = self.driver.find_element_by_xpath('//*[@id="quote-nav"]/ul/li[8]/a/span')
        #financial.click()
        self.driver.implicitly_wait(3)
        time.sleep(2)
        self.driver.close()

    def apiDrive1(self):
        scope = ["https://spreadsheets.google.com/feeds", 'https://www.googleapis.com/auth/spreadsheets',
                 "https://www.googleapis.com/auth/drive.file", "https://www.googleapis.com/auth/drive"]

        creds = ServiceAccountCredentials.from_json_keyfile_name("creds.json", scope)
        client = gspread.authorize(creds)
        sheet = client.open("Company Break Down").sheet1  # Open the spreadhseet
        data = sheet.get_all_records()
        #first page loop
        x = 106
        i = 1
        while i < x:
            if i == 24 or 43 or 63 or 84 or 102:
                i += 1
            # you have to put a cool down time between requests or Google API will reject you
            self.MC = sheet.update_cell(i, 2, self.Market_Cap.text)
            time.sleep(0.2)
            self.PC = sheet.update_cell(i, 4, self.PreviousClose.text)
            time.sleep(0.2)
            self.PER = sheet.update_cell(i, 19, self.PE_Ratio.text)
            time.sleep(0.2)
            self.EPSC = sheet.update_cell(i, 21, self.EPS.text)
            time.sleep(0.2)
            self.Div = sheet.update_cell(i, 23, self.Dividend.text)
            time.sleep(0.2)
            i += 1
            break


    def apiDrive2(self):
        self.driver.implicitly_wait(10)
        time.sleep(5)
        scope = ["https://spreadsheets.google.com/feeds", 'https://www.googleapis.com/auth/spreadsheets',
                 "https://www.googleapis.com/auth/drive.file", "https://www.googleapis.com/auth/drive"]

        creds = ServiceAccountCredentials.from_json_keyfile_name("creds.json", scope)
        client = gspread.authorize(creds)
        sheet = client.open("Company Break Down").sheet1  # Open the spreadhseet
        data = sheet.get_all_records()
        x = 106
        i = 1
        while i < x:
            if i == 24 or 43 or 63 or 84 or 102:
                i += 1
            # you have to put a cool down time between requests or Google API will reject you
            self.institution = sheet.update_cell(i, 7, self.institutions.text)
            time.sleep(0.2)
            self.short = sheet.update_cell(i, 28, self.shareShort.text)
            time.sleep(0.2)
            self.rev = sheet.update_cell(i, 21, self.revenue.text)
            time.sleep(0.2)
            self.high52 = sheet.update_cell(i, 34, self.high.text)
            time.sleep(0.2)
            self.insider = sheet.update_cell(i, 8, self.insiders.text)
            time.sleep(0.2)
            self.low52 = sheet.update_cell(i, 33, self.low.text)
            i += 1
            break





beserk = miningCompanies()
beserk.prime()

The line if i == 24 or 43 or 63 or 84 or 102: is actually being executed like this:

if (i == 24) or (43) or (63) or (84) or (102):
    ...

So it tries to evaluate i == 24 , and if that fails, it checks if 43 is True , which is always the case.

Instead you need to rewrite i == x for each number, or use the in operator:

if i in {24, 43, 63, 84, 102}:
    ...

If this doesn't fix your issue, you'll need to explain what part of your code doesn't work, and what outcome you are expecting

Going back to your original question. If you are iterating through a list and return from the function that contains the iterator, then you lose the iterator. You might want to look at generators, and use yield instead of return .

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM