I think I have narrowed down my program's memory issue to my pandas DataFrame. On every loop iteration the RAM usage increases by about 300-800 KB. This doesn't matter in the short term, but this program uses the StubHub API to get tickets for an Eagles game, so I'd like to run it non-stop until the game happens. That is impossible, because within a couple of hours the process uses all of my system's RAM.
I made a throwaway API account on StubHub for this, so no worries.
#LIBS
import requests
import base64
import json
import pandas as pd
import datetime
from time import sleep
import gc
#SETTINGS
# Widen pandas' console output so printed frames are not truncated.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
# Silence pandas' chained-assignment warning.
pd.options.mode.chained_assignment = None
# Module-level state read and rebound by game_req().
lprice = 1   # price-column selector; overwritten by the prompt below
row = 250    # sent as the 'rows' query parameter
start = 0    # sent as the 'start' query parameter
check = 0    # number of completed checks, used in the status line
#USER INPUT
# The query separator must be spelled '&sectionId': when this text passes
# through HTML, '&sect' is decoded as the entity for '§' and the URL breaks.
pro_url = 'https://pro.stubhub.com/simweb/sim/services/priceanalysis?eventId=103577414&sectionId=0'
# Strip the fixed prefix and suffix, leaving only the numeric event id.
eventid = pro_url.replace("https://pro.stubhub.com/simweb/sim/services/priceanalysis?eventId=", "").replace("&sectionId=0", "")
lprice = int(input('By default enter 1, if prices are coming back incorrect, press 2: '))
#API TOKENS && REQUESTS
# NOTE(review): credentials are hard-coded in the source; consider loading
# them from the environment or a config file kept out of version control.
# app_token is not referenced again in the visible code.
app_token = '77de9c22-1799-3f30-8a6e-546c4abd9afd'
consumer_key = 'fSYdVsJFHSxn1hf2Z5Ubv5KULaka'
consumer_secret = '5Deehc9tWoN2AMSwpdVMpdmLWqwa'
stubhub_username = 'ejmoncrief@gmail.com'
stubhub_password = 'st^acerfl#owt12345!'
# Build the HTTP Basic credential: base64 of "consumer_key:consumer_secret".
combo = consumer_key + ':' + consumer_secret
basic_authorization_token = base64.b64encode(combo.encode('utf-8'))
# Headers for the login POST (form-encoded body, Basic auth).
headers = {
'Content-Type':'application/x-www-form-urlencoded',
'Authorization':'Basic '+basic_authorization_token.decode('utf-8'),}
# Form fields for the login request (password grant).
body = {
'grant_type':'password',
'username':stubhub_username,
'password':stubhub_password,
'scope':'PRODUCTION'}
url = 'https://api.stubhub.com/login'
# Log in once at import time; the response status is not checked here.
r = requests.post(url, headers=headers, data=body)
# Parsed JSON body of the login response (variable name kept as spelled).
token_respoonse = r.json()
access_token = token_respoonse['access_token']
# Read from a response header; not referenced again in the visible code.
user_GUID = r.headers['X-StubHub-User-GUID']
inventory_url = 'https://api.stubhub.com/search/inventory/v2'
# Reuse the same headers dict for inventory calls: swap Basic auth for the
# Bearer token. The 'Content-Type' entry set above stays in the dict.
headers['Authorization'] = 'Bearer ' + access_token
headers['Accept'] = 'application/json'
# NOTE(review): 'application/json' is a media type, not a content-coding
# such as gzip; confirm this Accept-Encoding value is intended.
headers['Accept-Encoding'] = 'application/json'
#MAKE REQUEST
def game_req():
# Fetch one page of listings for `eventid`, append listings not yet recorded
# to "<eventid>.txt", print them, then request the next page.
# Reads the module globals eventid, inventory_url, headers and lprice, and
# rebinds the globals row, start and check.
# NOTE(review): indentation was lost in this copy, so the nesting of the
# statements below has to be inferred; confirm against the original file.
# NOTE(review): the function calls itself instead of returning to a loop, so
# every earlier call's frame and locals stay alive while the program runs.
global row
global start
global check
# Query parameters for the inventory search.
data = {'eventid':eventid, 'rows':row, 'start': start}
inventory = requests.get(inventory_url, headers=headers, params=data)
#print(inventory) #PRINT REQUEST RESPONSE
inv = inventory.json()
# Overwrite the global offset with the one reported by the response.
start = inv['start']
total_listings = inv['totalListings']
# NOTE(review): bare except; any failure building the frame triggers a
# recursive retry, and once that call returns execution presumably continues
# below with listing_df unbound.
try: #SEE IF ANY DATA, IF NOT RESTART REQ
listing_df = pd.DataFrame(inv['listing'])
except:
game_req()
# Pull the numeric amount out of each row's currentPrice mapping. This
# column is dropped again further down.
listing_df['amount'] = listing_df.apply(lambda x: x['currentPrice']['amount'], axis=1)
#DROP TABLES, IF NOT EXISTS THEN PASS
# lprice == 1 drops 'currentPrice'; any other value drops 'listingPrice'.
# Presumably the else branch covers only that one drop - TODO confirm.
if lprice == 1:
try:
listing_df.drop('currentPrice', axis=1, inplace=True)
except:
pass
else:
try:
listing_df.drop('listingPrice', axis=1, inplace=True)
except:
pass
# Each remaining drop is wrapped in a bare try/except so that a column
# missing from the response is silently skipped.
try:
listing_df.drop('amount', axis=1, inplace=True)
except:
pass
try:
listing_df.drop('businessGuid', axis=1, inplace=True)
except:
pass
try:
listing_df.drop('deliveryMethodList', axis=1, inplace=True)
except:
pass
try:
listing_df.drop('deliveryTypeList', axis=1, inplace=True)
except:
pass
try:
listing_df.drop('dirtyTicketInd', axis=1, inplace=True)
except:
pass
try:
listing_df.drop('faceValue', axis=1, inplace=True)
except:
pass
try:
listing_df.drop('isGA', axis=1, inplace=True)
except:
pass
try:
listing_df.drop('listingAttributeCategoryList', axis=1, inplace=True)
except:
pass
try:
listing_df.drop('listingAttributeList', axis=1, inplace=True)
except:
pass
try:
listing_df.drop('score', axis=1, inplace=True)
except:
pass
try:
listing_df.drop('sellerOwnInd', axis=1, inplace=True)
except:
pass
try:
listing_df.drop('zoneId', axis=1, inplace=True)
except:
pass
try:
listing_df.drop('ticketSplit', axis=1, inplace=True)
except:
pass
try:
listing_df.drop('splitVector', axis=1, inplace=True)
except:
pass
try:
listing_df.drop('splitOption', axis=1, inplace=True)
except:
pass
try:
listing_df.drop('sellerSectionName', axis=1, inplace=True)
except:
pass
try:
listing_df.drop('seatNumbers', axis=1, inplace=True)
except:
pass
try:
listing_df.drop('listingId', axis=1, inplace=True)
except:
pass
try:
listing_df.drop('sectionId', axis=1, inplace=True)
except:
pass
try:
listing_df.drop('zoneName', axis=1, inplace=True)
except:
pass
#CHECK TICKETS
# `d` is never read afterwards in the visible code.
d = listing_df.to_dict(orient='records') #pd df to dict
# One list of cell values per remaining row.
a = listing_df.values.tolist() #dict to list of lists
# For every row, reopen "<eventid>.txt" for appending and for reading, and
# test whether the row's str() form is already one of its lines.
for i in a:
with open(eventid+'.txt', 'a+') as y:
with open(eventid+'.txt', 'r') as z:
if str(i)+'\n' in z:
pass
else:
# Unseen listing: record it, then print it labelled with the four headings.
y.write(str(i)+'\n')
head = ['Price', 'Qty', 'Row', 'Section']
D=dict.fromkeys(head)
D.update(zip(head,i))
D = str(D)
# Strip dict punctuation and the amount/currency labels, and turn commas
# into ' | '. The braces are removed first, so the later patterns that
# contain '{' or '}' can no longer match.
D = D.replace("{", '').replace("}", '').replace("{'amount': ", '').replace("'currency': 'USD'}, ", '').replace("'", '').replace("amount: ", '').replace(", currency: USD", '').replace(",", ' | ')
print(D)
# NOTE(review): both files are already closed by their `with` blocks. If
# these calls sit outside the loop and `a` is empty, y and z are unbound.
y.close()
z.close()
gc.collect()
check +=1
print('Checked Listings '+str(check)+' Times | Last Check At: '+str(datetime.datetime.now()))
print('Total Listings: '+str(total_listings))
# Wait ten seconds before the next request.
sleep(10)
# Advance the paging offset (by 250, or straight to total_listings when
# fewer than 250 remain) and fetch again; once start reaches total_listings,
# reset it to 0 and start over. Both paths recurse into game_req().
while start < total_listings:
if start >(total_listings-250):
start += total_listings-start
else:
start+=250
row = total_listings-start
game_req()
else:
start = 0
game_req()
# Start polling: issue the first inventory request.
game_req()
Recursion is not the same as a loop. When you call a function from within a function, the Python interpreter doesn't know that you're done with the variables from the original call, so it preserves them until the call stack is finally unwound.
In other words:
import os
import time
import psutil # third-party package, you'd need to install it
def no_wasted_memory():
    """Loop forever, printing this process's resident set size every second.

    Each pass rebinds the same local name to a fresh 1000-element list, so
    the list from the previous pass is released and memory stays flat.
    """
    while True:
        scratch = list(range(1000))
        this_process = psutil.Process(os.getpid())
        print(this_process.memory_info().rss)
        time.sleep(1)
def lots_of_wasted_memory():
    """Print this process's resident set size, wait a second, then recurse.

    There is no base case: every call leaves its frame, and the
    1000-element list bound in it, alive underneath the next call.
    """
    scratch = list(range(1000))
    this_process = psutil.Process(os.getpid())
    print(this_process.memory_info().rss)
    time.sleep(1)
    # Deliberate self-call: this is what keeps the earlier frames alive.
    lots_of_wasted_memory()
gives me
In [101]: no_wasted_memory()
108367872
108367872
108367872
108367872
108367872
and
In [103]: lots_of_wasted_memory()
109080576
109105152
109137920
109166592
109195264
[...]
If you want something to go on indefinitely, or at least until a condition is reached, use a while loop.
Aside #1: don't use global variables unless you need to. Pass variables honestly.
Aside #2: don't use bare excepts, they hide errors.
Aside #3: don't repeat yourself. All of those drop lines could be reduced to
df = df.drop(columns_to_drop, axis=1, errors='ignore')
where columns_to_drop is, unsurprisingly, a list of the column names you want to drop.
The technical posts on this site are licensed under CC BY-SA 4.0. If you reprint this content, please credit the site URL or the original address. For any questions, please contact: yoyou2525@163.com.