#!/usr/bin/env python
import pika
def doQuery( conn, i ) :
cur = conn.cursor()
cur.execute("SELECT * FROM table OFFSET %s LIMIT 100000", (i,))
return cur.fetchall()
print "Using psycopg2"
import psycopg2
myConnection = psycopg2.connect( host=hostname, user=username,
password=password, dbname=database )
connection =
pika.BlockingConnection(pika.ConnectionParameters(host='localhost'))
channel = connection.channel()
channel.queue_declare(queue='task_queue2')
endloop = False
i = 1
while True:
results = doQuery( myConnection, i )
j = 0
while j < 10000:
try:
results[j][-1]
except:
endloop = True
break
message = str(results[j][-1]).encode("hex")
channel.basic_publish(exchange='',
routing_key='task_queue2',
body=message
#properties=pika.BasicProperties(
#delivery_mode = 2, # make message persistent
)#)
j = j + 1
# if i % 10000 == 0:
# print i
if endloop == False:
break
i = i + 10000
The SQL query is taking too long to execute when i
gets to 100,000,000, but I have about two billion entries I need to put into the queue. Anyone know of a more efficient SQL query that I can run so that I can get all those two billion into the queue faster?
psycopg2
supports server-side cursors , that is, a cursor that is managed on the database server rather than in the client. The full result set is not transferred all at once to the client, rather it is fed to it as required via the cursor interface.
This will allow you to perform the query without using paging (as LIMIT/OFFSET implements), and will simplify your code. To use a server side cursor use the name
parameter when creating the cursor.
import pika
import psycopg2
with psycopg2.connect(host=hostname, user=username, password=password, dbname=database) as conn:
with conn.cursor(name='my_cursor') as cur: # create a named server-side cursor
cur.execute('select * from table')
connection = pika.BlockingConnection(pika.ConnectionParameters(host='localhost'))
channel = connection.channel()
channel.queue_declare(queue='task_queue2')
for row in cur:
message = str(row[-1]).encode('hex')
channel.basic_publish(exchange='', routing_key='task_queue2', body=message)
You might want to tweak cur.itersize
to improve performance if necessary.
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.