[英]Python optimization
# OP's original (deliberately unoptimised) word-list generator: writes every
# word of length 1-4 over the character codes 33..126 to wl4.txt, one word
# per line, and counts the words in hh.
hh = 0
# The with-block closes the file even if a write fails part-way through.
with open('wl4.txt', 'w') as f:
    ######################################
    for n in range(1, 5):  # n is the word length being generated
        for l in range(33, 127):  # code of the first character
            if n == 1:
                b = chr(l) + '\n'
                f.write(b)
                hh += 1
            elif n == 2:
                for s0 in range(33, 127):
                    b = chr(l) + chr(s0) + '\n'
                    f.write(b)
                    hh += 1
            elif n == 3:
                for s0 in range(33, 127):
                    for s1 in range(33, 127):
                        b = chr(l) + chr(s0) + chr(s1) + '\n'
                        f.write(b)
                        hh += 1
            elif n == 4:
                for s0 in range(33, 127):
                    for s1 in range(33, 127):
                        for s2 in range(33, 127):
                            b = chr(l) + chr(s0) + chr(s1) + chr(s2) + '\n'
                            f.write(b)
                            hh += 1
######################################
print("We Made %d Words." % (hh))
######################################
那么,有沒有什么方法可以讓它更快?
可以進一步顯着改進。
以下腳本文件演示了這些改進;為簡潔起見,僅使用長度為4的循環(它占用了90%以上的運行時間)。
方法0:OP的原始代碼
方法1:John Kugelman的解決方案
方法2:(1)並將一些字符串連接移出內部循環
方法3:(2)並將代碼放在函數中 - 訪問局部變量比全局變量快得多。 任何腳本都可以這樣做。 許多腳本應該這樣做。
方法4:(3)並在列表中累積字符串,然后用join連接並一次寫入。請注意,這會占用多到您可能難以置信的內存。我的代碼沒有嘗試對整個文件這樣做,因為(127 - 33)** 4約為7800萬個字符串。在32位機器上,僅列表本身就需要78 * 4 = 312 Mb(忽略列表末尾未使用的內存),加上str對象的78 * 28 = 2184 Mb(sys.getsizeof("1234")的結果為28),再加上join結果的78 * 5 = 390 Mb。這樣一來,您就耗盡了用戶地址空間、超出了ulimit,或者撐爆了別的什么限制。或者,如果您只有1 Gb的物理內存,其中128 Mb已被顯卡驅動占用,但交換空間足夠,那么您就有時間去吃午餐了(如果運行的是某個特定的操作系統,也許連晚餐也吃得上)。
方法5:(4)並且不要向列表查詢它的append屬性7800萬次 :-)
這是腳本文件:
import time, sys
# Benchmark driver.  Usage: wl4.py UBOUND WHICH
#   UBOUND - exclusive upper bound of the character codes (127 = full set)
#   WHICH  - number of the method to time
# Every method writes all 4-character words over codes 33..UBOUND-1 to
# wl4.txt, one per line, and leaves the word count in hh.
time_function = time.clock # Windows; time.time may be better on *x
ubound, which = map(int, sys.argv[1:3])
t0 = time_function()
if which == 0:
    ### original ###
    # Kept at module level on purpose: method 3 measures the gain from
    # moving the same work into a function.
    with open('wl4.txt', 'w') as f:
        hh = 0
        n = 4
        for l in range(33, ubound):
            if n == 1:
                pass
            elif n == 2:
                pass
            elif n == 3:
                pass
            elif n == 4:
                for s0 in range(33, ubound):
                    for s1 in range(33, ubound):
                        for s2 in range(33, ubound):
                            b = chr(l) + chr(s0) + chr(s1) + chr(s2) + '\n'
                            f.write(b)
                            hh += 1
elif which == 1:
    ### John Kugelman ###
    # Build the character list once instead of calling chr() in the loops.
    with open('wl4.txt', 'w') as f:
        chars = [chr(c) for c in range(33, ubound)]
        hh = 0
        for l in chars:
            for s0 in chars:
                for s1 in chars:
                    for s2 in chars:
                        b = l + s0 + s1 + s2 + '\n'
                        f.write(b)
                        hh += 1
elif which == 2:
    ### JohnK, saving + ###
    # Prefixes are concatenated once per outer iteration, not per word.
    with open('wl4.txt', 'w') as f:
        chars = [chr(c) for c in range(33, ubound)]
        hh = 0
        for L in chars: # "L" as in "Legible" ;-)
            for s0 in chars:
                b0 = L + s0
                for s1 in chars:
                    b1 = b0 + s1
                    for s2 in chars:
                        b = b1 + s2 + '\n'
                        f.write(b)
                        hh += 1
elif which == 3:
    ### JohnK, saving +, function ###
    def which3func():
        """Method 2 inside a function, so the loops use local variables.

        Returns the number of words written.
        """
        with open('wl4.txt', 'w') as f:
            chars = [chr(c) for c in range(33, ubound)]
            nwords = 0
            for L in chars:
                for s0 in chars:
                    b0 = L + s0
                    for s1 in chars:
                        b1 = b0 + s1
                        for s2 in chars:
                            b = b1 + s2 + '\n'
                            f.write(b)
                            nwords += 1
        return nwords
    hh = which3func()
elif which == 4:
    ### JohnK, saving +, function, linesep.join() ###
    def which4func():
        """Method 3, but words are collected per first character and
        written with a single join/write.

        Returns the number of words written.
        """
        with open('wl4.txt', 'w') as f:
            chars = [chr(c) for c in range(33, ubound)]
            nwords = 0
            for L in chars:
                # One list per first character bounds the memory in use.
                accum = []
                for s0 in chars:
                    b0 = L + s0
                    for s1 in chars:
                        b1 = b0 + s1
                        for s2 in chars:
                            accum.append(b1 + s2)
                nwords += len(accum)
                accum.append("") # so that we get a final newline
                f.write('\n'.join(accum))
        return nwords
    hh = which4func()
elif which == 5:
    ### JohnK, saving +, function, linesep.join(), avoid method lookup in loop ###
    def which5func():
        """Method 4 with accum.append bound to a local name once per list
        instead of being looked up for every word.

        Returns the number of words written.
        """
        with open('wl4.txt', 'w') as f:
            chars = [chr(c) for c in range(33, ubound)]
            nwords = 0
            for L in chars:
                accum = []
                accum_append = accum.append
                for s0 in chars:
                    b0 = L + s0
                    for s1 in chars:
                        b1 = b0 + s1
                        for s2 in chars:
                            accum_append(b1 + s2)
                nwords += len(accum)
                accum_append("") # so that we get a final newline
                f.write('\n'.join(accum))
        return nwords
    hh = which5func()
else:
    print("Bzzzzzzt!!!")
    # No method ran, so hh does not exist; stop here rather than fail with
    # a NameError in the report line below.
    sys.exit(1)
t1 = time_function()
print("Method %d made %d words in %.1f seconds" % (which, hh, t1 - t0))
以下是一些結果:
C:\junk\so>for %w in (0 1 2 3 4 5) do \python26\python wl4.py 127 %w
C:\junk\so>\python26\python wl4.py 127 0
Method 0 made 78074896 words in 352.3 seconds
C:\junk\so>\python26\python wl4.py 127 1
Method 1 made 78074896 words in 183.9 seconds
C:\junk\so>\python26\python wl4.py 127 2
Method 2 made 78074896 words in 157.9 seconds
C:\junk\so>\python26\python wl4.py 127 3
Method 3 made 78074896 words in 126.0 seconds
C:\junk\so>\python26\python wl4.py 127 4
Method 4 made 78074896 words in 68.3 seconds
C:\junk\so>\python26\python wl4.py 127 5
Method 5 made 78074896 words in 60.5 seconds
針對OP的問題進行更新
“當我嘗試添加for循環時,accum_append出現了內存錯誤……問題出在哪里?”
我不知道問題出在哪里;隔着這么遠,我看不到你的代碼。猜測:如果你想生成長度 == 5的單詞,你可能把accum的初始化和寫入代碼放錯了位置,導致accum試圖超出系統內存的容量(正如我希望之前已經解釋過的那樣)。
“現在方法5是最快的,但它只能生成長度為4的單詞……我怎樣才能生成任意想要的長度?:)”
您有兩種選擇:(1)繼續使用嵌套for循環;(2)查看那些不使用嵌套for循環、並且可以動態指定長度的答案。
方法4和5通過使用accum獲得了加速,但其做法是依據對內存用量的確切了解而量身定制的。
以下是另外3種方法。 101是tgray的方法,沒有額外的內存使用。 201是Paul Hankin的方法(加上一些寫文件代碼),同樣沒有額外的內存使用。 這兩種方法速度大致相同,並且在方法3的速度范圍內。 它們都允許動態指定所需的長度。
方法102是使用固定1 Mb緩沖區的tgray方法——它試圖通過減少f.write()的調用次數來節省時間……您可能需要試驗不同的緩沖區大小。如果您願意,可以仿照它創建對應的202方法。請注意,tgray的方法使用了itertools.product,因此需要Python 2.6;而Paul Hankin的方法使用的是已經存在一段時間的生成器表達式。
elif which == 101:
### tgray, memory-lite version
def which101func():
f = open('wl4.txt', 'w')
f_write = f.write
nwords = 0
chars = map(chr, xrange(33, ubound)) # create a list of characters
length = 4 #### length is a variable
for x in product(chars, repeat=length):
f_write(''.join(x) + '\n')
nwords += 1
f.close()
return nwords
hh = which101func()
elif which == 102:
### tgray, memory-lite version, buffered
def which102func():
f = open('wl4.txt', 'w')
f_write = f.write
nwords = 0
chars = map(chr, xrange(33, ubound)) # create a list of characters
length = 4 #### length is a variable
buffer_size_bytes = 1024 * 1024
buffer_size_words = buffer_size_bytes // (length + 1)
words_in_buffer = 0
buffer = []; buffer_append = buffer.append
for x in product(chars, repeat=length):
words_in_buffer += 1
buffer_append(''.join(x) + '\n')
if words_in_buffer >= buffer_size_words:
f_write(''.join(buffer))
nwords += words_in_buffer
words_in_buffer = 0
del buffer[:]
if buffer:
f_write(''.join(buffer))
nwords += words_in_buffer
f.close()
return nwords
hh = which102func()
elif which == 201:
### Paul Hankin (needed output-to-file code added)
def AllWords(n, CHARS=[chr(i) for i in xrange(33, ubound)]):
#### n is the required word length
if n == 1: return CHARS
return (w + c for w in AllWords(n - 1) for c in CHARS)
def which201func():
f = open('wl4.txt', 'w')
f_write = f.write
nwords = 0
for w in AllWords(4):
f_write(w + '\n')
nwords += 1
f.close()
return nwords
hh = which201func()
您可以只創建一次由range(33, 127)得到的字符列表並將其保存。不必重復創建它,在我的機器上可將運行時間縮短一半。
# Excerpt: l, f and hh come from the surrounding (elided) script.
chars = [chr(c) for c in range(33, 127)]  # built once, reused by every loop
...
for s0 in chars:
    for s1 in chars:
        for s2 in chars:
            b = l + s0 + s1 + s2 + '\n'
            f.write(b)
            hh += 1
外層循環似乎毫無意義。為什么不簡單地寫成:
for l in range(33, 127):
    .. your code for the n==1 case
for l in range(33, 127):
    .. your code for the n==2 case
for l in range(33, 127):
    .. your code for the n==3 case
for l in range(33, 127):
    .. your code for the n==4 case
這將更快更容易閱讀。
在進行涉及迭代的操作時,itertools包是一個很好的起點。
在這種情況下,您需要的似乎是product函數,其文檔的說明是:
笛卡爾積,相當於嵌套的for循環
因此,要獲取您正在創建的“單詞”列表:
from itertools import product

# A real list (not a one-shot iterator) so it can be reused for every length.
chars = [chr(c) for c in range(33, 127)]  # create a list of characters
# NOTE: this version keeps every generated word in memory at once.
words = []  # this will be the list of words
for length in range(1, 5):  # length is the length of the words created
    words.extend([''.join(x) for x in product(chars, repeat=length)])
# instead of keeping a separate counter, hh, we can use the len function
print("We Made %d Words." % (len(words)))
with open('wl4.txt', 'w') as f:
    f.write('\n'.join(words))  # write one word per line
這樣得到的結果與您的腳本給出的結果相同。由於itertools是用C實現的,所以它也更快。
編輯:
根據John Machin關於內存使用情況的非常精明的評論,這里更新的代碼在我在整個范圍內運行時沒有給出內存錯誤(33,127)。
from itertools import product

# A real list (not a one-shot iterator) so it can be reused for every length.
chars = [chr(c) for c in range(33, 127)]  # create a list of characters
num_words = 0  # a counter (was hh in OPs code)
# Words are written as they are generated, so nothing accumulates in memory.
# The with-block closes f_words (the original called close() on an
# undefined name "f").
with open('wl4.txt', 'w') as f_words:
    for length in range(1, 5):  # length is the length of the words created
        for char_tup in product(chars, repeat=length):
            f_words.write(''.join(char_tup) + '\n')
            num_words += 1
print("We Made %d Words." % (num_words))
這在我的機器上運行大約4分鍾(240秒)。
它如何適用於任意字長:(密碼生成器?)
chars = [chr(c) for c in range(33, 127)]


def func(n, result):
    """Write every word that extends `result` by n more characters.

    Words go to the module-level file f, one per line.  Returns the number
    of words written; the original incremented the global hh from inside
    the function without declaring it global, which raises
    UnboundLocalError.
    """
    if n == 0:
        f.write(result + "\n")
        return 1
    written = 0
    for c in chars:
        written += func(n - 1, result + c)
    return written


hh = 0
with open('wl4.txt', 'w') as f:
    for n in range(1, 5):  # word lengths 1..4
        hh += func(n, "")
######################################
print("We Made %d Words." % (hh))
######################################
你需要按長度排序的所有單詞嗎? 如果你可以將長度混合在一起,你可以稍微改善John Kugelman的答案:
# Python 2 only: uses the "print >>file" form that the accompanying text
# compares against f.write.  Words of all four lengths are interleaved.
chars = [chr(c) for c in range(33, 127)]
c = len(chars)
# The word count is computed directly instead of incrementing a counter.
count = c + c*c + c**3 + c**4
# The with-block closes the file so buffered output is flushed to disk;
# the original never closed it.
with open("wl4.txt", "w") as f:
    for c0 in chars:
        print >>f, c0
        for c1 in chars:
            s1 = c0 + c1
            print >>f, s1
            for c2 in chars:
                s2 = s1 + c2
                print >>f, s2
                for c3 in chars:
                    print >>f, s2 + c3
print("We Made %d Words." % count)
直接計算hh而不是所有的遞增也是一個很大的勝利(在這台筆記本電腦上大約15%)。 使用print而不是f.write也有所改進,但我不知道為什么會這樣。 這個版本對我來說大約需要39秒。
這是一個簡短的遞歸解決方案。
def AllWords(n, CHARS=[chr(i) for i in range(33, 127)]):
    """Return an iterable of every word of length n over CHARS.

    n is the required word length.  For n == 1 the result is CHARS itself;
    otherwise it is a lazy generator.  The default list is shared between
    calls and is never modified here.

    Raises ValueError if n < 1.
    """
    if n < 1:
        # Without this guard the recursion below would never terminate.
        raise ValueError("n must be at least 1")
    if n == 1:
        return CHARS
    # Pass CHARS down: the original recursed with the default alphabet, so
    # a caller-supplied CHARS only affected the last character.
    return (w + c for w in AllWords(n - 1, CHARS) for c in CHARS)
# Print every word of length 1 to 4, shortest words first.
for i in range(1, 5):
    for w in AllWords(i):
        print(w)
PS:排除字符127是錯誤的嗎?
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.