繁体   English   中英

Python 3.2 文件 I/O。 根据预设的大小限制生成和拆分文件。 如何?

[英]Python 3.2 File I/O. Generating & Splitting a file based on preset size limit. How?

我正在编写一些代码,用于生成给定字符集的所有排列。 尽管我的代码在 Python 2.7 中可以工作,但由于字符串处理方面的许多更改,它在 Python 3.x 中不再工作。 我想适应这些变化,因为我是 Python 新手,希望您能给我一点指引。 =)

我的问题是,当 Python 生成您选择的单词列表时,输出文件大小会相应增加。 我想让你告诉我如何让这个脚本检查预设的文件大小,如果达到,打开一个新文件并继续写入排列。

例子:

numeric_lowercase.000001
numeric_lowercase.000002
numeric_lowercase.000003

请记住,我已经查看了站点上的大多数示例,但它们不适用于 Python 3.2。

到目前为止,这是我的 Python 3.2 工作代码:

import itertools
import subprocess
import os
from string import digits, ascii_lowercase, ascii_uppercase, punctuation

def clear_console():
    """Clear the terminal by running the platform's clear-screen command.

    Runs ``cls`` when ``os.name`` is ``'nt'`` and ``clear`` otherwise,
    through the shell. Returns ``None``.
    """
    command = "cls" if os.name == 'nt' else "clear"
    subprocess.call(command, shell=True)

def generate_phone_numbers(area_code, filename='phones.txt',
                           start=2010000, stop=9999999):
    """Write one phone number per line for the given area code.

    Each line is ``area_code`` followed by a number from
    ``range(start, stop)`` and a newline. The defaults reproduce the
    original hard-coded behaviour (file ``phones.txt``, numbers
    2010000 up to but not including 9999999).

    Args:
        area_code: String prefix written in front of every number.
        filename: Path of the output file; it is overwritten.
        start: First number to write (inclusive).
        stop: End of the number range (exclusive).
    """
    # The context manager guarantees the file is flushed and closed even if
    # a write fails; the original left the handle open.
    with open(filename, 'w') as f:
        f.writelines(area_code + str(i) + '\n' for i in range(start, stop))

def generate_wordlist(lst_chars, min_digit, max_digit, lst_name):
    """Write every word over ``lst_chars`` of each length in the given range.

    For each length from ``min_digit`` to ``max_digit`` (both inclusive),
    every element of ``itertools.product(lst_chars, repeat=length)`` is
    joined into a word and written on its own line.

    Args:
        lst_chars: Iterable of characters to combine.
        min_digit: Shortest word length to generate.
        max_digit: Longest word length to generate.
        lst_name: Path of the output file; it is overwritten.
    """
    # The context manager guarantees the file is flushed and closed even if
    # a write fails; the original left the handle open.
    with open(lst_name, 'w') as f:
        for curr_length in range(min_digit, max_digit + 1):
            f.writelines(
                ''.join(word) + '\n'
                for word in itertools.product(lst_chars, repeat=curr_length)
            )

print('')
print('  wgen - Menu')

# Menu options 2-5 differ only in character set, output file name and the
# label used in the progress message, so they are described by one table
# instead of four copied branches.
# Option 5 now really combines all four character classes, as the menu text
# and the file name promise (it previously used punctuation alone).
wordlist_options = {
    2: (digits,
        'numeric.txt',
        'numbers'),
    3: (digits + ascii_lowercase,
        'numeric_lowercase.txt',
        'numbers & lowercase'),
    4: (digits + ascii_lowercase + ascii_uppercase,
        'numeric_lowercase_uppercase.txt',
        'numbers, lowercase & uppercase'),
    5: (digits + ascii_lowercase + ascii_uppercase + punctuation,
        'numeric_lowercase_uppercase_punctuation.txt',
        'numbers, lowercase, uppercase & punctuation'),
}

choice = 0

# Keep asking until a valid option (1-5) is entered.
while choice not in range(1, 6):
    clear_console()
    answer = input('''
  1. Phone numbers for a given area code.
  2. Numbers.
  3. Numbers + Lowercase.
  4. Numbers + Lowercase + Uppercase.
  5. Numbers + Lowercase + Uppercase + Punctuation.

  Enter Option: ''')
    try:
        choice = int(answer)
    except ValueError:
        # Non-numeric input used to crash the script; show the menu again.
        choice = 0

print('')

if choice == 1:
    area_code = input('''
  Please enter Area Code: ''')
    area_code = str(area_code).strip()
    # Anything other than a 3-character area code is silently ignored.
    if len(area_code) == 3:
        print('')
        print('  Generating phone numbers for area code ' + area_code + '.')
        print('  Please wait...')
        generate_phone_numbers(area_code)
else:
    chars, lst_name, label = wordlist_options[choice]
    min_digit = int(input('  What is the minimum size of the word? '))
    print('')
    max_digit = int(input('  What is the maximum size of the word? '))
    print('')
    print('  Generating ' + label + ' between ' + str(min_digit) + ' and ' + str(max_digit) + ' digits.')
    print('  Please wait...')
    generate_wordlist(chars, min_digit, max_digit, lst_name)

我认为最好的解决方案是编写一个像文件一样的类,但提供分块功能。 您的程序只是写入此对象,就好像它是一个常规文件一样。

下面的实现不会拆分字符串(如果您调用 f.write("this is a test"),整条消息保证写入同一个文件),并且只有在当前文件超过限制之后才会开始一个新文件,因此各个文件会略大于块大小。 此行为全部在 write() 方法中实现,如有需要可以修改。

class chunkyfile(object):
    """Write-only file-like object that spreads its output over numbered files.

    Each chunk is named ``filename`` + zero-padded chunk number + ``extension``.
    A single ``write()`` call is never split across files: a new chunk is
    started only when the current file's position already exceeds
    ``chunksize`` at the beginning of a write, so chunks may end up somewhat
    larger than ``chunksize``.

    Args:
        filename: Base path that every chunk name starts with.
        chunksize: Position (as reported by ``tell()``) beyond which the next
            write goes to a new chunk.
        mode: Mode passed to ``open()`` for every chunk.
        encoding: Encoding passed to ``open()`` for every chunk.
        extension: Suffix for chunk names; a leading "." is added if missing.
        start: Number of the first chunk.
        digits: Minimum width of the zero-padded chunk number.
    """

    def __init__(self, filename, chunksize=1000000, mode="w", encoding=None,
                 extension="", start=0, digits=6):
        self.filename  = filename
        self.chunksize = chunksize
        self.chunkno   = start
        self.file      = None        # current chunk; opened lazily on write
        self.mode      = mode
        self.encoding  = encoding
        self.digits    = digits
        if extension and not extension.startswith("."):
            extension = "." + extension
        self.extension = extension
        self.softspace = 0       # for use with print

    def _nextfile(self):
        """Close the current chunk (if any) and open the next numbered one."""
        if self.file is not None:
            self.file.close()
        name = (self.filename + str(self.chunkno).rjust(self.digits, "0") +
                self.extension)
        self.file = open(name, mode=self.mode, encoding=self.encoding)
        self.chunkno += 1

    def write(self, text):
        """Write ``text`` to the current chunk, rolling over first if needed.

        Returns whatever the underlying file's ``write()`` returns.
        """
        # Roll over only between writes so one message never spans two files.
        if self.file is not None and self.file.tell() > self.chunksize:
            self.close()
        if self.file is None:
            self._nextfile()
        return self.file.write(text)

    # convenience method, equivalent to print(... file=f)
    # requires Python 3.x or from __future__ import print_function in Py2
    def print(self, *objects, sep=" ", end="\n", flush=False):
        """Print ``objects`` into this file, like ``print(..., file=self)``."""
        # ``self`` was missing from the signature, which made every call fail
        # with NameError. Inside a method body ``print`` is the builtin.
        print(*objects, sep=sep, end=end, flush=flush, file=self)

    def writelines(self, lines):
        """Write each item of ``lines`` with its own ``write()`` call."""
        # do it a line at a time in case we need to split
        for line in lines:
            self.write(line)

    def flush(self):
        """Flush the current chunk, if one is open."""
        if self.file is not None:
            self.file.flush()

    def close(self):
        """Close the current chunk, if one is open; a later write opens the next."""
        if self.file is not None:
            self.file.close()
            self.file = None

    # support "with" statement
    def __enter__(self):
        return self

    def __exit__(self, e, value, tb):
        self.close()

# Usage example: chunkyfile.writelines() issues one write() per item, so each
# string below is a separate write and the 10-character chunk size is
# checked before every one of them.
with chunkyfile(r"C:\test", 10, extension="txt", encoding="utf8") as f:
    f.writelines((
        "FINALLY ROBOTIC BEINGS RULE THE WORLD",
        "The humans are dead",
        "The humans are dead",
        "We used poisonous gasses",
        "And we poisoned their asses",
    ))

通过一些预先配置,您可以使用 stdlib 中的日志记录库中内置的 RotatingFileHandler。

import logging
from logging.handlers import RotatingFileHandler

log = logging.getLogger('myprog.stufflogger')
log.propagate = False #ensure that we don't mess with other logging
# Without an explicit level this logger inherits the root logger's WARNING
# threshold, and the log.info() call below would be dropped.
log.setLevel(logging.INFO)

#configure RotatingFileHandler
# backupCount must be non-zero: with the default of 0 rollover never occurs
# and everything ends up in one ever-growing file. Older data moves to
# base_file_name.txt.1, base_file_name.txt.2, ... up to backupCount files.
handler = RotatingFileHandler('base_file_name.txt', maxBytes=1024*1024*20,
                              backupCount=1000)
# The closing parenthesis was missing here, which was a syntax error.
handler.setFormatter(logging.Formatter('%(message)s'))
handler.terminator = '' # default is new line

log.addHandler(handler)


# you can now use any of the log methods to add the values

log.info('stuff')

暂无
暂无

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM