I am trying to get each row sorted in descending order of count.
Right now this script splits a large file at the pattern 'Title=', creates individual files, renames each one after the text that follows 'Title=', and then counts the frequency of a set of words within each file. It then writes those counts to a CSV file.
import os
from collections import *
import glob
import csv
# Input file to split, and the CSV file that receives the word counts.
# Raw string: keeps the Windows backslash literal instead of relying on "\T"
# happening not to be a recognised escape sequence.
select = r'D:\To.Sort.txt'
sav = 'Sorted.3.csv'
def files():
    """Yield an endless series of freshly opened, writable chunk files.

    Each ``next()`` call opens ``wworking<n>.txt`` for writing in the current
    directory, with ``n`` counting up from 1. The generator keeps no
    reference to the files it hands out, so the caller is responsible for
    closing every handle it receives.
    """
    n = 0
    while True:
        n += 1
        yield open('wworking%d.txt' % n, 'w')
# Split the input at every occurrence of the marker: text before the first
# marker goes into chunk 1, and each marker starts a new chunk file.
pat = 'Title='
fs = files()
outfile = next(fs)
try:
    with open(select, 'r') as infile:
        for line in infile:
            if pat not in line:
                outfile.write(line)
                continue
            # A line may hold several markers; whatever precedes the first
            # one still belongs to the chunk that is currently open.
            items = line.split(pat)
            outfile.write(items[0])
            for item in items[1:]:
                # Close the finished chunk explicitly before moving on, so
                # handles are released (and data flushed) deterministically
                # rather than whenever the old object is garbage-collected.
                outfile.close()
                outfile = next(fs)
                outfile.write(pat + item)
finally:
    # The `with` block already closed the input; only the last chunk remains.
    outfile.close()
# Chunk 1 holds only the text that preceded the first marker, so discard it.
os.remove('wworking1.txt')
# Rename every chunk file after the title found on its first line.
for filename in os.listdir("."):
    if not filename.startswith("wworking"):
        continue
    ext = os.path.splitext(filename)[1]
    with open(filename, 'r') as infile:
        # next(f) works on both Python 2 and 3; f.next() is Python 2 only.
        newname = next(infile).rstrip()
    # Drop only the leading 'Title=' marker that each chunk starts with.
    # str.replace() would also delete any later occurrence of those six
    # characters inside the title itself.
    newname = newname[len('Title='):] + ext
    # Rename after the `with` block so the file is closed first.
    os.rename(filename, newname)
# Open the result CSV and emit the header row. Binary mode ("wb") is what the
# Python 2 csv module expects; Python 3 would need text mode with newline=''.
# `ofile` stays open for the rest of the script and is closed at the very end.
ofile = open(sav, "wb")
writer = csv.writer(ofile, quoting=csv.QUOTE_NONNUMERIC)
writer.writerow(['Title', 'Class', 'Count'])
def word_frequency(fileobj, words):
    """Count how often each of the given words occurs in a file.

    Args:
        fileobj: Iterable of text lines, e.g. an open text file.
        words: Container of the words to look for. Matching is exact and
            case-sensitive against whitespace-separated tokens.

    Returns:
        A Counter mapping each word that was found to its number of
        occurrences. Words that never occur are not stored as keys
        (looking one up on the Counter still yields 0).
    """
    # Single lazy pass: tokenise each line and keep only the wanted tokens.
    return Counter(
        token
        for line in fileobj
        for token in line.split()
        if token in words
    )
def count_words_in_dir(dirpath, words, action=None):
    """Count the given words in every ``.txt`` file directly inside a directory.

    Args:
        dirpath: Directory to scan (not recursive).
        words: Words to count; passed through to ``word_frequency``.
        action: Optional callable invoked as ``action(filepath, counter)``
            once per file. When omitted, the counts are computed and
            discarded.
    """
    # glob returns matches in arbitrary order; sorting makes the sequence of
    # callbacks (and therefore any output they produce) reproducible.
    for filepath in sorted(glob.glob(os.path.join(dirpath, '*.txt'))):
        with open(filepath) as f:
            ct = word_frequency(f, words)
        # The counter is fully built by now, so the callback runs with the
        # file already closed.
        if action:
            action(filepath, ct)
def print_csv(filepath, ct):
    """Write one CSV row summarising the word counts of a single file.

    The row has three cells: the file's title (file name without directory
    or extension, stripped of surrounding whitespace), the counted words
    joined by ", ", and their counts joined by ", " in the same order.
    Words are listed alphabetically; if ``ct`` is a Counter, iterating
    ``ct.most_common()`` instead would give descending-count order.

    Output goes through the module-level ``writer`` object.

    Args:
        filepath: Path of the file the counts came from.
        ct: Mapping of word -> count.
    """
    # basename/splitext instead of slicing [2:-4]: the slice is only right
    # for a two-character directory prefix and a four-character extension.
    title = os.path.splitext(os.path.basename(filepath))[0].strip()
    words = sorted(ct)
    counts = [str(ct[word]) for word in words]
    writer.writerow((title, ', '.join(words), ', '.join(counts)))
# Categories to count in every title file. The former 'Future ' entry (with a
# trailing space) was dropped: tokens come from str.split(), which never
# yields whitespace, so that entry could not match anything.
words = {
    'Elaboration', 'Academic', 'Emotion', 'Character', 'Personal',
    'Reporting', 'Assert', 'Descript', 'Narrative', 'Interactive', 'Past',
    'Reason', 'Public', 'Directives', 'Future',
}
try:
    count_words_in_dir('.', words, action=print_csv)
finally:
    # Close (and flush) the CSV even if counting fails part-way through.
    ofile.close()
I currently get output like the below in the CSV. What I want is to have each row sorted by count in descending order. So in the example below, the first row would have Personal, Emotion, Narrative, Past in the Class column and 9, 6, 5, 4 in the Count column.
Title Class Count
Passage11a Emotion, Narrative, Past, Personal 6, 5, 4, 9
Passage15a Public, Reason, Reporting 3, 2, 12
Passage15b Public, Reason, Reporting 10, 4, 14
I know it's kludgy as hell, but I'm still a newbie. Any help or ideas would be greatly appreciated.
Solved it:
OrderedDict(sorted(ct.iteritems(), key=lambda item: item[1],reverse=True))
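Building an OrderedDict from the (word, count) pairs sorted by count keeps them in that descending order, which gave me what I wanted.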
The technical posts on this site are licensed under CC BY-SA 4.0. If you reprint them, please cite the site URL or the original address. For any questions, please contact: yoyou2525@163.com.