简体   繁体   中英

Sort multiple items within a row from a list by descending order in Python

I am trying to get each row sorted in descending order.

Right now this script splits a large file at the pattern 'Title=', creates individual files and renames each one after the text that follows 'Title=', then counts the frequency of a set of words within each file. It then writes those counts to a CSV file.

import os
from collections import *
import glob
import csv



# Input file to split. Raw string so the backslash in the Windows path is
# taken literally instead of being parsed as an (invalid) escape sequence.
select = r'D:\To.Sort.txt'
# Name of the CSV file the word counts are written to.
sav = 'Sorted.3.csv'

def files():
    """Yield an endless series of freshly opened output files.

    Each file is opened for writing in the current directory and named
    'wworking<N>.txt', with N starting at 1 and increasing by one per
    file. The caller is responsible for closing every file it receives.
    """
    index = 1
    while True:
        yield open('wworking%d.txt' % index, 'w')
        index += 1

# Split the input into one working file per 'Title=' marker.
pat = 'Title='
fs = files()
outfile = next(fs)

with open(select, 'r') as infile:
    for line in infile:
        if pat not in line:
            outfile.write(line)
            continue
        # A line may hold several markers: text before the first marker
        # belongs to the current chunk, each later piece starts a new one.
        items = line.split(pat)
        outfile.write(items[0])
        for item in items[1:]:
            # Close the finished chunk explicitly rather than relying on
            # garbage collection to flush and release it.
            outfile.close()
            outfile = next(fs)
            outfile.write(pat + item)

outfile.close()
# The first working file only holds whatever preceded the first marker,
# so it is not a titled chunk and is discarded.
os.remove('wworking1.txt')



# Rename every working file after the title found on its first line.
for filename in os.listdir("."):
    if not filename.startswith("wworking"):
        continue
    base, ext = os.path.splitext(filename)
    with open(filename, 'r') as infile:
        # The next() builtin matches the next(fs) calls used when splitting
        # and works wherever the file object is an iterator.
        newname = next(infile).rstrip()
    # Drop only the leading 6-character 'Title=' marker; str.replace would
    # also strip any later occurrence of the same text inside the title.
    newname = newname[6:] + ext
    # The file is closed before renaming (the with-block has ended).
    os.rename(filename, newname)

# Open the results CSV; the handle stays open while the rows are written and
# is closed at the end of the script.
# NOTE(review): binary mode ("wb") is the Python 2 convention for csv files.
# Under Python 3 the csv module needs a text-mode file opened with newline=''.
ofile  = open(sav, "wb")
# QUOTE_NONNUMERIC quotes every non-numeric field; print_csv passes only
# strings, so each field of each row ends up quoted.
writer = csv.writer(ofile, quoting=csv.QUOTE_NONNUMERIC)
# Header row naming the three columns written by print_csv.
writer.writerow( ('Title', 'Class', 'Count') )

def word_frequency(fileobj, words):
    """Count how often each of the given words occurs in fileobj.

    Args:
        fileobj: iterable of text lines, e.g. an open text file.
        words: container of the words to count; membership is tested
            with ``in``, so a set gives the fastest lookups.

    Returns:
        A Counter keyed by the words that actually occur. Words from
        ``words`` that never appear are absent rather than stored with a
        zero count, so callers iterating the keys only see matches.
    """
    # split() without arguments breaks on any run of whitespace, so each
    # token is compared exactly (case-sensitive, punctuation included).
    return Counter(
        token
        for line in fileobj
        for token in line.split()
        if token in words
    )


def count_words_in_dir(dirpath, words, action=None):
    """Tally the given words in every '*.txt' file directly inside dirpath.

    Each matching file is opened and passed to word_frequency. When a
    truthy ``action`` is supplied it is called as ``action(filepath, counts)``
    while the file is still open; otherwise the counts are discarded.
    Nothing is returned.
    """
    pattern = os.path.join(dirpath, '*.txt')
    for filepath in glob.iglob(pattern):
        with open(filepath) as handle:
            tally = word_frequency(handle, words)
            if not action:
                continue
            action(filepath, tally)

def print_csv(filepath, ct):
    """Write one CSV row summarising the word counts for a file.

    The row holds three strings: the title (filepath with its first two
    and last four characters removed, then stripped), the counted words in
    alphabetical order joined by ', ', and their counts in the same order
    joined by ', '. The row is written through the module-level ``writer``.
    """
    ordered = sorted(ct)
    title = filepath[2:-4].strip()
    classes = ', '.join(ordered)
    tallies = ', '.join(str(ct[name]) for name in ordered)
    writer.writerow((title, classes, tallies))

# Category labels to count in each file. Entries must not contain
# whitespace: tokens are produced by str.split(), so a label such as
# 'Future ' (with a trailing space) could never match and is omitted.
words = {
    'Elaboration', 'Academic', 'Emotion', 'Character', 'Personal',
    'Reporting', 'Assert', 'Descript', 'Narrative', 'Interactive',
    'Past', 'Reason', 'Public', 'Directives', 'Future',
}

try:
    count_words_in_dir('.', words, action=print_csv)
finally:
    # Always flush and release the CSV, even if counting fails part-way.
    ofile.close()

I currently get output like the below in a CSV. What I want is to have each row sorted by count in descending order. So in the example below, the first row would have Personal, Emotion, Narrative, Past in the Class column and 9, 6, 5, 4 in the Count column.

Title       Class                               Count
Passage11a  Emotion, Narrative, Past, Personal  6, 5, 4, 9
Passage15a  Public, Reason, Reporting           3, 2, 12
Passage15b  Public, Reason, Reporting           10, 4, 14

I know it's kludgy as hell, but I'm still a newb. Any help or ideas would be greatly appreciated.

Solved it:

OrderedDict(sorted(ct.iteritems(), key=lambda item: item[1],reverse=True))

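OrderedDict gave me what I wanted: it keeps the items in the order they were inserted, so building it from the count-sorted pairs preserves the descending order when the row is written.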

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM