简体   繁体   中英

Python IndexError - Need help sorting key and Value

I need help filtering my key-value data. My current output is at this URL: http://pastebin.com/ckKAtP5y. Here is a sample of what it looks like:

{
    "courses": [
        {
            "professors": [
                {
                    "first_name": "Zvezdelina",
                    "last_name": "Stankova",
                    "professor_url": "http://www.ratemyprofessors.com/ShowRatings.jsp?tid=375269",
                    "helpfullness": 4.3,
                    "clarity": 4.3,
                    "overall_rating": 4.3
                }
            ],
            "course_name": "CHEM 1",
            "course_mentioned_times": 37
        },
        {
            "professors": [
                {
                    "first_name": "Alan",
                    "last_name": "Shabel",
                    "professor_url": "http://www.ratemyprofessors.com/ShowRatings.jsp?tid=1309831",
                    "helpfullness": 3.9,
                    "clarity": 3.5,
                    "overall_rating": 3.7
                }
            ],
            "course_name": "CHEMISTRY 231",
            "course_mentioned_times": 50
        }
    ]
}

What I want to do is compare entries whose "course_name" starts with the same prefix (here 'CHEM' and 'CHEMISTRY'), keep only the one with the highest 'course_mentioned_times', and remove the other one. In this case I'd want CHEMISTRY 231, because it's mentioned 50 times.

Here's the code I've been helped with so far.

def keep_most_mentioned(courses):
    """Return the courses that are the most mentioned within their prefix group.

    Courses are grouped by the first three characters of ``course_name``.
    Within each group only the course(s) whose ``course_mentioned_times``
    equals the group's maximum are kept; ties are all kept.  A course whose
    prefix is unique is therefore always kept.  The relative order of the
    input list is preserved and the input list itself is not modified.

    Args:
        courses: list of dicts, each with ``course_name`` (str) and
            ``course_mentioned_times`` (comparable number) keys.

    Returns:
        A new list containing the surviving course dicts.
    """
    # First pass: highest mention count seen for each three-character prefix.
    # Slicing (rather than indexing [0], [1], [2]) keeps names shorter than
    # three characters from raising IndexError.
    best_by_prefix = {}
    for course in courses:
        prefix = course['course_name'][:3]
        times = course['course_mentioned_times']
        if prefix not in best_by_prefix or times > best_by_prefix[prefix]:
            best_by_prefix[prefix] = times

    # Second pass: build a new list instead of popping by index.  Popping
    # shifts every later index, which is what made the previous
    # ``data['courses'].pop(i)`` loop run out of range.
    return [
        course for course in courses
        if course['course_mentioned_times'] == best_by_prefix[course['course_name'][:3]]
    ]


if __name__ == "__main__":
    import json

    # 'output_info.json' is http://pastebin.com/ckKAtP5y
    with open('output_info.json') as data_file:
        data = json.load(data_file)

    data['courses'] = keep_most_mentioned(data['courses'])

    # Writing the new json data back to data.json file.
    with open('data.json', 'w') as f:
        json.dump(data, f)

This code raises an IndexError at the line data['courses'].pop(i): "IndexError: pop index out of range".

After a lot of back and forth in the question's comments:

#coding:utf-8

import json

filename = 'data.json'

# Parse the course file; the handle is only needed while json.load() runs.
with open(filename, 'r') as f:
    data = json.load(f)

courses = data.get('courses', None)
if not courses:
    raise Exception('No courses could be found on {}'.format(filename))

# results['courses'] maps a three-character prefix (the start of the first
# whitespace-separated word of the course name) to a dict of
# {course_name: {'course_mentioned_times': ..., 'professors': ...}}.
results = {'courses': {}}
for course in courses:
    course_name = course.get('course_name', None)
    key = course_name.strip().split()[0][0:3]
    # A single pass files each course under its prefix; there is no need to
    # rescan the whole course list once per prefix.
    results['courses'].setdefault(key, {})[course_name] = {
        'course_mentioned_times': course.get('course_mentioned_times'),
        'professors': course.get('professors', None),
    }

# Sorted list of the distinct prefixes found above.
keys = sorted(results['courses'])

def get_most_mentioned(name):
    """Return the most-mentioned course(s) in the prefix group of *name*.

    Only the first three characters of *name* are used; they select a group
    in the module-level ``results['courses']`` mapping, whose values map a
    course name to a dict holding ``course_mentioned_times`` and
    ``professors``.

    Returns:
        A list of dicts with ``course_name``, ``course_mentioned_times`` and
        ``professors`` keys, one per course whose mention count equals the
        group's maximum (several on a tie).  An unknown prefix or an empty
        group yields an empty list.
    """
    group = (results.get('courses', None) or {}).get(name[0:3])
    if not group:
        # Unknown prefix or empty group: nothing to rank.  Without this
        # guard, None.keys() or max() of an empty sequence would raise.
        return []

    max_mentioned_times = max(
        values.get('course_mentioned_times') for values in group.values()
    )

    return [
        {
            'course_name': course_name,
            'course_mentioned_times': values.get('course_mentioned_times', None),
            'professors': values.get('professors'),
        }
        for course_name, values in group.items()
        if values.get('course_mentioned_times', None) == max_mentioned_times
    ]

# Report the most-mentioned course(s) for every prefix in ``keys``.
# Each print() call takes exactly one argument, so the output is the same on
# Python 2 (where the parentheses just group the expression) and Python 3.
print("Course with most mentioned times:")
print("---------------------------------")
for key in keys:
    print("[*] For Key '{}':".format(key))
    for item in get_most_mentioned(key):
        course_name = item.get('course_name', None)
        print("    Course Name: {}".format(course_name))
        print("    Mentioned Times: {}\n".format(item.get('course_mentioned_times')))
        print("    Professors:\n")
        # Professors are numbered from 1 in the listing.
        for i, professor in enumerate(item.get('professors', None), start=1):
            print("         {}) Full name: {} {}".format(i, professor.get('first_name'), professor.get('last_name')))
            print("            URL: {}".format(professor.get('professor_url')))
            print("            Helpfullness: {}".format(professor.get('helpfullness')))
            print("            Clarity: {}".format(professor.get('clarity')))
            print("            Overall_rating: {}".format(professor.get('overall_rating')))
            print("")
    print("")
import json
import collections

# Load the course data from disk.
with open('output_info.json') as source:
    data = json.load(source)

courses = data['courses']

# Bucket the courses by the upper-cased first three characters of the text
# that precedes the first space in the course name.
courses_by_prefix = collections.defaultdict(list)
for course in courses:
    first_word = course['course_name'].split(' ', 2)[0]
    prefix = first_word.upper()[:3]
    courses_by_prefix[prefix].append(course)

# From every bucket keep the single course with the highest mention count
# (max() returns the first one encountered on a tie).
results = [
    max(bucket, key=lambda entry: entry['course_mentioned_times'])
    for bucket in courses_by_prefix.values()
]

print(results)

The technical posts on this site are licensed under CC BY-SA 4.0. If you reprint them, please cite the site URL or the original address. For any questions, please contact: yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM