I need help sorting my key-value pairs. My output is at this URL: http://pastebin.com/ckKAtP5y. Here is a sample of the data I am working with:
{
"courses": [
{
"professors": [
{
"first_name": "Zvezdelina",
"last_name": "Stankova",
"professor_url": "http://www.ratemyprofessors.com/ShowRatings.jsp?tid=375269",
"helpfullness": 4.3,
"clarity": 4.3,
"overall_rating": 4.3
}],
"course_name": "CHEM 1",
"course_mentioned_times": 37
},
{
"professors": [
{
"first_name": "Alan",
"last_name": "Shabel",
"professor_url": "http://www.ratemyprofessors.com/ShowRatings.jsp?tid=1309831",
"helpfullness": 3.9,
"clarity": 3.5,
"overall_rating": 3.7
}],
"course_name": "CHEMISTRY 231",
"course_mentioned_times": 50
}
]
}
What I want to do is compare course names that share a prefix, such as 'CHEM' and 'CHEMISTRY' in "course_name", keep only the one with the highest 'course_mentioned_times', and remove the other. In this case I'd want CHEMISTRY 231, because it's mentioned 50 times.
Here's what I've been helped with so far.
def keep_most_mentioned(courses, prefix_len=3):
    """Return the most-mentioned course(s) for each course-name prefix.

    Courses are grouped by the first ``prefix_len`` characters of their
    ``'course_name'`` (case-sensitive). Within each group only the
    course(s) with the highest ``'course_mentioned_times'`` are kept;
    courses tied for the maximum are all kept. A course whose prefix is
    unique is always kept.

    The input list is not modified. The returned list preserves the
    original relative order of the surviving courses.

    Raises ``KeyError`` if a course lacks ``'course_name'`` or
    ``'course_mentioned_times'``.
    """
    # First pass: highest mention count seen for each prefix.
    best_by_prefix = {}
    for course in courses:
        prefix = course['course_name'][:prefix_len]
        times = course['course_mentioned_times']
        if prefix not in best_by_prefix or times > best_by_prefix[prefix]:
            best_by_prefix[prefix] = times

    # Second pass: build a NEW list instead of popping from the list being
    # indexed. Popping shifts every later index, which is what produced
    # "IndexError: pop index out of range".
    return [
        course for course in courses
        if course['course_mentioned_times']
        == best_by_prefix[course['course_name'][:prefix_len]]
    ]


if __name__ == "__main__":
    import json

    # 'output_info.json' is http://pastebin.com/ckKAtP5y
    with open('output_info.json') as data_file:
        data = json.load(data_file)

    data['courses'] = keep_most_mentioned(data['courses'])

    # Writing the new json data back to data.json file.
    with open('data.json', 'w') as f:
        json.dump(data, f)
This code raises an IndexError at the line data['courses'].pop(i): "IndexError: pop index out of range".
After a lot of back and forth in the question's comments:
#coding:utf-8
import json
filename = 'data.json'

# Read the JSON document; the file is closed before any processing starts.
with open(filename, 'r') as f:
    data = json.load(f)

courses = data.get('courses', None)
if not courses:
    raise Exception('No courses could be found on {}'.format(filename))

# Group the courses in a single pass. Each course is keyed by the first
# three characters of the first word of its name, giving:
#   {key: {course_name: {'course_mentioned_times': ..., 'professors': ...}}}
grouped = {}
for course in courses:
    course_name = course.get('course_name', None)
    name_parts = (course_name or '').split()
    if not name_parts:
        # A missing or blank name has no first word to derive a key from;
        # skip the entry rather than crash on it.
        continue
    group_key = name_parts[0][0:3]
    grouped.setdefault(group_key, {})[course_name] = {
        'course_mentioned_times': course.get('course_mentioned_times'),
        'professors': course.get('professors', None),
    }

# `keys` is the sorted list of prefixes; `results` holds the groups under
# the 'courses' entry, inserted in that same sorted order.
keys = sorted(grouped)
results = {'courses': {}}
for key in keys:
    results['courses'][key] = grouped[key]
def get_most_mentioned(name, grouped=None):
    """Return the most-mentioned course(s) in the group that *name* maps to.

    Only the first three characters of ``name`` are used as the group key.

    ``grouped`` maps a key to ``{course_name: {'course_mentioned_times': ...,
    'professors': ...}}``. When omitted, the module-level
    ``results['courses']`` mapping is used.

    Returns a list of dicts with the keys ``'course_name'``,
    ``'course_mentioned_times'`` and ``'professors'``, one per course tied
    for the highest mention count. Returns an empty list when the key is
    unknown or its group is empty.
    """
    if grouped is None:
        grouped = results.get('courses') or {}

    group = grouped.get(name[0:3]) or {}
    if not group:
        # Unknown prefix or empty group: nothing to rank, and max() would
        # raise ValueError on an empty sequence.
        return []

    max_mentioned_times = max(
        values.get('course_mentioned_times') for values in group.values()
    )
    return [
        {
            'course_name': course_name,
            'course_mentioned_times': values.get('course_mentioned_times'),
            'professors': values.get('professors'),
        }
        for course_name, values in group.items()
        if values.get('course_mentioned_times') == max_mentioned_times
    ]
# Print, for every prefix in `keys`, the course(s) with the highest mention
# count together with their professors.
# Each print() call takes exactly one argument, so the output is the same
# under Python 2 (parenthesised print statement) and Python 3 (function).
print("Course with most mentioned times:")
print("---------------------------------")
for key in keys:
    print("[*] For Key '{}':".format(key))
    for item in get_most_mentioned(key):
        course_name = item.get('course_name', None)
        print(" Course Name: {}".format(course_name))
        print(" Mentioned Times: {}\n".format(item.get('course_mentioned_times')))
        print(" Professors:\n")
        # A course without a 'professors' entry yields None; treat it as an
        # empty list so enumerate() does not raise TypeError.
        for i, professor in enumerate(item.get('professors', None) or [], start=1):
            print(" {}) Full name: {} {}".format(i, professor.get('first_name'), professor.get('last_name')))
            print(" URL: {}".format(professor.get('professor_url')))
            print(" Helpfullness: {}".format(professor.get('helpfullness')))
            print(" Clarity: {}".format(professor.get('clarity')))
            print(" Overall_rating: {}".format(professor.get('overall_rating')))
            print("")
        print("")
import json
import collections
# Load the course data.
with open('output_info.json') as data_file:
    data = json.load(data_file)
courses = data['courses']

# Bucket every course under the upper-cased first three characters of the
# text that precedes the first space in its name.
courses_by_prefix = collections.defaultdict(list)
for entry in courses:
    first_word, _, _ = entry['course_name'].partition(' ')
    bucket = first_word.upper()[:3]
    courses_by_prefix[bucket].append(entry)


def _mention_count(entry):
    """Return how many times the given course was mentioned."""
    return entry['course_mentioned_times']


# From each bucket keep the single course with the highest mention count.
results = [
    max(bucket_courses, key=_mention_count)
    for bucket_courses in courses_by_prefix.values()
]
print(results)
The technical posts on this site are licensed under CC BY-SA 4.0. If you reprint them, please cite the site URL or the original address. For any questions, please contact: yoyou2525@163.com.