繁体   English   中英

Python - 如何计算列表中的特定部分

[英]Python - How to count specific section in a list

我是 Python 的新手,正苦于不知道如何在 Python 中对 CSV 文件的某些部分进行累加统计。我不允许使用 `import csv`。我使用的是从 https://vincentarelbundock.github.io/Rdatasets/datasets.html 下载的 TipJoke CSV 文件。

这是迄今为止我唯一有效的代码,我完全不知道从哪里开始。

from pprint import pprint
from string import punctuation


def read_tip_lines(path="TipJoke.csv"):
    """Return every line of the file at *path* with double quotes removed.

    Args:
        path: Location of the CSV file to read.

    Returns:
        A list with one string per line of the file, in file order.
        Line endings are kept exactly as they were read.

    Raises:
        OSError: If the file cannot be opened.
    """
    # `with` closes the file even if reading fails part-way through.
    with open(path, "r") as f:
        # deletes the quotes
        return [line.replace('"', '') for line in f]


if __name__ == '__main__':
    tipList = read_tip_lines()
    pprint(tipList)

输出:

 [',Card,Tip,Ad,Joke,None\n',
 '1,None,1,0,0,1\n',
 '2,Joke,1,0,1,0\n',
 '3,Ad,0,1,0,0\n',
 '4,None,0,0,0,1\n',
 '5,None,1,0,0,1\n',
 '6,None,0,0,0,1\n',
 '7,Ad,0,1,0,0\n',
 '8,Ad,0,1,0,0\n',
 '9,None,0,0,0,1\n',
 '10,None,0,0,0,1\n',
 '11,None,1,0,0,1\n',
 '12,Ad,0,1,0,0\n',
 '13,None,0,0,0,1\n',
 '14,Ad,1,1,0,0\n',
 '15,Joke,1,0,1,0\n',
 '16,Joke,0,0,1,0\n',
 '17,Joke,1,0,1,0\n',
 '18,None,0,0,0,1\n',
 '19,Joke,0,0,1,0\n',
 '20,None,0,0,0,1\n',
 '21,Ad,1,1,0,0\n',
 '22,Ad,1,1,0,0\n',
 '23,Ad,0,1,0,0\n',
 '24,Joke,0,0,1,0\n',
 '25,Joke,1,0,1,0\n',
 '26,Joke,0,0,1,0\n',
 '27,None,1,0,0,1\n',
 '28,Joke,1,0,1,0\n',
 '29,Joke,1,0,1,0\n',
 '30,None,1,0,0,1\n',
 '31,Joke,0,0,1,0\n',
 '32,None,1,0,0,1\n',
 '33,Joke,1,0,1,0\n',
 '34,Ad,0,1,0,0\n',
 '35,Joke,0,0,1,0\n',
 '36,Ad,1,1,0,0\n',
 '37,Joke,0,0,1,0\n',
 '38,Ad,0,1,0,0\n',
 '39,Joke,0,0,1,0\n',
 '40,Joke,0,0,1,0\n',
 '41,Joke,1,0,1,0\n',
 '42,None,0,0,0,1\n',
 '43,None,0,0,0,1\n',
 '44,Ad,0,1,0,0\n',
 '45,None,0,0,0,1\n',
 '46,None,0,0,0,1\n',
 '47,Ad,0,1,0,0\n',
 '48,Joke,0,0,1,0\n',
 '49,Joke,1,0,1,0\n',
 '50,None,1,0,0,1\n',
 '51,None,0,0,0,1\n',
 '52,Joke,1,0,1,0\n',
 '53,Joke,1,0,1,0\n',
 '54,Joke,0,0,1,0\n',
 '55,None,1,0,0,1\n',
 '56,Ad,0,1,0,0\n',
 '57,Joke,0,0,1,0\n',
 '58,None,0,0,0,1\n',
 '59,Ad,0,1,0,0\n',
 '60,Joke,1,0,1,0\n',
 '61,Ad,0,1,0,0\n',
 '62,None,1,0,0,1\n',
 '63,Joke,0,0,1,0\n',
 '64,Ad,0,1,0,0\n',
 '65,Joke,0,0,1,0\n',
 '66,Ad,0,1,0,0\n',
 '67,Ad,0,1,0,0\n',
 '68,Ad,0,1,0,0\n',
 '69,None,0,0,0,1\n',
 '70,Joke,1,0,1,0\n',
 '71,None,1,0,0,1\n',
 '72,None,0,0,0,1\n',
 '73,None,0,0,0,1\n',
 '74,Joke,0,0,1,0\n',
 '75,Ad,1,1,0,0\n',
 '76,Ad,0,1,0,0\n',
 '77,Ad,1,1,0,0\n',
 '78,Joke,0,0,1,0\n',
 '79,Joke,0,0,1,0\n',
 '80,Ad,1,1,0,0\n',
 '81,Ad,0,1,0,0\n',
 '82,None,0,0,0,1\n',
 '83,Ad,0,1,0,0\n',
 '84,Joke,0,0,1,0\n',
 '85,Joke,0,0,1,0\n',
 '86,Ad,1,1,0,0\n',
 '87,None,1,0,0,1\n',
 '88,Joke,1,0,1,0\n',
 '89,Ad,0,1,0,0\n',
 '90,None,0,0,0,1\n',
 '91,None,0,0,0,1\n',
 '92,Joke,0,0,1,0\n',
 '93,Joke,0,0,1,0\n',
 '94,Ad,0,1,0,0\n',
 '95,Ad,0,1,0,0\n',
 '96,Ad,0,1,0,0\n',
 '97,Joke,1,0,1,0\n',
 '98,None,0,0,0,1\n',
 '99,None,0,0,0,1\n',
 '100,None,1,0,0,1\n',
 '101,Joke,0,0,1,0\n',
 '102,Joke,0,0,1,0\n',
 '103,Ad,1,1,0,0\n',
 '104,Ad,0,1,0,0\n',
 '105,Ad,0,1,0,0\n',
 '106,Ad,1,1,0,0\n',
 '107,Ad,0,1,0,0\n',
 '108,None,0,0,0,1\n',
 '109,Ad,0,1,0,0\n',
 '110,Joke,1,0,1,0\n',
 '111,None,0,0,0,1\n',
 '112,Ad,0,1,0,0\n',
 '113,Ad,0,1,0,0\n',
 '114,None,0,0,0,1\n',
 '115,Ad,0,1,0,0\n',
 '116,None,0,0,0,1\n',
 '117,None,0,0,0,1\n',
 '118,Ad,0,1,0,0\n',
 '119,None,1,0,0,1\n',
 '120,Ad,1,1,0,0\n',
 '121,Ad,0,1,0,0\n',
 '122,Ad,1,1,0,0\n',
 '123,None,0,0,0,1\n',
 '124,None,0,0,0,1\n',
 '125,Joke,1,0,1,0\n',
 '126,Joke,1,0,1,0\n',
 '127,Ad,0,1,0,0\n',
 '128,Joke,0,0,1,0\n',
 '129,Joke,0,0,1,0\n',
 '130,Ad,0,1,0,0\n',
 '131,None,0,0,0,1\n',
 '132,None,0,0,0,1\n',
 '133,None,0,0,0,1\n',
 '134,Joke,1,0,1,0\n',
 '135,Ad,0,1,0,0\n',
 '136,None,0,0,0,1\n',
 '137,Joke,0,0,1,0\n',
 '138,Ad,0,1,0,0\n',
 '139,Ad,0,1,0,0\n',
 '140,None,0,0,0,1\n',
 '141,Joke,0,0,1,0\n',
 '142,None,0,0,0,1\n',
 '143,Ad,0,1,0,0\n',
 '144,None,1,0,0,1\n',
 '145,Joke,0,0,1,0\n',
 '146,Ad,0,1,0,0\n',
 '147,Ad,0,1,0,0\n',
 '148,Ad,0,1,0,0\n',
 '149,Joke,1,0,1,0\n',
 '150,Ad,1,1,0,0\n',
 '151,Joke,1,0,1,0\n',
 '152,None,0,0,0,1\n',
 '153,Ad,0,1,0,0\n',
 '154,None,0,0,0,1\n',
 '155,None,0,0,0,1\n',
 '156,Ad,0,1,0,0\n',
 '157,Ad,0,1,0,0\n',
 '158,Joke,0,0,1,0\n',
 '159,None,0,0,0,1\n',
 '160,Joke,1,0,1,0\n',
 '161,None,1,0,0,1\n',
 '162,Ad,1,1,0,0\n',
 '163,Joke,0,0,1,0\n',
 '164,Joke,0,0,1,0\n',
 '165,Ad,0,1,0,0\n',
 '166,Joke,1,0,1,0\n',
 '167,Joke,1,0,1,0\n',
 '168,Ad,0,1,0,0\n',
 '169,Joke,1,0,1,0\n',
 '170,Joke,0,0,1,0\n',
 '171,Ad,0,1,0,0\n',
 '172,Joke,0,0,1,0\n',
 '173,Joke,0,0,1,0\n',
 '174,Ad,0,1,0,0\n',
 '175,None,0,0,0,1\n',
 '176,Joke,1,0,1,0\n',
 '177,Ad,0,1,0,0\n',
 '178,Joke,0,0,1,0\n',
 '179,Joke,0,0,1,0\n',
 '180,None,0,0,0,1\n',
 '181,None,0,0,0,1\n',
 '182,Ad,0,1,0,0\n',
 '183,None,0,0,0,1\n',
 '184,None,0,0,0,1\n',
 '185,None,0,0,0,1\n',
 '186,None,0,0,0,1\n',
 '187,Ad,0,1,0,0\n',
 '188,None,1,0,0,1\n',
 '189,Ad,0,1,0,0\n',
 '190,Ad,0,1,0,0\n',
 '191,Ad,0,1,0,0\n',
 '192,Joke,1,0,1,0\n',
 '193,Joke,0,0,1,0\n',
 '194,Ad,0,1,0,0\n',
 '195,None,0,0,0,1\n',
 '196,Joke,1,0,1,0\n',
 '197,Joke,0,0,1,0\n',
 '198,Joke,1,0,1,0\n',
 '199,Ad,0,1,0,0\n',
 '200,None,0,0,0,1\n',
 '201,Joke,1,0,1,0\n',
 '202,Joke,0,0,1,0\n',
 '203,Joke,0,0,1,0\n',
 '204,Ad,0,1,0,0\n',
 '205,None,0,0,0,1\n',
 '206,Ad,0,1,0,0\n',
 '207,Ad,0,1,0,0\n',
 '208,Joke,0,0,1,0\n',
 '209,Ad,0,1,0,0\n',
 '210,Joke,0,0,1,0\n',
 '211,None,0,0,0,1\n']

我目前正在尝试求出指定卡片类型的条目总数,以及该卡片类型中给了小费的条目所占的百分比(保留两位小数)。Tip 列是紧跟在卡片类型(None、Ad、Joke)后面的 0 或 1。

下面的代码可以运行,它会统计每种卡片类型收到小费的次数:

from pprint import pprint
from string import punctuation

def count_tips(path="TipJoke.csv"):
    """Sum the tip column for each card type in the CSV file at *path*.

    Each line has its double quotes and newline removed and is split on
    commas; the second field is used as the card type and the third as
    the integer tip value.

    Args:
        path: Location of the CSV file to read.

    Returns:
        A dict with the keys "Joke", "Ad" and "None", each mapped to the
        sum of the tip values found on that card type's rows.

    Raises:
        OSError: If the file cannot be opened.
    """
    counts = {"Joke": 0, "Ad": 0, "None": 0}
    with open(path, "r") as f:
        for line in f:
            line_clean = line.replace('"', "").replace("\n", "").split(",")
            try:
                counts[line_clean[1]] += int(line_clean[2])
            except (IndexError, KeyError, ValueError):
                # Skip rows that are not data: too few fields (IndexError),
                # an unknown card type such as the header's "Card"
                # (KeyError), or a non-numeric tip value (ValueError).
                # Anything else is a real error and should propagate.
                continue
    return counts


if __name__ == "__main__":
    counts = count_tips()
    print(counts)

如果你被允许使用 pandas 库,那么

import pandas as pd

# Load the whole CSV file into a DataFrame.
df = pd.read_csv("TipJoke.csv")

df 是一个 Pandas 数据框对象,您可以根据需要在其中执行多个过滤任务。

例如,如果你想获取笑话的数据,你可以这样过滤:

# Boolean-mask the frame: keep only the rows whose "Card" value equals "Joke".
print(df[df["Card"] == "Joke"])

不过,我只是为您提供方向,而不是您问题的全部逻辑。

暂无
暂无

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM