繁体   English   中英

需要在python中比较两个csv文件

[英]need to compare two csv files in python

我有两个 CSV 格式的文件,想要比较其中的列。我希望用于比较的列表保存在另一个 CSV 文件中。

import tkinter as tk
import pandas as pd
from tkinter.filedialog import askopenfilename
def latlong_func(filename):
data = pd.read_csv(filename)
data["latlong"] = 'FALSE'
data.loc[(data["FAC_TYPE"] ==9500)|
(data["FAC_TYPE"] ==9501)|
(data["FAC_TYPE"] ==9502)|
(data["FAC_TYPE"] ==9503)|
(data["FAC_TYPE"] ==9504)|
(data["FAC_TYPE"] ==9506)|
(data["FAC_TYPE"] ==9507)|
(data["FAC_TYPE"] ==9508)|
(data["FAC_TYPE"] ==9509)|
(data["FAC_TYPE"] ==9510)|
(data["FAC_TYPE"] ==9511)|
(data["FAC_TYPE"] ==9514)|
(data["FAC_TYPE"] ==9515)|
(data["FAC_TYPE"] ==9516)|
(data["FAC_TYPE"] ==9517)|
(data["FAC_TYPE"] ==9520)|
(data["FAC_TYPE"] ==9521)|
(data["FAC_TYPE"] ==9522)|
(data["FAC_TYPE"] ==9527)|
(data["FAC_TYPE"] ==9529)|
(data["FAC_TYPE"] ==9529)|
(data["FAC_TYPE"] ==9531)|
(data["FAC_TYPE"] ==9536)|
(data["FAC_TYPE"] ==9529)|
(data["FAC_TYPE"] ==9529)|
(data["FAC_TYPE"] ==9529)|
(data["FAC_TYPE"] ==9540)|
(data["FAC_TYPE"] ==9542)|
(data["FAC_TYPE"] ==9544)|
(data["FAC_TYPE"] ==9545)|
(data["FAC_TYPE"] ==9546)|
(data["FAC_TYPE"] ==9547)|
(data["FAC_TYPE"] ==9548)|
(data["FAC_TYPE"] ==9549)|
(data["FAC_TYPE"] ==9550)|
(data["FAC_TYPE"] ==9551)|
(data["FAC_TYPE"] ==9552)|
(data["FAC_TYPE"] ==9553)|
(data["FAC_TYPE"] ==9554)|
(data["FAC_TYPE"] ==9555)|
(data["FAC_TYPE"] ==9556)|
(data["FAC_TYPE"] ==9557)|
(data["FAC_TYPE"] ==9558)|
(data["FAC_TYPE"] ==9559)|
(data["FAC_TYPE"]==9561)|
(data["FAC_TYPE"]==9562)|
(data["FAC_TYPE"] ==9563)|
(data["FAC_TYPE"] ==9564)|
(data["FAC_TYPE"] ==9566)|
(data["FAC_TYPE"] ==9513)|
(data["FAC_TYPE"] ==9569)|
(data["FAC_TYPE"] ==9572)|
(data["FAC_TYPE"] ==9574)|
(data["FAC_TYPE"] ==9575)|
(data["FAC_TYPE"] ==9576)|
(data["FAC_TYPE"] ==9577)|
(data["FAC_TYPE"] ==9578)|
(data["FAC_TYPE"] ==9580)|
(data["FAC_TYPE"] ==9581)|
(data["FAC_TYPE"] ==9584)|
(data["FAC_TYPE"] ==9585)|
(data["FAC_TYPE"] ==9586)|
(data["FAC_TYPE"] ==9990)|
(data["FAC_TYPE"] ==9589), ["latlong"]] ='TRUE'

data.to_csv("C:/Users/patesari/Desktop/python 
work/practice1.csv")

writer = data[data["latlong"]=='TRUE']
writer.to_csv('C:/Users/patesari/Desktop/python 
work/newfile1.csv', index=False)
writer.to_csv('C:/Users/patesari/Desktop/python 
work/outputfiles/latlong1.csv', index=False)
def get_filename():

filename0 = askopenfilename(filetypes =[('Python Files', '*.csv')])
filename1 = askopenfilename(filetypes =[('Python Files', '*.csv')])
if filename:
    lbl['text'] = filename
    latlong_func(filename)
else:
    lbl['text'] = 'not selected'
root = tk.Tk()
root.geometry('200x100')
lbl = tk.Label(root, text='Select filename')
lbl.pack(side='top', pady=10)
btn = tk.Button(root, text='Open', command=get_filename)
btn.pack(side='top', pady=10)
root.mainloop()

在上面的代码中,我动态地选择文件,但用“或”(|)连接的条件列表是硬编码的。我想把数据与存有该列表的第二个 CSV 文件进行比较,而不是把列表硬编码在代码里。我们如何把列表存储在特定路径的 CSV 文件中,然后再与该文件进行比较?

我不知道您要如何处理数据,所以我把 work_with_data(data1, data2) 留空。不过这段代码展示了如何获取两个文件名、调用两次 read_csv(),并最终运行函数 work_with_data(data1, data2)。

它还使用两个Labels来显示两个文件名。

import tkinter as tk
import pandas as pd
from tkinter.filedialog import askopenfilename

# --- functions ---

def work_with_data(data1, data2):

    print(data1.head())
    print(data2.head())

    # ... here you can works with data1, data2 ...


def get_filenames():
    # default values before reading data

    data1 = None
    data2 = None

    # first filename and data1

    filename1 = askopenfilename(filetypes =[('CSV files', '*.csv')])

    if filename1:
        lbl1['text'] = filename1
        data1 = pd.read_csv(filename1)
    else:
        lbl1['text'] = 'Filename 1 not selected'

    # second filename and data2

    filename2 = askopenfilename(filetypes =[('CSV files', '*.csv')])

    if filename2:
        lbl2['text'] = filename2
        data2 = pd.read_csv(filename2)
    else:
        lbl2['text'] = 'Filename 2 not selected'

    # start working with data

    if data1 is not None and data2 is not None:
       work_with_data(data1, data2)

# --- main ---

root = tk.Tk()
#root.geometry('200x200')

lbl1 = tk.Label(root, text='Select filename 1')
lbl1.pack(side='top', pady=10)

lbl2 = tk.Label(root, text='Select filename 2')
lbl2.pack(side='top', pady=10)

btn = tk.Button(root, text='Open', command=get_filenames)
btn.pack(side='top', pady=10)

root.mainloop()

编辑:我修改了部分代码,这样你就可以在 work_with_data 中使用 pandas 或 csv.reader。

import tkinter as tk
from tkinter.filedialog import askopenfilename
import pandas as pd

# --- functions ---

def work_with_data(filename1, filename2):

    # --- use pandas ---

    data1 = pd.read_csv(filename1)
    data2 = pd.read_csv(filename2)

    # ... here you can works with data1, data2 ...

    # --- or csv.reader ---

    fp1 = open(filename1)
    reader1 = csv.reader(fp1)

    fp2 = open(filename2)
    reader2 = csv.reader(fp2)

    # ... here you can works with reader1, reader2 ...

    fp2.close()   
    fp1.close()   


def get_filenames():
    # default values before reading data

    filename1 = None
    filename2 = None

    # first filename

    filename1 = askopenfilename(filetypes =[('CSV files', '*.csv')])

    if filename1:
        lbl1['text'] = filename1
    else:
        lbl1['text'] = 'Filename 1 not selected'

    # second filename

    filename2 = askopenfilename(filetypes =[('CSV files', '*.csv')])

    if filename2:
        lbl2['text'] = filename2
    else:
        lbl2['text'] = 'Filename 2 not selected'

    # start working with data

    if filename1 and filename2:
        work_with_data(filename1, filename1)

# --- main ---

root = tk.Tk()
#root.geometry('200x200')

lbl1 = tk.Label(root, text='Select filename 1')
lbl1.pack(side='top', pady=10)

lbl2 = tk.Label(root, text='Select filename 2')
lbl2.pack(side='top', pady=10)

btn = tk.Button(root, text='Open', command=get_filenames)
btn.pack(side='top', pady=10)

root.mainloop()

暂无
暂无

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM