简体   繁体   中英

Using open() with Command Line Argument in Python

I know I'm missing something obvious. I'm using argparse to parse two input files. I get the expected output from the main function when I print the variables 'file1' and 'file2'

However, I then attempt to use 'file1' and 'file2' in sub functions. I also attempt to print out the new variables (which fails). What I am trying to do is set the command line arguments to variables and then use those variables later in the code.

"""
Created on Fri Oct 21 12:02:34 2016

@author: jsklein
"""
import pandas as pd
import csv
import argparse

# Parse command line arguments and set them to variables to be used later
def main():
    """Parse the two CSV paths from the command line and print them.

    The nested helpers ``merge_csvs`` and ``diff_csvs`` are defined further
    down, but nothing in this function calls them, so their bodies (and the
    ``print`` calls inside them) do not execute when ``main()`` runs.
    """
    parser = argparse.ArgumentParser(description='Compares Two CSV files for matches and differences indexed on a column')

    # NOTE(review): argparse documents ``required`` as a boolean; the non-empty
    # string "yes" is merely truthy here -- confirm and prefer ``required=True``.
    parser.add_argument("-i", help="Name of first CSV import file", action="store", dest="infile1", required="yes")
    parser.add_argument("-I", help="Name of second CSV import file", action="store", dest="infile2", required="yes")

    args = parser.parse_args()    
    file1 = args.infile1
    file2 = args.infile2
    print(file1)
    print(file2)

    # Compare helper: joins the two files on the 'SWREV' column.
    # It is only defined here; it is never called in this function.
    def merge_csvs():
        """Load both CSVs with pandas, print them, and write their merge on 'SWREV'."""
        a = pd.read_csv(file1)
        b = pd.read_csv(file2)
        print(a)
        print(b)

        merged = b.merge(a, on='SWREV')
        merged.to_csv("merged_results.csv", index=False) 

    # Diff helper: writes the rows found in one file but not in the other.
    # It is only defined here; it is never called in this function.
    def diff_csvs():
        """Write A_notin_B.csv and B_notin_A.csv from the two input files."""
        # NOTE(review): s and k are opened without a ``with`` block and are
        # never closed in this function.
        s = open(file1, 'r')
        k = open(file2, 'r')
        print(s)
        print(k)

        checkS = csv.reader(s)
        checkK = csv.reader(k)

        # NOTE(review): csv.reader objects are one-pass iterators, so each
        # ``in`` test below consumes rows from the other reader; the two lists
        # are probably not a true difference -- verify before relying on them.
        output1 =  [row for row in checkS if row not in checkK]
        output2 =  [row for row in checkK if row not in checkS]

        with open("A_notin_B.csv", "w") as f:
            writer = csv.writer(f)
            writer.writerows(output1)

        with open("B_notin_A.csv", "w") as l:
            writer = csv.writer(l)
            writer.writerows(output2)

# Script entry point: run main() (which, as written, only parses and prints the arguments)


main()

Here is an example of running the code. Notice that the other variables 'a', 'b', 's', and 'k' do not print (and yes, I'm expecting a lot of output):

$ python csv_compare.py -i csv1.csv -I csv2.csv
csv1.csv
csv2.csv

I'm not sure, but maybe this helps (if this is what you are trying to do):

import pandas as pd
import csv
import argparse

# Parse command line arguments and set them to variables to be used later
def main():
    """Parse the two CSV paths from the command line and run both comparisons.

    Reads ``-i`` (first CSV) and ``-I`` (second CSV) from ``sys.argv``; both
    options are mandatory and argparse exits with a usage error if either is
    missing. The two paths are printed and then handed to ``merge_csvs`` and
    ``diff_csvs``.
    """
    parser = argparse.ArgumentParser(description='Compares Two CSV files for matches and differences indexed on a column')

    # ``required`` is a boolean option; the original string "yes" only worked
    # because any non-empty string is truthy.
    parser.add_argument("-i", help="Name of first CSV import file", action="store", dest="infile1", required=True)
    parser.add_argument("-I", help="Name of second CSV import file", action="store", dest="infile2", required=True)

    args = parser.parse_args()
    file1 = args.infile1
    file2 = args.infile2
    print(file1)
    print(file2)

    # explicitly call the other functions
    merge_csvs(file1, file2)
    diff_csvs(file1, file2)

# Define Compare funtion that joins on specified column
def merge_csvs(file1, file2):
    """Join two CSV files on their shared 'SWREV' column.

    Both files are loaded with pandas and printed, then the second file is
    merged with the first on 'SWREV' and the result is written to
    ``merged_results.csv`` (without the index column) in the current
    working directory.

    Args:
        file1: Path of the first CSV file.
        file2: Path of the second CSV file.
    """
    first_frame, second_frame = (pd.read_csv(path) for path in (file1, file2))

    for frame in (first_frame, second_frame):
        print(frame)

    # The second file is the left-hand side of the join, as in b.merge(a).
    joined = pd.merge(second_frame, first_frame, on='SWREV')
    joined.to_csv("merged_results.csv", index=False)

# Define Diff function that diffs on specified column
def diff_csvs(file1, file2):
    """Write the rows that are unique to each of two CSV files.

    Rows are compared as whole records (every field must match). Rows of
    ``file1`` that do not occur in ``file2`` are written to
    ``A_notin_B.csv``; rows of ``file2`` that do not occur in ``file1`` are
    written to ``B_notin_A.csv``. Both result files are created (or
    overwritten) in the current working directory, keeping the original row
    order.

    Args:
        file1: Path of the first CSV file.
        file2: Path of the second CSV file.
    """
    # The csv module asks for files to be opened with newline=''; the ``with``
    # block also guarantees both inputs are closed again.
    with open(file1, 'r', newline='') as s, open(file2, 'r', newline='') as k:
        print(s)
        print(k)

        # csv.reader is a one-pass iterator: testing ``row in reader`` consumes
        # it, so both files are read fully into lists before comparing.
        rows_a = list(csv.reader(s))
        rows_b = list(csv.reader(k))

    # Rows are lists (unhashable); tuples allow O(1) set membership tests.
    seen_a = {tuple(row) for row in rows_a}
    seen_b = {tuple(row) for row in rows_b}

    output1 = [row for row in rows_a if tuple(row) not in seen_b]
    output2 = [row for row in rows_b if tuple(row) not in seen_a]

    with open("A_notin_B.csv", "w", newline='') as f:
        csv.writer(f).writerows(output1)

    with open("B_notin_A.csv", "w", newline='') as l:
        csv.writer(l).writerows(output2)

# Script entry point: main() parses the arguments and then calls the other functions
main()

Basically what I did was:

  • Define your functions outside the main() method

  • Add file1 and file2 as parameters

  • call the two functions from main(), providing file1 and file2 as parameters for each call

The code posted above is untested. I just edited your code

# -*- coding: utf-8 -*-
"""
Created on Fri Oct 21 12:02:34 2016

@author: jsklein
"""
import pandas as pd
import csv
import argparse

# Parse the command line at module level so that file1 and file2 become
# module-level names which the functions below read as globals.
parser = argparse.ArgumentParser(description='Compares Two CSV files for       matches and differences indexed on a column')

# ``required`` is a boolean option; the original string "yes" only worked
# because any non-empty string is truthy.
parser.add_argument("-i", help="Name of first CSV import file", action="store", dest="infile1", required=True)
parser.add_argument("-I", help="Name of second CSV import file", action="store", dest="infile2", required=True)

args = parser.parse_args()
file1 = args.infile1
file2 = args.infile2
print(file1)
print(file2)

# Define Compare funtion that joins on specified column

def merge_csvs():
    """Join the two input CSV files on their shared 'SWREV' column.

    Reads the module-level ``file1`` and ``file2`` paths, prints both loaded
    tables, and writes the second table merged with the first on 'SWREV' to
    ``merged_results.csv`` (no index column) in the current working directory.
    """
    first_frame = pd.read_csv(file1)
    second_frame = pd.read_csv(file2)

    for frame in (first_frame, second_frame):
        print(frame)

    # The second file is the left-hand side of the join, as in b.merge(a).
    pd.merge(second_frame, first_frame, on='SWREV').to_csv(
        "merged_results.csv", index=False
    )

# Define Diff fuction that diffs on specified column

def diff_csvs():
    """Write the rows that are unique to each of the two input CSV files.

    Reads the module-level ``file1`` and ``file2`` paths. Rows are compared
    as whole records (every field must match). Rows of ``file1`` that do not
    occur in ``file2`` are written to ``A_notin_B.csv``; rows of ``file2``
    that do not occur in ``file1`` are written to ``B_notin_A.csv``. Both
    result files are created (or overwritten) in the current working
    directory, keeping the original row order.
    """
    # The csv module asks for files to be opened with newline=''; the ``with``
    # block also guarantees both inputs are closed again.
    with open(file1, 'r', newline='') as s, open(file2, 'r', newline='') as k:
        print(s)
        print(k)

        # csv.reader is a one-pass iterator: testing ``row in reader`` consumes
        # it, so both files are read fully into lists before comparing.
        rows_a = list(csv.reader(s))
        rows_b = list(csv.reader(k))

    # Rows are lists (unhashable); tuples allow O(1) set membership tests.
    seen_a = {tuple(row) for row in rows_a}
    seen_b = {tuple(row) for row in rows_b}

    output1 = [row for row in rows_a if tuple(row) not in seen_b]
    output2 = [row for row in rows_b if tuple(row) not in seen_a]

    with open("A_notin_B.csv", "w", newline='') as f:
        csv.writer(f).writerows(output1)

    with open("B_notin_A.csv", "w", newline='') as l:
        csv.writer(l).writerows(output2)

# Main Function that Calls all the other functions
def main():
    """Run the merge step and then the diff step, in that order."""
    for step in (merge_csvs, diff_csvs):
        step()

main()

So I got rid of the arg_parser function, which made the global vars available to the other functions.

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM