简体   繁体   中英

Comparing two files in python

I have the following code. I have checked other questions on Stack Overflow, but they are slightly more complicated than mine.

For now my text files have hello (in file1) and hell (in file2) as data.

I believe my logic is correct but I get the following error

TypeError: object of type '_io.TextIOWrapper' has no len()

Where am I going wrong?

def compareString(line1, line2):
    """Report the first position at which two strings differ.

    Prints ``Mismatch at character  <index> <char>`` for the first differing
    position and prints nothing when the strings are equal.

    Args:
        line1: First string to compare.
        line2: Second string to compare.

    Returns:
        The zero-based index of the first difference, or None when the
        strings are identical.
    """
    # zip() stops at the shorter string, so indexing can never run past the
    # end of either argument (the unbounded while-loop raised IndexError for
    # equal strings and for pairs such as 'hello' / 'hell').
    for i, (char1, char2) in enumerate(zip(line1, line2)):
        if char1 != char2:
            print('Mismatch at character ', i, char1)
            return i

    if len(line1) != len(line2):
        # One string is a strict prefix of the other: the mismatch is the
        # first extra character of the longer string.
        i = min(len(line1), len(line2))
        longer = line1 if len(line1) > len(line2) else line2
        print('Mismatch at character ', i, longer[i])
        return i

    return None

def compareMain(): # entry point: prompt for two file names and compare the files
    """Ask for two file names, open both files and hand data to compareString.

    NOTE(review): this is the code exactly as asked about. It fails with the
    TypeError quoted in the question because the objects returned by open()
    do not support len(). Even if len() worked, line1 and line2 would be the
    integers produced by range(), not lines of text. Iterate over the file
    objects directly to get the lines.
    """
    file1=input('Enter the name of the first file: ') #input file1 name
    file2=input('Enter the name of the second file: ') #input file2 name

    fp1=open(file1,'r') #open file1, reading mode
    fp2=open(file2,'r') #open file2, reading mode
    for line1 in range(len(fp1)): # raises TypeError: a file object has no len()
        for line2 in range(len(fp2)): # same problem for the second file
            compareString(line1,line2) # would receive two ints from range(), not strings
    fp1.close() #Close file1 (not reached once the TypeError above is raised)
    fp2.close() #Close file2 (not reached once the TypeError above is raised)

compareMain() # run the comparison as soon as the script is executed

I would do it like this:

def compare_files(file1=None, file2=None):
    """Report whether two text files have exactly the same contents.

    Args:
        file1: Path of the first file. Asked for on stdin when omitted.
        file2: Path of the second file. Asked for on stdin when omitted.

    Returns:
        True when both files contain identical text, False otherwise. The
        verdict is also printed.

    Raises:
        OSError: if either file cannot be opened or read.
    """
    if file1 is None:
        file1 = input('Enter the name of the first file: ')
    if file2 is None:
        file2 = input('Enter the name of the second file: ')

    # The with-statement closes both handles even when a read fails; the
    # bare open() calls used before left them open until interpreter exit.
    with open(file1, 'r') as fp1, open(file2, 'r') as fp2:
        same = fp1.read() == fp2.read()

    if same:
        print("Files are the same")
    else:
        print("Files are not the same")
    return same


if __name__ == '__main__':
    # Only prompt when run as a script, so importing this code has no side effects.
    compare_files()

The .read() method returns the entire content of a file. We read the content of both files and then compare the two strings.

You don't need to use range(len(fp1)). You can iterate over fp1 directly, which yields the file's lines one at a time. That should fix the error.

def compareString(line1, line2):
    """Report the first position at which two strings differ.

    Prints ``Mismatch at character  <index> <char>`` for the first differing
    position and prints nothing when the strings are equal.

    Args:
        line1: First string to compare.
        line2: Second string to compare.

    Returns:
        The zero-based index of the first difference, or None when the
        strings are identical.
    """
    # zip() stops at the shorter string, so indexing can never run past the
    # end of either argument (the unbounded while-loop raised IndexError for
    # equal strings and for pairs such as 'hello' / 'hell').
    for i, (char1, char2) in enumerate(zip(line1, line2)):
        if char1 != char2:
            print('Mismatch at character ', i, char1)
            return i

    if len(line1) != len(line2):
        # One string is a strict prefix of the other: the mismatch is the
        # first extra character of the longer string.
        i = min(len(line1), len(line2))
        longer = line1 if len(line1) > len(line2) else line2
        print('Mismatch at character ', i, longer[i])
        return i

    return None

def compareMain():
    """Prompt for two file names and compare the files line by line.

    Corresponding lines (first with first, second with second, ...) that
    differ are passed to compareString, which reports where they diverge.

    Raises:
        OSError: if either file cannot be opened.
    """
    file1 = input('Enter the name of the first file: ')
    file2 = input('Enter the name of the second file: ')

    # The with-statement closes both files even if the comparison raises.
    with open(file1, 'r') as fp1, open(file2, 'r') as fp2:
        # Walk both files in lockstep. A nested loop would exhaust fp2 during
        # the first outer iteration, so only the first line of file1 would
        # ever be compared, and against every line of file2.
        # NOTE: zip() stops at the end of the shorter file, so surplus lines
        # in the longer file are not reported.
        for line1, line2 in zip(fp1, fp2):
            if line1 != line2:
                compareString(line1, line2)


if __name__ == '__main__':
    # Only prompt when run as a script, so importing this code has no side effects.
    compareMain()

As Tris mentioned, I would suggest using difflib ( https://docs.python.org/3/library/difflib.html ). Here is a snippet that you can use as a starting point:

import difflib
import sys

def find_mismatches(lines1, lines2):
    """Describe every differing span between corresponding lines.

    Line N of ``lines1`` is compared with line N of ``lines2`` only. Comparing
    every line of one file against every line of the other reports mismatches
    between unrelated lines and costs O(len(lines1) * len(lines2)) diffs.

    Args:
        lines1: Lines of the first file (trailing whitespace is ignored).
        lines2: Lines of the second file (trailing whitespace is ignored).

    Returns:
        A list of report strings, one per non-equal difflib opcode. Line and
        character indices are zero-based; the list is empty when all lines
        match.
    """
    # Local import keeps this snippet self-contained.
    from itertools import zip_longest

    reports = []
    # fillvalue='' makes lines that exist in only one file show up as a
    # pure insert/delete instead of being silently skipped.
    paired = zip_longest(lines1, lines2, fillvalue='')
    for line_idx, (l1, l2) in enumerate(paired):
        matcher = difflib.SequenceMatcher(None, l1.rstrip(), l2.rstrip())
        for tag, i1, i2, j1, j2 in matcher.get_opcodes():
            if tag in ("delete", "replace", "insert"):
                reports.append(
                    "Mismatch file1-line%d file2-line%d, line1-index[%d:%d] line2-index[%d:%d]"
                    % (line_idx, line_idx, i1, i2, j1, j2)
                )
    return reports


def diff_main(argv):
    """Compare the two files named in ``argv`` and print each mismatch.

    Args:
        argv: Command-line argument list: program name, first file, second file.

    Returns:
        Process exit status: 0 after a completed comparison, 2 on wrong usage.
    """
    if len(argv) != 3:
        sys.stderr.write("usage: <script> FILE1 FILE2\n")
        return 2

    # Read inside a with-block so both handles are closed deterministically.
    with open(argv[1]) as fp1, open(argv[2]) as fp2:
        reports = find_mismatches(fp1.readlines(), fp2.readlines())

    for report in reports:
        print(report)
    return 0


if __name__ == '__main__':
    sys.exit(diff_main(sys.argv))

I started this answer before I realized that the questioner wanted a diff, not just an identical-or-not check, but I figure this question might be found by people who do want that, so here it is. While the main() function works with only two files, the core code can work with any number of files. Either way, it'll only check up to the first line that doesn't match.

#
# Python 2/3 compatibility
#

from __future__ import print_function

try:
    # Python 2: the built-in zip builds a full list, so rebind the name to the
    # lazy itertools.izip and let the code below iterate without loading
    # everything at once.
    from itertools import izip as zip  # Python 2
except ImportError:
    pass  # Python 3: itertools has no izip; the built-in zip is used unchanged

try:
    # Python 2: rebind input to raw_input so the prompt returns the typed
    # text as a string.
    input = raw_input  # Python 2
except NameError:
    pass  # Python 3: raw_input does not exist; the built-in input is used unchanged


#
# You can leave out everything above if you're on Python 3
#

def all_equal(first_item, *items):
    """Return True when every item in ``items`` equals ``first_item``.

    With no further items the result is True.
    """
    for candidate in items:
        if not (candidate == first_item):
            # Stop at the first item that differs.
            return False
    return True


def iterables_are_identical(*iterables):
    """Return True when all iterables yield equal items in the same order.

    Iterables of different lengths are never identical. Evaluation is lazy
    and stops at the first position where the items differ.

    Args:
        *iterables: Any number of iterables to compare position by position.

    Returns:
        True if every iterable produced the same sequence of items (also
        when called with no iterables), otherwise False.
    """
    try:
        from itertools import zip_longest  # Python 3
    except ImportError:
        from itertools import izip_longest as zip_longest  # Python 2

    # zip() stops at the shortest input, which made an iterable that is a
    # strict prefix of another compare as identical. Padding with a unique
    # sentinel makes the shorter one compare unequal at the first position
    # where it has run out.
    exhausted = object()
    rows = zip_longest(*iterables, fillvalue=exhausted)
    return all(all_equal(*row) for row in rows)


def files_are_identical(*files, **kwargs):
    """Return whether all named files have the same contents, item by item.

    Args:
        *files: Paths of the files to compare.
        **kwargs: ``mode`` selects the mode passed to open() (default 'r').

    Returns:
        The result of iterables_are_identical applied to the opened files.
    """
    open_mode = kwargs['mode'] if 'mode' in kwargs else 'r'

    handles = []
    try:
        # Open one at a time so that, if a later open() fails, the handles
        # already collected are still closed by the finally clause.
        for path in files:
            opened = open(path, open_mode)
            handles.append(opened)

        result = iterables_are_identical(*handles)
    finally:
        for opened in handles:
            opened.close()
    return result


def main():
    """Prompt for two file names and print whether the files are identical."""
    prompt = 'Enter the name of the %s file: '
    first_path = input(prompt % 'first')
    second_path = input(prompt % 'second')

    identical = files_are_identical(first_path, second_path)
    verdict = 'Files are identical' if identical else 'Files are NOT identical'
    print(verdict)


if __name__ == '__main__':
    main()

The technical posts on this site are licensed under CC BY-SA 4.0. If you reprint them, please cite the site URL or the original address. For any questions, please contact: yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM