简体   繁体   中英

How to get conflicting files with same relative paths from multiple folders?

I want to make a virtual file system from a few folders and want to check if there are any conflicting files. So I want to provide a few folders and get files with the same path relative to their folders.

How can I find the conflicts?

This is what I've done so far. The get_files and remove_duplicates aren't working as I expected.

from glob import glob
import os
from pathlib import Path
import shutil
import sys


def main():
    folders = sys.argv[1:]

    if len(folders) < 2:
        print("Please provide at least 2 folders")
        exit(1)

    files = get_files(folders)
    conflicting_files = find_conflicting_files(files)
    conflicting_files = remove_duplicates(conflicting_files)
    print_conflicting_files(conflicting_files)


def get_files(folders):
    files = []
    for folder in folders:
        files.extend([os.path.relpath(path, folder) for path in Path(folder).rglob("*")])
    return files


def test_get_files():
    try:
        os.makedirs("test/folder1/a", exist_ok=True)
        os.makedirs("test/folder2/b", exist_ok=True)
        open("test/folder1/a/file", "w").close()
        open("test/folder2/b/file", "w").close()

        folders = ["test/folder1", "test/folder2"]
        assert get_files(folders) == ["a/file", "b/file"]
    finally:
        shutil.rmtree("test")


def find_conflicting_files(files) -> list:
    conflicting_files = []
    for file in files[0]:
        for f in files[1:]:
            if file in f:
                conflicting_files.append(file)

    return conflicting_files


def test_find_conflicting_files():
    files = [
        ["a", "b", "c"],
        ["a", "b", "d"],
        ["a", "b", "e"],
        ["a", "b", "f"],
    ]

    assert find_conflicting_files(files) == ["a", "a", "a", "b", "b", "b"]


def remove_duplicates(files):
    return list(set(files))


def test_remove_duplicates():
    files = ["a", "a", "b", "b", "c", "c"]
    assert remove_duplicates(files) == ["a", "b", "c"]


def print_conflicting_files(files):
    for file in files:
        print(file)


if __name__ == "__main__":
    main()

Try (haven't tested it myself):

def find_conflicting_files(files) -> list:
    from collections import Counter
    return [file for file, cnt in Counter(files).items() if cnt > 1) 

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM