简体   繁体   中英

Python tarfile creates an extra @PaxHeader file and causes the error "Cannot utime: Operation not permitted"

I have a Lambda function that unpacks a tgz file, replaces the content and filename, and then re-packs it into a new tgz file. If I download the new file from S3 and unpack it locally using 7z on Windows or tar xvzf <new_file_name>.tgz on Ubuntu, it all works fine. However, if I SFTP the new tgz file to a customer, they get a utime error. They sent me a log like this:

cd /usr/.../; tar xvzf </usr/.../new_file_name>.tgz;
./
./A/
./A/a.jpg
./A/b.jpg
./A/c.jpg
./something.xml
tar: .:Cannot utime: Operation not permitted.
tar: Exiting with failure status due to previous error

I am wondering whether the extra @PaxHeader file is causing the utime error, because I can see from the log file that the contents have already been unpacked.

Here is my lambda function code.

import os
import sys
import tarfile
import shutil
import boto3
from botocore.exceptions import ClientError

# Shared S3 client, created once when the module is imported.
s3_client = boto3.client('s3')
# Date string that processed archives must carry; read from the environment
# (a missing "Acceptable_Date" variable raises KeyError at import time).
accepted_date = os.environ["Acceptable_Date"]
# Directory the source archive is downloaded into and the new one is built in.
work_dir = '/tmp'  # os.getcwd() returns "/var/task"
# Directory the archive is extracted into before being re-packed.
new_dir = '/tmp/new'
# NOTE(review): the functions below assign their own local `current_date`,
# so this module-level value appears never to be updated — confirm it is needed.
current_date = ''

def lambda_handler(event, context):
    """Re-date every archive referenced by an S3 event notification.

    For each record the object is downloaded into ``work_dir``, rewritten by
    ``process`` and uploaded back to the same bucket under a key in which the
    archive's current date is replaced by ``accepted_date``. Objects whose
    (slash-stripped) key already contains ``accepted_date`` are skipped.

    Args:
        event: S3 event notification; ``event['Records']`` is iterated.
        context: Lambda context object (unused).
    """
    # Function-scope import so the decoding fix below is self-contained.
    from urllib.parse import unquote_plus

    for record in event['Records']:
        bucket = record['s3']['bucket']['name']
        # S3 event notifications deliver the object key URL-encoded (a space
        # arrives as '+'); decode it before using it as an S3 key or a path.
        key = unquote_plus(record['s3']['object']['key'])
        tmpkey = key.replace('/', '')
        if accepted_date in tmpkey:
            print('Already has the correct date. No processing')
            continue

        download_path = '{}/{}'.format(work_dir, tmpkey)

        current_date = get_current_date(tmpkey)
        newkey = tmpkey.replace(current_date, accepted_date)

        # The local output uses a .tar.gz name so that process(), which scans
        # work_dir for *.tgz files, does not pick up its own output.
        upload_path = '{}/{}'.format(work_dir, newkey.replace('tgz', 'tar.gz'))
        print(work_dir)
        s3_client.download_file(bucket, key, download_path)
        process(upload_path)
        s3_client.upload_file(upload_path, bucket, newkey)

        cleanup(download_path)
        # Debug listing of whatever is left in work_dir after cleanup.
        print('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa')
        for entry in os.scandir(work_dir):
            print(entry.name)
        print('bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb')

    
def process(upload_path):
    """Rebuild every ``.tgz`` file found directly in ``work_dir``.

    Each archive is extracted, its content is re-dated via
    ``replace_content`` and the result is packed into ``upload_path``.

    Args:
        upload_path: Path of the archive to create.
    """
    for entry in os.scandir(work_dir):
        # is_file must be *called*: the bare attribute is a bound method and
        # therefore always truthy, which let directories through.
        if entry.path.endswith(".tgz") and entry.is_file():
            print(entry.path)
            current_date = get_current_date(entry.name)
            decompress(entry.path)
            replace_content(current_date)
            compress2(upload_path)

def get_current_date(file_name):
    """Return the second underscore-separated field of *file_name*.

    Raises:
        IndexError: If *file_name* contains no underscore.
    """
    fields = file_name.split('_')
    return fields[1]

def decompress(file_name):
    """Extract the gzip-compressed tar archive *file_name* into ``new_dir``.

    Args:
        file_name: Path of the ``.tgz`` archive to extract.
    """
    # The context manager closes the archive even if extraction fails.
    with tarfile.open(file_name, "r:gz") as tar:
        # NOTE(review): extractall() trusts the member paths stored in the
        # archive; only feed it archives from a trusted source.
        tar.extractall(new_dir)

def compress2(output_filename):
    """Pack the contents of ``new_dir`` into a gzip-compressed tar archive.

    Args:
        output_filename: Path of the archive to create.
    """
    # if using tgz as file extension, you cannot decompress it using 7z on Windows.
    # Do not add entry by entry. Add the source fold instead.
    for entry in os.scandir(new_dir):
        print('Included: '+entry.path)
    # Write GNU format explicitly: tarfile's default format (PAX on
    # Python 3.8+) can emit extended headers that older tar implementations
    # unpack as stray "@PaxHeader" files.
    # NOTE(review): arcname='.' also stores an entry for the directory itself,
    # so the extracting tar will try to set the timestamp of its target
    # directory — confirm whether that is what triggers "Cannot utime".
    with tarfile.open(output_filename, "w:gz", format=tarfile.GNU_FORMAT) as tar:
        tar.add(new_dir, arcname='.')

def cleanup(output_filename):
    """Delete every file and directory directly under ``work_dir``.

    Args:
        output_filename: Unused; kept so existing callers keep working.
    """
    # Snapshot the listing first: entries are deleted while we walk it.
    with os.scandir(work_dir) as listing:
        entries = list(listing)
    for entry in entries:
        # The original tested the bare `entry.is_file` attribute, which is
        # always truthy, so directories were handed to os.remove() and failed.
        # Symlinks are treated as plain files and unlinked, not followed.
        remove(entry.path, entry.is_dir(follow_symlinks=False))
            
def replace_content(current_date):
    """Re-date the first ``.XML`` file found directly in ``new_dir``.

    Every occurrence of *current_date* is replaced by ``accepted_date`` both
    in the file's path and in its text content. Only the first matching file
    is processed.

    Args:
        current_date: Date string to replace.

    Returns:
        The new file path with ``'XML'`` replaced by ``'tgz'``, or ``None``
        when ``new_dir`` holds no ``.XML`` file.
    """
    for entry in os.scandir(new_dir):
        if not (entry.path.endswith(".XML") and entry.is_file()):
            continue
        print(entry.path)
        new_path = entry.path.replace(current_date, accepted_date)
        # Write to a temporary sibling first. If the path does not contain
        # current_date, new_path equals entry.path; writing straight to it
        # would truncate the file before it is read and then delete it.
        tmp_path = new_path + ".tmp"
        with open(entry.path, "rt") as old, open(tmp_path, "wt") as new:
            for line in old:
                new.write(line.replace(current_date, accepted_date))
        os.replace(tmp_path, new_path)
        if new_path != entry.path:
            remove(entry.path, False)
        return new_path.replace('XML', 'tgz')
    return None

def remove(path, is_dir):
    """Delete *path* if it exists and report the outcome on stdout.

    Args:
        path: File or directory path to delete.
        is_dir: When true the path is removed recursively with
            ``shutil.rmtree``; otherwise it is removed with ``os.remove``.
    """
    if not os.path.exists(path):
        print("The file does not exist: "+path)
        return

    delete = shutil.rmtree if is_dir else os.remove
    delete(path)
    print('Removed: '+path)
 

Any idea what I have done wrong?

P.S. The file permissions for all content files and the tgz file are -rwxrwxrwx, and the file owner is me when I download and unpack locally.

试试这个解决方案:

# Open the output archive for gzip-compressed writing, explicitly selecting
# the GNU tar format instead of tarfile's default format.
tar = tarfile.open('sample.tar.gz', 'w:gz', format=tarfile.GNU_FORMAT)

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM