![](/img/trans.png)
[英]how to apply elasticsearch using python on files in azure-data-lake?
[英]How to fetch files from Windows Shared Network Drive and upload to Azure Data Lake Storage location using Python?
我需要從 Windows 共享網絡驅動器位置獲取文件(以 .xml 作為擴展名)並使用 Python 腳本(在 PyCharm 中)將它們上傳到 ADLS(Azure Data Lake Storage)。
我嘗試使用以下代碼 -
# Question's attempt: list the files on the network share, build one command
# string, and run it with subprocess.call. This is the version that fails with
# the FileNotFoundError shown below it.
import os
import subprocess
# os.listdir returns a list of file names found in the shared directory.
file_src = os.listdir('\\\\<Shared Dir Server>\\<Directory>')
local_directory="F:\\Files\\*"
sasToken="<SAS Token>"
endpoint="https://<storageAccount>.blob.core.windows.net/<container>/<target directory>"
# NOTE(review): str(file_src) is the text form of the list, so the command line
# starts with "['temp1.xml', ...]" rather than with a program name; that is why
# process creation cannot find a file to execute.
# NOTE(review): the "copy ... --recursive" shape looks like an AzCopy command line;
# presumably the first token was meant to be the azcopy executable - confirm.
# NOTE(review): the quoted source and the quoted destination are concatenated with
# no space between them, and endpoint and sasToken are joined with no separator.
copyscript= str(file_src) + " copy " + "\""+ local_directory + "\"" + "\""+endpoint+sasToken + "\"" + " --recursive"
print(copyscript)
subprocess.call(copyscript)
但它失敗了——
['temp1.xml', 'temp2.xml', 'abc1.xml', 'desf2.xml', 'file.txt'] copy "F:\Files\*""https://<storageAccount>.blob.core.windows.net/<container>/<Target Directory>/sasToken" --recursive
Traceback (most recent call last):
File "C:\Program Files\PycharmProjects\pythonProject\venv\Upload_SharedDrive_Files.py", line 17, in <module>
subprocess.call(myscript)
File "C:\Program Files\Python39\lib\subprocess.py", line 349, in call
with Popen(*popenargs, **kwargs) as p:
File "C:\Program Files\Python39\lib\subprocess.py", line 951, in __init__
self._execute_child(args, executable, preexec_fn, close_fds,
File "C:\Program Files\Python39\lib\subprocess.py", line 1420, in _execute_child
hp, ht, pid, tid = _winapi.CreateProcess(executable, args,
FileNotFoundError: [WinError 2] The system cannot find the file specified
Process finished with exit code 1
我是 Python 的新手。 請幫忙!
謝謝。
我可以使用以下代碼完成此操作(不確定這是否是最好的方法)-
from azure.storage.filedatalake import DataLakeFileClient
from azure.storage.blob import BlobServiceClient
from azure.storage.filedatalake import DataLakeServiceClient
import os
import io
import shutil
import sys
# Local staging folder that files are copied into before being uploaded.
myfolder = "F:\\Files"

# Target ADLS account, file system (container) and directory. The angle-bracket
# values are placeholders.
connect_str = "DefaultEndpointsProtocol=https;AccountName=<storageAccount>;AccountKey=<storageAccountKey>;EndpointSuffix=core.windows.net"
myfilesystem = "<adlsContainer>"
trgt_dir = "<adlsTargetDirectory>"

# Single service client built at module level; upload_file_to_directory reads it
# as a global.
datalake_service_client = DataLakeServiceClient.from_connection_string(connect_str)
def upload_file_to_directory(trgt, src, filename, filesystem):
    """Upload one local file into a directory of an ADLS file system.

    Args:
        trgt: Path of the destination directory inside the file system.
        src: Local directory that contains the file to upload.
        filename: Name of the file; used both to locate the local file and
            as the name of the file created in ``trgt``.
        filesystem: Name of the ADLS file system (container).

    Raises:
        OSError: If the local file cannot be opened or read.

    Uses the module-level ``datalake_service_client``.
    """
    file_system_client = datalake_service_client.get_file_system_client(file_system=filesystem)
    directory_client = file_system_client.get_directory_client(trgt)
    file_client = directory_client.create_file(filename)

    # Read the file as raw bytes so the upload is an exact copy. Text mode with
    # errors="ignore" decoded using the platform default encoding and silently
    # dropped any bytes it could not decode, which can alter the XML content.
    # The with-block also guarantees the handle is closed before the caller
    # deletes the local file.
    with open(os.path.join(src, filename), 'rb') as local_file:
        file_contents = local_file.read()

    file_client.upload_data(file_contents, overwrite=True)
# Stage the .xml files from the network share into the local folder, then upload
# every staged file to ADLS and delete the local copy once it has been uploaded.
#
# The earlier sys.path.extend(myfolder) call was removed: extending a list with a
# string appends each character as its own entry, and nothing in this script
# imports modules from that folder.
src = '\\\\<hostServer>\\<sourceDirectory>'
files = os.listdir(src)
dst = myfolder  # stage into the same folder the upload loop reads from

for file in files:
    if file.endswith('.xml'):
        src_path = os.path.join(src, file)
        print(src_path)
        # copy2 also copies file metadata such as timestamps.
        shutil.copy2(src_path, dst)

for fsrc in os.listdir(myfolder):
    staged_path = os.path.join(myfolder, fsrc)
    # Skip sub-directories: they cannot be opened as a file for upload.
    if not os.path.isfile(staged_path):
        continue
    print(f"Now uploading {fsrc}")
    upload_file_to_directory(trgt_dir, myfolder, fsrc, myfilesystem)
    print(f"Now removing {fsrc}")
    os.remove(staged_path)
請隨時提供您的想法!
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.