[英]How do I create a python script such that it sends an email when csv files in a directory has not updated in the last 24 hours?
我是 python 的新手,并试图了解如何实现自动化。 我有一个文件夹,其中每天更新 5 个 csv 文件,但有时其中一个或两个文件在特定日期不会更新。 我不得不手动检查这个文件夹。 相反,我想以这样的方式自动执行此操作,如果 csv 文件在过去 24 小时内没有更新,它可以向自己发送 email 来提醒我这一点。
我的代码:
import datetime
import glob
import os
import smtplib
import string
# Question's original script: look at the newest CSV in the folder, and if it is
# more than one day old send an alert e-mail and write a WARNING line to a log
# file; otherwise write an OK line.
# NOTE(review): reproduced exactly as posted (the page lost the indentation).
now = datetime.datetime.today() #Get current date
list_of_files = glob.glob('c:/Python/*.csv') # * means all if need specific format then *.csv
# NOTE(review): max() keeps only the single newest file, so one fresh CSV hides
# any number of stale ones; max() also raises ValueError when the glob is empty.
latest_file = max(list_of_files, key=os.path.getctime) #get latest file created in folder
# NOTE(review): utcfromtimestamp() yields a UTC datetime while `now` comes from
# today() (local time); the subtraction below is skewed by the UTC offset.
newestFileCreationDate = datetime.datetime.utcfromtimestamp(os.path.getctime(latest_file)) # get creation datetime of last file
dif = (now - newestFileCreationDate) #calculating days between actual date and last creation date
logFile = "c:/Python/log.log" #defining a log file
# checkFolder(dif, now, logFile): `dif` is the age of the newest file (timedelta),
# `now` is the timestamp used in messages, `logFile` is the path appended to.
def checkFolder(dif, now, logFile):
if dif > datetime.timedelta(days = 1): #Check if difference between today and last created file is greater than 1 days
HOST = "12.55.13.12" #This must be your smtp server ip
SUBJECT = "Alert! At least 1 day wthout a new file in folder xxxxxxx"
TO = "xx.t@gmail.com"
FROM = "xx.t@gmail.com"
# NOTE(review): the text says "oldest file" but `dif` was computed from the newest file.
text = "%s - The oldest file in folder it's %s old " %(now, dif)
# NOTE(review): string.join() only exists in Python 2's string module; on
# Python 3 this line raises AttributeError.
BODY = string.join((
"From: %s" % FROM,
"To: %s" % TO,
"Subject: %s" % SUBJECT ,
"",
text
), "\r\n")
server = smtplib.SMTP(HOST)
server.sendmail(FROM, [TO], BODY)
server.quit()
file = open(logFile,"a") #Open log file in append mode
file.write("%s - [WARNING] The oldest file in folder it's %s old \n" %(now, dif)) #Write a log
file.close()
else : # If difference between today and last creation file is less than 1 days
file = open(logFile,"a") #Open log file in append mode
file.write("%s - [OK] The oldest file in folder it's %s old \n" %(now, dif)) #write a log
file.close()
checkFolder(dif,now,logFile) #Call function and pass 3 arguments defined before
但是,这段代码运行时会报错。我只想通过邮件收到文件夹中尚未更新的那些文件的通知,无论是 5 个文件中的 1 个,还是 5 个文件全部没有更新。
使用纯 python 简洁的方式
import hashlib
import glob
import json
import smtplib
from email.message import EmailMessage
import time
import schedule #pip install schedule
# NOTE(review): a single module-level MD5 object. update() calls accumulate, so
# any code that reuses it for several files obtains digests covering all bytes
# fed so far rather than one file's content.
hasher = hashlib.md5()
# Chunk size in bytes used for each read() call.
size = 65536 #to read large files in chunks
# Evaluated once when the module is loaded; the pattern is relative to the
# current working directory.
list_of_files = glob.glob('./*.csv') #absolute path for crontab
第 1 部分)首先运行此脚本,然后将其注释掉。 它将创建一个 json 文件,其中包含文件的哈希值。
# Part 1: build the initial {path: digest} snapshot and store it in hash.json.
# NOTE(review): every file is fed into the same module-level `hasher`, so each
# recorded digest covers all bytes hashed so far, not only the current file.
first_hashes = {}
for csv_path in list_of_files:
    with open(csv_path, 'rb') as stream:
        # iter() with a sentinel stops at the first empty read (end of file).
        for chunk in iter(lambda: stream.read(size), b''):
            hasher.update(chunk)
    first_hashes[csv_path] = hasher.hexdigest()

with open('hash.json', 'w') as snapshot:
    snapshot.write(json.dumps(first_hashes, indent=2))
现在将其注释掉,甚至删除它。
第 2 部分)自动化脚本:
def _file_md5(path, chunk_size):
    """Return the hex MD5 digest of the file at ``path``, read in chunks."""
    digest = hashlib.md5()  # fresh object per file so digests never mix
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()


def send_email():
    """E-mail the list of CSV files whose content did not change since the last run.

    Reads the module-level ``list_of_files`` and ``size``. For every file the
    current MD5 digest is compared with the digest stored for *that same path*
    in ``hash.json``; files whose digest is identical are reported. The stored
    digests are then refreshed so the next run compares against this one.

    Side effects: rewrites ``hash.json``; when at least one file is unchanged,
    writes ``check_hash.txt`` and sends it as an attachment over Gmail SMTP.
    """
    # A missing snapshot (first run) simply means nothing can be "unchanged" yet.
    try:
        with open('hash.json') as f:  #absolute path for crontab
            data = json.load(f)
    except FileNotFoundError:
        data = {}

    check_hash = {}  # path -> digest for files that have not changed
    for x in list_of_files:
        new_hash = _file_md5(x, size)
        # Compare against this file's own previous digest, not any file's.
        if data.get(x) == new_hash:
            check_hash[x] = new_hash
        data[x] = new_hash

    # update our hashes
    with open('hash.json', 'w') as f:  #absolute path for crontab
        f.write(json.dumps(data, indent=2))

    if not check_hash:
        return

    filename = "check_hash.txt"  #absolute path for crontab
    report = json.dumps(check_hash, indent=2)
    with open(filename, 'w') as f:  #absolute path for crontab
        f.write(report)

    # for gmail smtp setup watch youtu.be/JRCJ6RtE3xU
    # NOTE(review): credentials are hard-coded placeholders; load real ones
    # from the environment or a protected file instead of committing them.
    EMAIL_ADDRESS = 'SMTPAddress@gmail.com'
    EMAIL_PASSWORD = 'SMTPPassWord'

    msg = EmailMessage()
    msg['Subject'] = 'Unupdated files'
    msg['From'] = EMAIL_ADDRESS
    msg['To'] = 'receive@gmail.com'
    msg.set_content('These file(s) did not update:')
    # The report text is attached directly; no need to reopen the file.
    msg.add_attachment(report, filename=filename)

    with smtplib.SMTP_SSL('smtp.gmail.com', 465) as smtp:
        smtp.login(EMAIL_ADDRESS, EMAIL_PASSWORD)
        smtp.send_message(msg)
#for faster testing check other options here github.com/dbader/schedule
# Register send_email as a daily job at 10:30.
schedule.every().day.at("10:30").do(send_email)
# Keep the process alive and ask the scheduler once per second to run any
# job that has become due.
while 1:
schedule.run_pending()
time.sleep(1)
编辑:如果您重新启动您的电脑,您将需要再次运行此文件以重新启动计划,为避免这种情况,您可以按如下方式使用 crontab(从 youtu.be/j-KgGVbyU08 学习如何):
# mm hh DOM MON DOW command
30 10 * * * python3 path-to-file/email-script.py #Linux
30 10 * * * python path-to-file/email-script.py #Windows
如果当时电脑处于开启状态,这将在每天上午 10:30 运行脚本。 为了更快的测试(每 1 分钟运行一次),请使用:
* * * * * python3 path-to-file/email-script.py
注意:如果你要使用 crontab,你必须对所有文件引用使用绝对路径并替换
schedule.every().day.at("10:30").do(send_email)
while 1:
schedule.run_pending()
time.sleep(1)
替换为:
if __name__ == "__main__":
send_email()
经过测试,它工作得很好!
你在想这样的事情吗?
import os
from datetime import datetime
import smtplib
import textwrap
def send_email_failure():
    """Send the "no new file" alert e-mail through the configured SMTP server.

    Reads the module-level ``oldest_time_hour`` for the age shown in the body.
    The message is printed to stdout before it is sent.

    Raises:
        smtplib.SMTPException / OSError: if the server cannot be reached or
            rejects the message.
    """
    SERVER = "12.55.13.12" #This must be your smtp server ip
    SUBJECT = "Alert! At least 1 day without a new file in folder xxxxxxx"
    # A list, so that ", ".join() joins addresses instead of characters.
    TO = ["xx.t@gmail.com"]
    FROM = "xx.t@gmail.com"
    TEXT = "%s - The oldest file in folder it's %sh old " %(datetime.now(), oldest_time_hour)

    # Headers and body must be separated by one empty line.
    message = "\r\n".join([
        "From: %s" % FROM,
        "To: %s" % ", ".join(TO),
        "Subject: %s" % SUBJECT,
        "",
        TEXT,
    ])
    print(message)

    # Send the mail; the context manager closes the connection even on error.
    with smtplib.SMTP(SERVER) as server:
        server.sendmail(FROM, TO, message)
def save_log(logFile, ok_or_failure, time_now, delta):
    """Append one status line to the log file.

    Args:
        logFile: path of the log file; it is opened in append mode.
        ok_or_failure: ``'ok'`` writes an ``[OK]`` line, any other value
            writes a ``[WARNING]`` line.
        time_now: timestamp placed at the start of the line.
        delta: age value interpolated into the message.
    """
    level = "OK" if ok_or_failure == 'ok' else "WARNING"
    # `with` guarantees the handle is closed even if the write fails.
    with open(logFile, "a") as log_handle:
        log_handle.write(
            "%s - [%s] The oldest file in folder it's %s old \n"
            % (time_now, level, delta)
        )
def check_file(filename):
    """Return how many whole hours ago ``filename`` was last modified.

    Args:
        filename: path of the file to inspect.

    Returns:
        int: ``0`` for any path that does not end in ``.csv``; otherwise the
        age of the file in whole hours (never negative). A CSV whose
        modification time cannot be read is treated as modified at the epoch,
        i.e. as extremely old, so that it is not silently ignored.
    """
    print(filename)
    if not filename.endswith('.csv'):
        return 0
    print('csv')
    try:
        mtime = os.path.getmtime(filename)  # get modified time
    except OSError:
        mtime = 0
    # Use the full elapsed time: timedelta.seconds alone drops whole days and
    # can therefore never reach 24 hours.
    age_seconds = datetime.now().timestamp() - mtime
    return max(0, int(age_seconds // 3600))
# Walk the directory 'files', take the age in hours of every file via
# check_file() and remember the largest one.
oldest_time_hour = 0
for path, dirs, files in os.walk('./files'): # this need to be modified by case
    for name in files:
        oldest_time_hour = max(oldest_time_hour, check_file(os.path.join(path, name)))

# Ages are whole (floored) hours, so ">= 24" means "24 hours old or more".
if oldest_time_hour >= 24:
    save_log('log.log', 'failure', datetime.now(), oldest_time_hour)
    send_email_failure()
else:
    save_log('log.log', 'ok', datetime.now(), oldest_time_hour)
您还需要用一个无限循环来持续运行这个 python 脚本,或者用一个 cron job(定时任务)每小时左右运行一次这个 python 脚本
你为什么要检查 last_modified_date? 我建议您使用 md5 校验和检查文件的修改。 我的想法是,如果您有以下文件:
file1.csv
file2.csv
file3.csv
file4.csv
file5.csv
您可以检查他们的 md5 校验和并将结果 + DateTime 写入原始文件旁边的文件中。 如下所示:
file1.csv
file1.csv_checksum
file1.csv_checksum 的内容
时间戳,校验和
1612820511,d41d8cd98f00b204e9800998ecf8427e
您可以使用以下代码检查文件的 md5:
>>> import hashlib
>>> hashlib.md5(open('filename.exe','rb').read()).hexdigest()
然后您可以使用校验和文件中提供的结果检查结果(如果校验和文件不存在,则第一次创建它)
我认为您可以使用这种方法轻松处理它。
起初我从一个任务调度器装饰器开始,它可以让你轮询一个目录以获得固定的延迟:
import time
import functools
def scheduled(fixed_delay):
    """Build a decorator that sleeps after each call of the decorated method.

    Args:
        fixed_delay: *name* of the attribute on the instance (the first
            positional argument of the decorated method) that holds the
            delay in seconds.

    Returns:
        A decorator. The wrapped method runs first; then the wrapper sleeps
        for ``getattr(self, fixed_delay)`` seconds and returns the method's
        result. If the method raises, no sleep happens.
    """
    def decorator_scheduled(func):
        # Applied as a decorator so the wrapper keeps func's name and docstring.
        @functools.wraps(func)
        def wrapper_schedule(*args, **kwargs):
            result = func(*args, **kwargs)
            self = args[0]
            delay = getattr(self, fixed_delay)
            time.sleep(delay)
            return result
        return wrapper_schedule
    return decorator_scheduled
将其保存为名为task_scheduler.py的单独模块。 我将在我的文件观察器中使用它:
import logging
import os
import smtplib
import ssl
import time
from stat import S_ISREG

from task_scheduler import scheduled
log = logging.getLogger(__name__)


class FileWatcher:
    """Poll a directory and e-mail a notice for files that have gone stale.

    Args:
        files_path: directory whose direct entries are inspected.
        extension: only files with exactly this extension are considered.
        poll_delay: seconds to sleep after each poll (read by ``@scheduled``).
    """

    # A file is reported once its last modification is older than this.
    STALE_AFTER_HOURS = 24

    def __init__(self,
                 files_path='./myFiles',
                 extension='.csv',
                 poll_delay=2):
        self.files_path = files_path
        self.extension = extension
        self.poll_delay = poll_delay

    def notify_host_on_nonchange(self, file_path):
        """Send a notification e-mail naming ``file_path`` via Gmail SMTP over SSL."""
        port = 465
        smtp_server = "smtp.gmail.com"
        sender_email = "sender@gmail.com"
        receiver_email = "receiver@gmail.com"
        password = "Your password here" #You may want to read it from file
        message = f"No change in file: {file_path} for 24 hurs!"
        context = ssl.create_default_context()
        with smtplib.SMTP_SSL(smtp_server, port, context=context) as server:
            server.login(sender_email, password)
            server.sendmail(sender_email, receiver_email, message)

    def watch(self):
        """Poll forever; stop quietly when the user presses Ctrl-C."""
        try:
            while True:
                self.poll_()
        except KeyboardInterrupt:
            log.debug('Polling interrupted by user.')

    @scheduled("poll_delay")
    def poll_(self):
        """Inspect the directory once and notify for every stale matching file.

        NOTE(review): a file that stays stale is reported again on every poll;
        track already-reported paths if one mail per file is wanted.
        """
        now = time.time()
        for f in os.listdir(self.files_path):
            _, file_ext = os.path.splitext(f)
            if file_ext != self.extension:
                continue
            full_path = os.path.join(self.files_path, f)
            try:
                path_stat = os.stat(full_path)
            except OSError:
                # The entry disappeared between listdir() and stat().
                continue
            if not S_ISREG(path_stat.st_mode):
                continue
            # Subtract first, then convert seconds to hours; use st_mtime
            # because st_ctime is not the content-modification time.
            age_hours = (now - path_stat.st_mtime) / 3600
            if age_hours > self.STALE_AFTER_HOURS:
                self.notify_host_on_nonchange(full_path)


if __name__ == "__main__":
    file_listener = FileWatcher()
    file_listener.watch()
上面的 class 定义了一个 poll_ 方法,它借助 os.stat 检查文件的修改时间。 如果距上次修改不超过 24 小时,或者该路径不是普通文件(例如是目录),或者扩展名不是您要轮询的扩展名,就会跳过该文件;否则调用 notify_host_on_nonchange 方法发送邮件。 示例使用的是 gmail smtp 服务器,但您可以根据自己的环境对其进行更改。 watch 方法是用于连续轮询的包装器。
这个 class 改编自我的机器学习 model 观察程序和加载程序,您可以从我的 github访问该版本和项目。 有关装饰器和脚本的进一步说明,您可以查看我的媒体帖子。
我对 CSV 不太了解,但我会导入 time 模块,并使用格式化和 time.sleep 函数来创建一个定时器。 time 模块的好处是,您可以在等待的时间结束后再给变量赋值。 所以您也许可以这样做,并加上一个 if 语句:当变量达到某个值时,就发送 email。
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.