I am new to python and trying to understanding how to automate stuff. I have a folder in which 5 csv files get updated daily, however sometimes one of them or two dont on particular days. Im having to manually check this folder. Instead I want to automate this in such a way that if a csv file does not update in the last 24hours, It can send an email to myself alerting me of this.
My code:
import datetime
import glob
import os
import smtplib
import string
now = datetime.datetime.today() #Get current date
list_of_files = glob.glob('c:/Python/*.csv') # * means all if need specific format then *.csv
latest_file = max(list_of_files, key=os.path.getctime) #get latest file created in folder
newestFileCreationDate = datetime.datetime.utcfromtimestamp(os.path.getctime(latest_file)) # get creation datetime of last file
dif = (now - newestFileCreationDate) #calculating days between actual date and last creation date
logFile = "c:/Python/log.log" #defining a log file
def checkFolder(dif, now, logFile):
if dif > datetime.timedelta(days = 1): #Check if difference between today and last created file is greater than 1 days
HOST = "12.55.13.12" #This must be your smtp server ip
SUBJECT = "Alert! At least 1 day wthout a new file in folder xxxxxxx"
TO = "xx.t@gmail.com"
FROM = "xx.t@gmail.com"
text = "%s - The oldest file in folder it's %s old " %(now, dif)
BODY = string.join((
"From: %s" % FROM,
"To: %s" % TO,
"Subject: %s" % SUBJECT ,
"",
text
), "\r\n")
server = smtplib.SMTP(HOST)
server.sendmail(FROM, [TO], BODY)
server.quit()
file = open(logFile,"a") #Open log file in append mode
file.write("%s - [WARNING] The oldest file in folder it's %s old \n" %(now, dif)) #Write a log
file.close()
else : # If difference between today and last creation file is less than 1 days
file = open(logFile,"a") #Open log file in append mode
file.write("%s - [OK] The oldest file in folder it's %s old \n" %(now, dif)) #write a log
file.close()
checkFolder(dif,now,logFile) #Call function and pass 3 arguments defined before
However, this does not run without error and I just want to be notified by mail of those files in the folder that havent been updated. even if it is one of out 5 files of them or 5 out of 5 that havent updated.
Use pure python and concise way
import hashlib
import glob
import json
import smtplib
from email.message import EmailMessage
import time
import schedule #pip install schedule
hasher = hashlib.md5()
size = 65536 #to read large files in chunks
list_of_files = glob.glob('./*.csv') #absolute path for crontab
Part 1) Run this script first then comment it out. It will create a json file with hashes of your files.
first_hashes = {}
for x in list_of_files:
with open(x, 'rb') as f:
buf = f.read(size)
while len(buf) > 0:
hasher.update(buf)
buf = f.read(size)
first_hashes[x] = hasher.hexdigest()
with open('hash.json', 'w') as file:
file.write(json.dumps(first_hashes, indent=2))
Now comment it out or even delete it.
Part 2) Automation script:
def send_email():
check_hash = {} #Contain hashes that have not changed
with open('hash.json') as f: #absolute path for crontab
data = json.load(f)
for x in list_of_files:
with open(x, 'rb') as f:
buf = f.read(size)
while len(buf) > 0:
hasher.update(buf)
buf = f.read(size)
new_hash = hasher.hexdigest()
#if a hash match with one in data, that file has not changed
if new_hash in data.values():
check_hash[x] = new_hash
data[x] = new_hash
#update our hashes
with open('hash.json', 'w') as file: #absolute path for crontab
file.write(json.dumps(data, indent=2))
if len(check_hash) > 0: #check if there's anything in check_hash
filename="check_hash.txt" #absolute path for crontab
#write to a text file named "check_hash.txt"
with open(filename, 'w') as f: #absolute path for crontab
f.write(json.dumps(check_hash, indent=2))
# for gmail smtp setup watch youtu.be/JRCJ6RtE3xU
EMAIL_ADDRESS = 'SMTPAddress@gmail.com'
EMAIL_PASSWORD = 'SMTPPassWord'
msg = EmailMessage()
msg['Subject'] = 'Unupdated files'
msg['From'] = EMAIL_ADDRESS
msg['To'] = 'receive@gmail.com'
msg.set_content('These file(s) did not update:')
msg.add_attachment(open(filename, "r").read(), filename=filename)
with smtplib.SMTP_SSL('smtp.gmail.com', 465) as smtp:
smtp.login(EMAIL_ADDRESS, EMAIL_PASSWORD)
smtp.send_message(msg)
#for faster testing check other options here github.com/dbader/schedule
schedule.every().day.at("10:30").do(send_email)
while 1:
schedule.run_pending()
time.sleep(1)
EDIT: If you restart your pc, you will need to run this file again to restart schedule, to avoid that, you can use crontab as follows (learn how from youtu.be/j-KgGVbyU08):
# mm hh DOM MON DOW command
30 10 * * * python3 path-to-file/email-script.py #Linux
30 10 * * * python path-to-file/email-script.py #Windows
This will run the script everyday at 10:30 AM IF the pc is ON at that time. For faster testing (run every 1 minute) use:
* * * * * python3 path-to-file/email-script.py
NOTE: If you gonna use crontab, you MUST use absolute path for all file references and replace
schedule.every().day.at("10:30").do(send_email)
while 1:
schedule.run_pending()
time.sleep(1)
with
if __name__ == "__main__":
send_email()
Tested and it's working great!
Are you thinking of something like this?
import os
from datetime import datetime
import smtplib
import textwrap
def send_email_failure():
SERVER = "12.55.13.12" #This must be your smtp server ip
SUBJECT = "Alert! At least 1 day without a new file in folder xxxxxxx"
TO = "xx.t@gmail.com"
FROM = "xx.t@gmail.com"
TEXT = "%s - The oldest file in folder it's %sh old " %(datetime.now(), oldest_time_hour)
"""this is some test documentation in the function"""
message = textwrap.dedent("""\
From: %s
To: %s
Subject: %s
%s
""" % (FROM, ", ".join(TO), SUBJECT, TEXT))
print(message)
# Send the mail
server = smtplib.SMTP(SERVER)
server.sendmail(FROM, TO, message)
server.quit()
def save_log(logFile, ok_or_failure, time_now, delta):
file = open(logFile,"a") #Open log file in append mode
if ok_or_failure != 'ok':
file.write("%s - [WARNING] The oldest file in folder it's %s old \n" %(time_now, delta))
else:
file.write("%s - [OK] The oldest file in folder it's %s old \n" %(time_now, delta))
file.close()
def check_file(filename):
print(filename)
if filename.endswith('.csv'):
print('csv')
try:
mtime = os.path.getmtime(filename) # get modified time
except OSError:
mtime = 0
last_modified_date = datetime.fromtimestamp(mtime)
tdelta = datetime.now() - last_modified_date
hours = tdelta.seconds // 3600 # convert to hours
return hours
else:
return 0
# we check what files are in the dir 'files'
# and their modification time
oldest_time_hour = 0
for path, dirs, files in os.walk('./files'): # this need to be modified by case
for file in files:
# get each file time of modification
time = check_file(path+'/'+file)
if time > 0:
# save the oldest time
if time > oldest_time_hour:
oldest_time_hour = time
# if it is older that 24h
if oldest_time_hour > 24:
save_log('log.log', 'failure', datetime.now(), oldest_time_hour)
send_email_failure()
else:
save_log('log.log', 'ok', datetime.now(), oldest_time_hour)
also you will need an end-less loop to run the python script or a chronjob to run this python script every hour or so
Why are you checking the last_modified_date? I suggest you to check the modification of the file with md5 checksum. My Idea is, if you have following files:
file1.csv
file2.csv
file3.csv
file4.csv
file5.csv
You can check their md5 checksum and write the result + DateTime into a file next to the original file. like following:
file1.csv
file1.csv_checksum
Content of file1.csv_checksum
timestamp,checksum
1612820511,d41d8cd98f00b204e9800998ecf8427e
you can check md5 of a file with following code:
>>> import hashlib
>>> hashlib.md5(open('filename.exe','rb').read()).hexdigest()
then you can check the result with the provided one in the checksum file ( and if the checksum file does not exist, just create it for the first time )
I think you can easily handle it with this approach.
At first i started with a task scheduler decorator which will enable you to poll a directory for a fixed delay:
import time
import functools
def scheduled(fixed_delay):
def decorator_scheduled(func):
functools.wraps(func)
def wrapper_schedule(*args, **kwargs):
result = func(*args, **kwargs)
self = args[0]
delay = getattr(self, fixed_delay)
time.sleep(delay)
return result
return wrapper_schedule
return decorator_scheduled
Saved it as a seperate module named task_scheduler.py . I will use it in my file watcher:
import os
from task_scheduler import scheduled
import smtplib, ssl
class FileWatcher:
def __init__(self,
files_path='./myFiles',
extension='.csv',
poll_delay=2):
self.files_path = files_path
self.extension = extension
self.poll_delay = poll_delay
def notify_host_on_nonchange(self, file_path):
port = 465
smtp_server = "smtp.gmail.com"
sender_email = "sender@gmail.com"
receiver_email = "receiver@gmail.com"
password = "Your password here" #You may want to read it from file
message = f"No change in file: {file_path} for 24 hurs!"
context = ssl.create_default_context()
with smtplib.SMTP_SSL(smtp_server, port, context=context) as server:
server.login(sender_email, password)
server.sendmail(sender_email, receiver_email, message)
def watch(self):
try:
while True:
self.poll_()
except KeyboardInterrupt:
log.debug('Polling interrupted by user.')
@scheduled("poll_delay")
def poll_(self,):
for f in os.listdir(self.files_path):
full_path = os.path.join(self.files_path, f)
path_stat = os.stat(full_path)
_, file_ext = os.path.splitext(f)
ctime = path_stat.st_ctime
diff = time.time() - ctime/3600
if diff<=24 or not S_ISREG(path_stat.st_mode) or str(file_ext) != self.extension:
continue
self.notify_host_on_nonchange(full_path)
if __name__ == "__main__":
file_listener = FileWatcher()
file_listener.watch()
Above class defines a poll_ function which benefits from os.stat module to check the modification time. If modification time smaller than or equal to 24 or the file is not a regular file (means that it is a directory) or it does not have the extension you look for polling will skip it, otherwise calls the notify function to send e-mail. It uses the gmail smtp server example but you can change it as appropriate for your environment. Watch function is a wrapper for continous polling.
This class is adapted from my machine learning model watcher and loader, you can access that version and project from my github . For further explanation about decorator and script you can check out my medium post .
Granted I don't know CSV but I would import time and using the format and time. Sleep function create a timer. What's good about time module is that you can configure it to set a value to a variable after time is up. SO maybe if you do that and put into an if statement, when the variable reaches a value, send the email.
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.