Is there a way to send EMR logs to CloudWatch instead of S3? We would like to have all our service logs in one location. It seems the only CloudWatch integration available is setting up alarms for monitoring, which doesn't cover logging:
https://docs.aws.amazon.com/emr/latest/ManagementGuide/UsingEMR_ViewingMetrics.html
Would I have to install the CloudWatch agent on the nodes in the cluster? https://docs.aws.amazon.com/AmazonCloudWatch/latest/logs/AgentReference.html
You can install the CloudWatch agent via EMR's bootstrap configuration and configure it to watch the log directories; it then starts pushing logs to Amazon CloudWatch Logs.
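As a minimal sketch of that approach, the snippet below launches a cluster with a bootstrap action pointing at a script in S3 that would install and start the agent. The bucket, script path, release label, instance settings, and roles are all placeholders, not values from the original answer:

import boto3

emr = boto3.client('emr')

# Launch an EMR cluster with a bootstrap action that runs a script
# (stored in S3) which installs and starts the CloudWatch agent.
response = emr.run_job_flow(
    Name='cluster-with-cloudwatch-agent',
    ReleaseLabel='emr-6.15.0',
    Instances={
        'MasterInstanceType': 'm5.xlarge',
        'SlaveInstanceType': 'm5.xlarge',
        'InstanceCount': 3,
        'KeepJobFlowAliveWhenNoSteps': True,
    },
    BootstrapActions=[
        {
            'Name': 'install-cloudwatch-agent',
            'ScriptBootstrapAction': {
                # Hypothetical script that installs the agent and points it
                # at an agent config watching the EMR log directories
                'Path': 's3://my-bucket/bootstrap/install-cw-agent.sh',
            },
        },
    ],
    JobFlowRole='EMR_EC2_DefaultRole',
    ServiceRole='EMR_DefaultRole',
)
print(response['JobFlowId'])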
Alternatively, you can read the logs from S3 and push them to CloudWatch using boto3, then delete them from S3 if you no longer need them. In some use cases the stdout.gz log needs to be in CloudWatch for monitoring purposes.
See the boto3 documentation on put_log_events.
import boto3
import botocore.session
import gzip
import logging
import time

logger = logging.getLogger(__name__)


def get_session(service_name):
    # Build a boto3 client using the credentials resolved by botocore
    session = botocore.session.get_session()
    credentials = session.get_credentials()
    return boto3.client(
        service_name,
        region_name=session.get_config_variable('region'),
        aws_access_key_id=credentials.access_key,
        aws_secret_access_key=credentials.secret_key,
        aws_session_token=credentials.token,
    )


def get_log_file(s3, bucket, key):
    # Download the gzipped log object from S3 and return its decompressed bytes
    try:
        obj = s3.get_object(Bucket=bucket, Key=key)
        compressed_body = obj['Body'].read()
        return gzip.decompress(compressed_body)
    except Exception as e:
        logger.error(f"Error reading from bucket: {e}")
        raise


def create_log_events(logs, batch_size):
    # Split the log file into CloudWatch log events, grouped into batches of
    # at most batch_size events; each event is stamped with the current time
    log_event_batch = []
    log_event_batch_collection = []
    try:
        for line in logs.splitlines():
            log_event = {
                'timestamp': int(round(time.time() * 1000)),
                'message': line.decode('utf-8'),
            }
            if len(log_event_batch) < batch_size:
                log_event_batch.append(log_event)
            else:
                log_event_batch_collection.append(log_event_batch)
                log_event_batch = [log_event]
    except Exception as e:
        logger.error(f"Error creating log events: {e}")
        raise
    log_event_batch_collection.append(log_event_batch)
    return log_event_batch_collection


def create_log_stream_and_push_log_events(logs, log_group, log_stream, log_event_batch_collection, delay):
    # Create the destination stream, then push each batch, threading the
    # sequence token from one call to the next and throttling between calls
    logs.create_log_stream(logGroupName=log_group, logStreamName=log_stream)
    seq_token = None
    try:
        for log_event_batch in log_event_batch_collection:
            kwargs = {
                'logGroupName': log_group,
                'logStreamName': log_stream,
                'logEvents': log_event_batch,
            }
            if seq_token:
                kwargs['sequenceToken'] = seq_token
            response = logs.put_log_events(**kwargs)
            seq_token = response['nextSequenceToken']
            time.sleep(delay)
    except Exception as e:
        logger.error(f"Error pushing log events: {e}")
        raise
The caller function:
def main():
    s3 = get_session('s3')
    logs = get_session('logs')

    BUCKET_NAME = 'Your_Bucket_Name'
    KEY = 'logs/emr/Path_To_Log/stdout.gz'
    BATCH_SIZE = 10000   # Maximum number of events per put_log_events call
    PUSH_DELAY = 0.2     # Throttle to stay under the per-stream put_log_events rate limit
    LOG_GROUP = 'test_log_group'  # Destination log group (must already exist)
    LOG_STREAM = '{}-{}'.format(time.strftime('%Y-%m-%d'), 'logstream.log')

    log_file = get_log_file(s3, BUCKET_NAME, KEY)
    log_event_batch_collection = create_log_events(log_file, BATCH_SIZE)
    create_log_stream_and_push_log_events(logs, LOG_GROUP, LOG_STREAM, log_event_batch_collection, PUSH_DELAY)


if __name__ == '__main__':
    main()
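If you no longer need the object in S3 once it has been pushed (as mentioned above), a minimal sketch of the cleanup step could look like the helper below; the function name is hypothetical, and you would call it from main() after the push succeeds:

def delete_log_file(s3, bucket, key):
    # Optional cleanup: remove the source object from S3 once its contents
    # have been pushed to CloudWatch Logs
    try:
        s3.delete_object(Bucket=bucket, Key=key)
    except Exception as e:
        logger.error(f"Error deleting object from bucket: {e}")
        raise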