Is there a way to send EMR logs to CloudWatch instead of S3? We would like to have all our service logs in one location. It seems the only CloudWatch integration available is setting up alarms for monitoring, which doesn't cover logging:
https://docs.aws.amazon.com/emr/latest/ManagementGuide/UsingEMR_ViewingMetrics.html
Would I have to install the CloudWatch agent on the nodes in the cluster? https://docs.aws.amazon.com/AmazonCloudWatch/latest/logs/AgentReference.html
You can install the CloudWatch agent via EMR's bootstrap configuration and configure it to watch the log directories; it then starts pushing logs to Amazon CloudWatch Logs.
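As a minimal sketch of that approach, the snippet below launches a cluster with a bootstrap action pointing at a script in S3 that would install and start the agent. The bucket, script path, release label, instance settings, and roles are all placeholders, not values from the original answer:

import boto3

emr = boto3.client('emr')

# Launch an EMR cluster with a bootstrap action that runs a script
# (stored in S3) which installs and starts the CloudWatch agent.
response = emr.run_job_flow(
    Name='cluster-with-cloudwatch-agent',
    ReleaseLabel='emr-6.15.0',
    Instances={
        'MasterInstanceType': 'm5.xlarge',
        'SlaveInstanceType': 'm5.xlarge',
        'InstanceCount': 3,
        'KeepJobFlowAliveWhenNoSteps': True,
    },
    BootstrapActions=[
        {
            'Name': 'install-cloudwatch-agent',
            'ScriptBootstrapAction': {
                # Hypothetical script that installs the agent and points it
                # at an agent config watching the EMR log directories
                'Path': 's3://my-bucket/bootstrap/install-cw-agent.sh',
            },
        },
    ],
    JobFlowRole='EMR_EC2_DefaultRole',
    ServiceRole='EMR_DefaultRole',
)
print(response['JobFlowId'])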
Alternatively, you can read the logs from S3 and push them to CloudWatch using boto3, then delete them from S3 if you no longer need them. In some use cases the stdout.gz log needs to be in CloudWatch for monitoring purposes.
See the boto3 documentation on put_log_events.
import boto3
import botocore.session
import gzip
import logging
import time

logger = logging.getLogger(__name__)


def get_session(service_name):
    # Build a boto3 client using the credentials resolved by botocore
    session = botocore.session.get_session()
    credentials = session.get_credentials()
    return boto3.client(
        service_name,
        region_name=session.get_config_variable('region'),
        aws_access_key_id=credentials.access_key,
        aws_secret_access_key=credentials.secret_key,
        aws_session_token=credentials.token,
    )


def get_log_file(s3, bucket, key):
    # Download the gzipped log object from S3 and return its decompressed bytes
    try:
        obj = s3.get_object(Bucket=bucket, Key=key)
        compressed_body = obj['Body'].read()
        return gzip.decompress(compressed_body)
    except Exception as e:
        logger.error(f"Error reading from bucket: {e}")
        raise


def create_log_events(logs, batch_size):
    # Split the log file into CloudWatch log events, grouped into batches of
    # at most batch_size events; each event is stamped with the current time
    log_event_batch = []
    log_event_batch_collection = []
    try:
        for line in logs.splitlines():
            log_event = {
                'timestamp': int(round(time.time() * 1000)),
                'message': line.decode('utf-8'),
            }
            if len(log_event_batch) < batch_size:
                log_event_batch.append(log_event)
            else:
                log_event_batch_collection.append(log_event_batch)
                log_event_batch = [log_event]
    except Exception as e:
        logger.error(f"Error creating log events: {e}")
        raise
    log_event_batch_collection.append(log_event_batch)
    return log_event_batch_collection


def create_log_stream_and_push_log_events(logs, log_group, log_stream, log_event_batch_collection, delay):
    # Create the destination stream, then push each batch, threading the
    # sequence token from one call to the next and throttling between calls
    logs.create_log_stream(logGroupName=log_group, logStreamName=log_stream)
    seq_token = None
    try:
        for log_event_batch in log_event_batch_collection:
            kwargs = {
                'logGroupName': log_group,
                'logStreamName': log_stream,
                'logEvents': log_event_batch,
            }
            if seq_token:
                kwargs['sequenceToken'] = seq_token
            response = logs.put_log_events(**kwargs)
            seq_token = response['nextSequenceToken']
            time.sleep(delay)
    except Exception as e:
        logger.error(f"Error pushing log events: {e}")
        raise
The caller function:
def main():
    s3 = get_session('s3')
    logs = get_session('logs')

    BUCKET_NAME = 'Your_Bucket_Name'
    KEY = 'logs/emr/Path_To_Log/stdout.gz'
    BATCH_SIZE = 10000   # Maximum number of events per put_log_events call
    PUSH_DELAY = 0.2     # Throttle to stay under the per-stream put_log_events rate limit
    LOG_GROUP = 'test_log_group'  # Destination log group (must already exist)
    LOG_STREAM = '{}-{}'.format(time.strftime('%Y-%m-%d'), 'logstream.log')

    log_file = get_log_file(s3, BUCKET_NAME, KEY)
    log_event_batch_collection = create_log_events(log_file, BATCH_SIZE)
    create_log_stream_and_push_log_events(logs, LOG_GROUP, LOG_STREAM, log_event_batch_collection, PUSH_DELAY)


if __name__ == '__main__':
    main()
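If you no longer need the object in S3 once it has been pushed (as mentioned above), a minimal sketch of the cleanup step could look like the helper below; the function name is hypothetical, and you would call it from main() after the push succeeds:

def delete_log_file(s3, bucket, key):
    # Optional cleanup: remove the source object from S3 once its contents
    # have been pushed to CloudWatch Logs
    try:
        s3.delete_object(Bucket=bucket, Key=key)
    except Exception as e:
        logger.error(f"Error deleting object from bucket: {e}")
        raise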