[英]Download AWS CloudWatch logs for a period
我想从 AWS 下载满足以下条件的所有 CloudWatch 日志:属于指定的日志组,并且落在指定的时间范围内。
我的计划很简单:
import boto3
def overlaps(start1, end1, start2, end2):
    """Tell whether two ranges share a span of non-zero length.

    The comparison is strict, so ranges that merely touch at an endpoint
    (and zero-length ranges) are reported as not overlapping.

    :param start1: start of the first range.
    :param end1: end of the first range.
    :param start2: start of the second range.
    :param end2: end of the second range.
    :return: True when the later start lies strictly before the earlier end.
    """
    later_start = start2 if start2 > start1 else start1
    earlier_end = end2 if end2 < end1 else end1
    return later_start < earlier_end
def _stream_events(client, group, stream_name, start, end):
    """Yield the events of one log stream whose timestamp lies in [start, end)."""
    request = {
        "logGroupName": group,
        "logStreamName": stream_name,
        "startTime": start,
        "endTime": end,
    }
    token = None
    while True:
        if token is not None:
            request["nextToken"] = token
        response = client.get_log_events(**request)
        for event in response["events"]:
            # Mirror the API window: startTime is inclusive, endTime exclusive.
            if start <= event["timestamp"] < end:
                yield event
        # A page may be empty while more data exists; the stream is only
        # exhausted once the service echoes back the token we sent.
        next_token = response["nextBackwardToken"]
        if next_token == token:
            return
        token = next_token


def load_logs(region, group, start=0, end=2672995600000):
    """Download all events of a CloudWatch log group inside a time window.

    Walks every log stream of ``group`` and pages backwards through each
    candidate stream with ``get_log_events``. Within one stream the pages
    arrive newest first, so the combined list is not globally sorted.

    :param region: AWS region name used to create the ``logs`` client.
    :param group: name of the log group to read.
    :param start: window start in epoch milliseconds (inclusive).
    :param end: window end in epoch milliseconds (exclusive).
    :return: list of event dicts as returned by ``get_log_events``; the same
        list is also printed, as before.
    """
    client = boto3.client('logs', region_name=region)
    paginator = client.get_paginator('describe_log_streams')
    events = []
    for page in paginator.paginate(logGroupName=group):
        for log_stream in page["logStreams"]:
            stream_name = log_stream["logStreamName"]
            # Streams without events carry no timestamps, so never index them.
            first_ts = log_stream.get("firstEventTimestamp")
            last_ts = log_stream.get("lastEventTimestamp")
            print(f"Stream: {stream_name}, start: {first_ts} end: {last_ts}")

            # lastEventTimestamp is updated lazily by the service, so also
            # consult lastIngestionTime before deciding a stream is too old.
            # NOTE(review): events stamped later than their ingestion time
            # could still be missed by this shortcut - confirm if relevant.
            end_hints = [
                ts
                for ts in (last_ts, log_stream.get("lastIngestionTime"))
                if ts is not None
            ]
            # The stream covers the closed range [first, last]; "+ 1" turns it
            # into a half-open range so a single-event stream still matches
            # the strict comparison in overlaps(). Streams with incomplete
            # metadata are scanned rather than skipped.
            if (
                first_ts is not None
                and end_hints
                and not overlaps(first_ts, max(end_hints) + 1, start, end)
            ):
                continue

            print("processing")
            events.extend(_stream_events(client, group, stream_name, start, end))
    print(events)
    return events
我传入 0 作为 start,传入一个遥远的未来时间戳 2672995600000 作为 end。这样确实下载到了一部分事件,但 events 列表并没有包含所有的日志事件。我是不是漏掉了某些迭代?我尤其不确定对 get_log_events 的分页迭代是否正确。
您可以使用 start_query,它会针对整个日志组执行查询,返回其中所有日志流的日志。
import boto3
from datetime import datetime, timedelta
import time
# CloudWatch Logs client using the default region and credentials.
client = boto3.client('logs')

# Logs Insights query: return the timestamp and the raw message of every event.
query = "fields @timestamp, @message"
log_group = 'NAME_OF_YOUR_LOG_GROUP'

# Query window: the last 24 hours, expressed as whole epoch seconds.
_window_start = datetime.today() - timedelta(hours=24)
_query_window = dict(
    startTime=int(_window_start.timestamp()),
    endTime=int(datetime.now().timestamp()),
)

# Only starts the query; the response holds a query id, not the log lines.
start_query_response = client.start_query(
    logGroupName=log_group,
    queryString=query,
    **_query_window,
)
start_query 只负责启动查询,它不会直接返回日志行,而是返回一个查询 ID。您需要用这个 ID 反复调用 get_query_results,直到查询结束后才能取得结果。
import boto3
# Region the CloudWatch Logs client talks to; replace with your region.
REGION = "us-east-1"

# Placed between the two fields of every log line built by this script.
SEPARATOR = "MY_SEPARATOR"

# Query statuses after which polling stops ('Unknown' is deliberately left out).
CONCLUDED_QUERY_EXECUTION_STATUSES = [
    "Complete",
    "Failed",
    "Cancelled",
    "Timeout",
]

client = boto3.client("logs", region_name=REGION)
def query(start_time: float, end_time: float) -> list:
    """Run a CloudWatch Logs Insights query and return its rows as strings.

    :param start_time: The beginning of the time range to query. The range is
        inclusive, so the specified start time is included in the query.
        Specified as epoch time, the number of seconds since January 1, 1970,
        00:00:00 UTC. Ints and floats are both accepted; the value is rounded.
    :param end_time: The end of the time range to query. The range is
        inclusive, so the specified end time is included in the query.
        Specified as epoch time, the number of seconds since January 1, 1970,
        00:00:00 UTC. Ints and floats are both accepted; the value is rounded.
    :return: a list of log lines, each formatted as
        ``<first field><SEPARATOR><second field>``; empty when the query
        ended without results.
    """
    # Imported here so the function does not rely on a module-level import.
    import time

    # STEP 1: prepare and start your query, you'll get a query id
    body = dict(
        logGroupName='YOUR_LOG_GROUP_HERE',
        startTime=round(start_time),
        endTime=round(end_time),
        queryString='fields @timestamp, @message',  # complete your query if needed
    )
    query_id = client.start_query(**body).get('queryId')

    # STEP 2: wait for query result
    attempt = 0  # poll counter shown in the progress line
    response = client.get_query_results(queryId=query_id)
    while response.get('status') not in CONCLUDED_QUERY_EXECUTION_STATUSES:
        attempt += 1
        print(f"[{attempt:4}]\t Query status: {response.get('status')}", end='\r')
        time.sleep(5)  # wait a bit before retrying
        response = client.get_query_results(queryId=query_id)
    print(f"[DONE]\tQuery status: {response.get('status')}")

    # Each row is a list of {'field', 'value'} entries; positions 0 and 1 are
    # presumably @timestamp and @message, matching the query string above.
    rows = response.get('results') or []
    return [f"{r[0]['value']}{SEPARATOR}{r[1]['value']}" for r in rows]
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.