
How do you get the HEAD object from an s3 in node.js? (running in an aws lambda)

I'm trying to run a lambda that inserts metadata into my db every time an object is put into my S3 bucket.

Because I am using MongoDB I have code to persist db connections in between calls. The problem I am having is that I can't get the metadata.

All the code for inserting into the DB has been done; I just need help getting the metadata from an AWS Lambda.

Here is my code (it is mostly copied from the MongoDB site):

"use strict";
const MongoClient = require('mongodb').MongoClient;
const MONGODB_URI = 'mongodb://cam_writer:1%40kGM%26LL%26gA5y7NVk1cvl9@cluster0-shard-00-00-hlygq.mongodb.net:27017,cluster0-shard-00-01-hlygq.mongodb.net:27017,cluster0-shard-00-02-hlygq.mongodb.net:27017/test?ssl=true&replicaSet=Cluster0-shard-0&authSource=admin&retryWrites=true'; // or Atlas connection string

var AWS = require('aws-sdk')
var s3 = new AWS.S3()
let cachedDb = null;

function connectToDatabase (uri) {

  console.log('=> connect to database');

  if (cachedDb) {
    console.log('=> using cached database instance');
    return Promise.resolve(cachedDb);
  }



  return MongoClient.connect(uri)
    .then(client => {
      cachedDb = client.db('events');
      return cachedDb;
    });

}


function queryDatabase (db) {
  console.log('=> query database');

  return db.collection('detection_events').find({}).toArray()
    .then(() => { return { statusCode: 200, body: 'success' }; })
    .catch(err => {
      console.log('=> an error occurred: ', err);
      return { statusCode: 500, body: 'error' };
    });
}

function insertIntoDb (db, obj) {
  console.log('=> inserting data into db');

  return db.collection('detection_events').insertOne(obj)
}

module.exports.handler = (event, context, callback) => {

  context.callbackWaitsForEmptyEventLoop = false;

  console.log(event)

  var meta = {test : "test", "key": event}; // HOW DO I GET THE ACTUAL METADATA FOR THE EVENT?

  console.log('event: ', event);

  connectToDatabase(MONGODB_URI)
    .then(db => insertIntoDb(db, meta))
    .then(result => {
      console.log('=> returning result: ', result);
      callback(null, result);
    })
    .catch(err => {
      console.log('=> an error occurred: ', err);
      callback(err);
    });
};

I know that the "event" passed into a lambda by s3 does not contain the metadata. In Python I was able to get the metadata using boto3; I just don't know how to do it in node.js (let alone node.js in an AWS Lambda).
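For reference, a trimmed S3 "ObjectCreated:Put" event record looks roughly like the sketch below (all values are made up for illustration); it carries the bucket name, key, size and ETag, but not the user metadata:

// Illustrative shape of event.Records[0] for an S3 put notification
// (placeholder values, not real data)
const exampleRecord = {
  eventSource: 'aws:s3',
  eventName: 'ObjectCreated:Put',
  s3: {
    bucket: { name: 'my-bucket', arn: 'arn:aws:s3:::my-bucket' },
    object: { key: 'images/photo.jpg', size: 1024, eTag: 'd41d8cd98f00b204e9800998ecf8427e' }
  }
};
// The user metadata (the x-amz-meta-* headers) is not in the event;
// it has to be fetched separately, e.g. with s3.headObject({ Bucket, Key }).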

EDIT:

So I've updated my code as per the first answer below. The code is now:

"use strict";
const MongoClient = require('mongodb').MongoClient;
const MONGODB_URI = 'mongodb://cam_writer:1%40kGM%26LL%26gA5y7NVk1cvl9@cluster0-shard-00-00-hlygq.mongodb.net:27017,cluster0-shard-00-01-hlygq.mongodb.net:27017,cluster0-shard-00-02-hlygq.mongodb.net:27017/test?ssl=true&replicaSet=Cluster0-shard-0&authSource=admin&retryWrites=true'; // or Atlas connection string

const AWS = require('aws-sdk')
const s3 = new AWS.S3()
let cachedDb = null;

const connectToDatabase = uri => {

    console.log('=> connect to database');

    if (cachedDb) {
        console.log('=> using cached database instance');
        return Promise.resolve(cachedDb);
    }

    return MongoClient.connect(uri)
        .then(client => {
            cachedDb = client.db('events');
            return Promise.resolve(cachedDb);
        });

}

function insertIntoDb(db, obj) {
    console.log('=> inserting data into db');

    return db.collection('detection_events').insertOne(obj)
}

module.exports.handler = async (event) => {

    const db = await connectToDatabase(MONGODB_URI);

    //finally get the HEAD for the s3Object
    const head = await s3.headObject({
        Bucket: event.Records[0].s3.bucket.name,
        Key: event.Records[0].s3.object.key
    }).promise();

    var meta = head['Metadata']
    meta['bucket'] = event.Records[0].s3.bucket.name;
    meta['key'] = event.Records[0].s3.object.key;
    console.log(meta)

    const result = await insertIntoDb(db, meta)

    console.log(result)
    return {
        statusCode: 201,
        body: JSON.stringify(result)
    }
};

I ran my code, which inserts a bunch of images into the s3. This generated about 25 connections in MongoDB; how can I keep the number of connections low with the lambda? I thought the code copied from the MongoDB website would allow me to do this.
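If I understand the 3.x Node driver correctly, each cached client also keeps its own connection pool (default size 5), and every concurrent Lambda container holds its own client, so a handful of containers adds up fast. A minimal sketch of capping the pool per container, assuming the 3.x poolSize option name:

// Sketch: limit each warm container to a single pooled connection
// (poolSize / useNewUrlParser are 3.x driver option names)
return MongoClient.connect(uri, { poolSize: 1, useNewUrlParser: true })
  .then(client => {
    cachedDb = client.db('events');
    return cachedDb;
  });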

Since you're using an S3 event, you can get the S3 bucket and key by accessing event.Records[0].s3.bucket.name and event.Records[0].s3.object.key. It can easily be done with the following code:

const params = {
  Bucket: event.Records[0].s3.bucket.name, 
  Key: event.Records[0].s3.object.key
 };
 s3.headObject(params, function(err, data) {
  if (err) {
    console.log(err, err.stack);
    return;
  }
  console.log(data)
});

Just make sure you put this inside your DB callback, otherwise you'll lose track of it.

I'd highly recommend using async/await though, as you won't have to deal with the famous callback hell. Here's the refactored code:

"use strict";
const MongoClient = require('mongodb').MongoClient;
const MONGODB_URI = 'mongodb://cam_writer:1%40kGM%26LL%26gA5y7NVk1cvl9@cluster0-shard-00-00-hlygq.mongodb.net:27017,cluster0-shard-00-01-hlygq.mongodb.net:27017,cluster0-shard-00-02-hlygq.mongodb.net:27017/test?ssl=true&replicaSet=Cluster0-shard-0&authSource=admin&retryWrites=true'; // or Atlas connection string

const AWS = require('aws-sdk')
const s3 = new AWS.S3()
let cachedDb = null;

const connectToDatabase = uri => {

  console.log('=> connect to database');

  if (cachedDb) {
    console.log('=> using cached database instance');
    return Promise.resolve(cachedDb);
  }

  return MongoClient.connect(uri)
    .then(client => {
      cachedDb = client.db('events');
      return Promise.resolve(cachedDb);
    });

}

function insertIntoDb (db, obj) {
  console.log('=> inserting data into db');

  return db.collection('detection_events').insertOne(obj)
}

module.exports.handler = async (event) => {

  const db = await connectToDatabase(MONGODB_URI);

  const result = await insertIntoDb(db, {
    bucket: event.Records[0].s3.bucket.name,
    key: event.Records[0].s3.object.key
  })

  console.log(result)

  //finally get the HEAD for the s3Object
   const head = await s3.headObject({
     Bucket: event.Records[0].s3.bucket.name,
     Key: event.Records[0].s3.object.key
   }).promise();

   console.log(head)

  return {
    statusCode: 201,
    body: JSON.stringify(result)
  }

};

This should be enough to get you off the ground.

EDIT: I recommend you take a look at the official NodeJS SDK, as its documentation is quite good.

EDIT 2: As per Michael's suggestion, if your files may contain whitespace, then use decodeURIComponent(event.Records[0].s3.object.key.replace(/\+/g, " ")) instead of event.Records[0].s3.object.key.
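For example, a small hypothetical helper that applies this decoding before the key is handed to headObject:

// S3 URL-encodes object keys in event records and turns spaces into '+',
// so decode the raw key before passing it to the SDK.
const decodeS3Key = rawKey => decodeURIComponent(rawKey.replace(/\+/g, ' '));

const key = decodeS3Key(event.Records[0].s3.object.key);
const head = await s3.headObject({
  Bucket: event.Records[0].s3.bucket.name,
  Key: key
}).promise();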

EDIT 3: Now that your code works, you said it adds a "bunch" of images to S3. S3 will fire as many events as there are inserted images. This means that N Lambdas will spin up concurrently, each creating a new MongoDB connection.

One workaround here is to set the limit of concurrent executions on your Lambda function to a lower number, so you can control how many connections can be open at the same time.

In order to do it, go to your Lambda's console and set Concurrency to whatever number you want (in the example below, I use 5). This should be enough to get through what you need.

[Screenshot: the Concurrency setting on the Lambda console]
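If you'd rather set the limit from code than through the console, the same thing can be done with the SDK's putFunctionConcurrency call; a sketch, with a placeholder function name:

const AWS = require('aws-sdk');
const lambda = new AWS.Lambda();

// Reserve at most 5 concurrent executions for the function, which also
// caps how many containers (and therefore DB connections) can exist at once.
lambda.putFunctionConcurrency({
  FunctionName: 'my-s3-metadata-function', // placeholder name
  ReservedConcurrentExecutions: 5
}).promise()
  .then(() => console.log('=> concurrency limit set'))
  .catch(err => console.log('=> an error occurred: ', err));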
