简体   繁体   中英

How do you get the HEAD object from an s3 in node.js? (running in an aws lambda)

I'm trying to run a lambda that inserts metadata into my db every time an object is put into my S3 bucket.

Because I am using MongoDB I have code to persist db connections in between calls. The problem I am having is that I can't get the metadata.

All the code for inserting into the DB has been done, I just need help getting the metadata from an aws lambda.

Here is my code (it is mostly copied from the MongoDB site):

"use strict";
const MongoClient = require('mongodb').MongoClient;
const MONGODB_URI = 'mongodb://cam_writer:1%40kGM%26LL%26gA5y7NVk1cvl9@cluster0-shard-00-00-hlygq.mongodb.net:27017,cluster0-shard-00-01-hlygq.mongodb.net:27017,cluster0-shard-00-02-hlygq.mongodb.net:27017/test?ssl=true&replicaSet=Cluster0-shard-0&authSource=admin&retryWrites=true'; // or Atlas connection string

var AWS = require('aws-sdk')
var s3 = AWS.S3()
let cachedDb = null;

// Resolve to the 'events' database handle, creating the MongoDB
// connection only on a cold start; warm invocations reuse the cached
// handle stored in the module-scoped `cachedDb`.
async function connectToDatabase (uri) {
  console.log('=> connect to database');

  if (cachedDb) {
    console.log('=> using cached database instance');
    return cachedDb;
  }

  const client = await MongoClient.connect(uri);
  cachedDb = client.db('events');
  return cachedDb;
}


// Read every document from 'detection_events' and map the outcome to an
// HTTP-style result object: 200/'success' on success, 500/'error' on failure.
// The fetched documents themselves are discarded.
function queryDatabase (db) {
  console.log('=> query database');

  const detections = db.collection('detection_events');
  return detections
    .find({})
    .toArray()
    .then(() => ({ statusCode: 200, body: 'success' }))
    .catch((err) => {
      console.log('=> an error occurred: ', err);
      return { statusCode: 500, body: 'error' };
    });
}

// Insert a single document into 'detection_events'; returns the driver's
// insertOne promise unchanged so callers can await the write result.
function insertIntoDb (db, obj) {
  console.log('=> inserting data into db');
  const detections = db.collection('detection_events');
  return detections.insertOne(obj);
}

// Lambda entry point (callback style): persist a placeholder document per
// S3 event. Setting callbackWaitsForEmptyEventLoop = false lets the Lambda
// return while the cached MongoDB connection's sockets stay open.
module.exports.handler = (event, context, callback) => {
  context.callbackWaitsForEmptyEventLoop = false;

  console.log(event);

  // TODO(review): placeholder document — the real S3 object metadata is not
  // available on the event itself; it has to be fetched via s3.headObject.
  const meta = { test: "test", "key": event };

  console.log('event: ', event);

  connectToDatabase(MONGODB_URI)
    .then((db) => insertIntoDb(db, meta))
    .then((result) => {
      console.log('=> returning result: ', result);
      callback(null, result);
    })
    .catch((err) => {
      console.log('=> an error occurred: ', err);
      callback(err);
    });
};

I know that the "event" passed into a lambda by s3 does not contain the metadata. In python I was able to get the metadata by using boto3, I just don't know how to do it in node.js, (let alone node.js in an aws lambda)

EDIT:

So I've updated my code as per the first answer below. The code is now:

"use strict";
const MongoClient = require('mongodb').MongoClient;
const MONGODB_URI = 'mongodb://cam_writer:1%40kGM%26LL%26gA5y7NVk1cvl9@cluster0-shard-00-00-hlygq.mongodb.net:27017,cluster0-shard-00-01-hlygq.mongodb.net:27017,cluster0-shard-00-02-hlygq.mongodb.net:27017/test?ssl=true&replicaSet=Cluster0-shard-0&authSource=admin&retryWrites=true'; // or Atlas connection string

const AWS = require('aws-sdk')
const s3 = new AWS.S3()
let cachedDb = null;

// Resolve to the 'events' database handle, caching it at module scope so
// warm Lambda invocations reuse a single MongoDB connection.
const connectToDatabase = uri => {

    console.log('=> connect to database');

    if (cachedDb) {
        console.log('=> using cached database instance');
        return Promise.resolve(cachedDb);
    }

    return MongoClient.connect(uri)
        .then(client => {
            cachedDb = client.db('events');
            // Returning the value directly is sufficient inside .then();
            // wrapping it in Promise.resolve() was redundant.
            return cachedDb;
        });

}

// Persist one detection-event document; resolves with the driver's
// insertOne result.
function insertIntoDb(db, obj) {
    console.log('=> inserting data into db');
    return db
        .collection('detection_events')
        .insertOne(obj);
}

// Lambda entry point: look up the S3 object's HEAD metadata and store it
// (tagged with bucket/key) in MongoDB. Returns an HTTP-style 201 response.
module.exports.handler = async (event) => {

    const db = await connectToDatabase(MONGODB_URI);

    const bucket = event.Records[0].s3.bucket.name;
    // S3 event notifications URL-encode the object key (spaces arrive as
    // '+'); decode it or headObject will 404 on such keys.
    const key = decodeURIComponent(event.Records[0].s3.object.key.replace(/\+/g, ' '));

    // Fetch the object's HEAD to obtain its user-defined metadata.
    const head = await s3.headObject({
        Bucket: bucket,
        Key: key
    }).promise();

    // Copy the metadata (rather than mutating head.Metadata) and tag it
    // with the object's location; bucket/key intentionally override any
    // metadata entries of the same name, as before.
    const meta = Object.assign({}, head['Metadata'], { bucket: bucket, key: key });
    console.log(meta)

    const result = await insertIntoDb(db, meta)

    console.log(result)
    return {
        statusCode: 201,
        body: JSON.stringify(result)
    }
};

I ran my code, which inserts a bunch of images into the S3 bucket. This generated about 25 connections in MongoDB. How can I keep the number of connections low with the Lambda? I thought the code copied from the MongoDB website would allow me to do this.

Since you're using an S3 Event, you can get the s3 bucket and key by accessing event.Records[0].s3.bucket.name and event.Records[0].s3.object.key . It could be easily done with the following code:

// Build HeadObject params from the first S3 event record (bucket + key).
const params = {
  Bucket: event.Records[0].s3.bucket.name, 
  Key: event.Records[0].s3.object.key
 };
 s3.headObject(params, function(err, data) {
  if (err) {
    // On failure, log the error and stack; `data` is undefined here.
    console.log(err, err.stack);
    return;
  }
  // data.Metadata carries the user-defined object metadata.
  console.log(data)
});

Just make sure you put this inside your DB callback, otherwise, you'll lose track of it.

I'd highly recommend you to use async/await though, as you won't have to deal with the famous callback hell . Here's the refactored code:

"use strict";
const MongoClient = require('mongodb').MongoClient;
const MONGODB_URI = 'mongodb://cam_writer:1%40kGM%26LL%26gA5y7NVk1cvl9@cluster0-shard-00-00-hlygq.mongodb.net:27017,cluster0-shard-00-01-hlygq.mongodb.net:27017,cluster0-shard-00-02-hlygq.mongodb.net:27017/test?ssl=true&replicaSet=Cluster0-shard-0&authSource=admin&retryWrites=true'; // or Atlas connection string

const AWS = require('aws-sdk')
const s3 = AWS.S3()
let cachedDb = null;

// Obtain the 'events' database handle. The MongoDB connection is created
// only on a cold start; afterwards the module-scoped cache is returned.
const connectToDatabase = async (uri) => {
  console.log('=> connect to database');

  if (cachedDb) {
    console.log('=> using cached database instance');
    return cachedDb;
  }

  const client = await MongoClient.connect(uri);
  cachedDb = client.db('events');
  return cachedDb;
};

// Store one document in 'detection_events' and hand back the driver's
// insertOne promise so the caller can await the write acknowledgement.
function insertIntoDb (db, obj) {
  console.log('=> inserting data into db');
  const events = db.collection('detection_events');
  return events.insertOne(obj);
}

// Lambda entry point: record the S3 object's location in MongoDB, then
// fetch and log the object's HEAD (metadata). Returns an HTTP-style 201.
module.exports.handler = async (event) => {

  const db = await connectToDatabase(MONGODB_URI);

  const bucket = event.Records[0].s3.bucket.name;
  // S3 URL-encodes the object key in event notifications (spaces become
  // '+'), so decode it before calling the API — otherwise headObject
  // fails for keys containing spaces or special characters.
  const key = decodeURIComponent(event.Records[0].s3.object.key.replace(/\+/g, ' '));

  const result = await insertIntoDb(db, { bucket: bucket, key: key })

  console.log(result)

  // finally get the HEAD for the s3Object
  const head = await s3.headObject({
    Bucket: bucket,
    Key: key
  }).promise();

  console.log(head)

  return {
    statusCode: 201,
    body: JSON.stringify(result)
  }

};

This should be enough to get you off ground.

EDIT : I recommend you take a look at the official NodeJS SDK, as its documentation is quite good

EDIT 2 : as per Michael's suggestion, if your files may contain whitespaces, then use decodeURIComponent(event.Records[0].s3.object.key.replace(/\+/g, " ")) instead of event.Records[0].s3.object.key

EDIT 3 : Now that your code works, you said it adds a "bunch" of images to S3. S3 will fire as many events as inserted images. This means that N Lambdas will spin up concurrently, therefore creating a new MongoDB connection every time.

One workaround here is to set the limit of concurrent executions on your Lambda function to a lower number, so you can control how many connections can be open at the same time.

In order to do it, go to your Lambda's console and set Concurrency to whatever number you want (in the example below, I use 5). This should be enough to get through with what you need.

在此处输入图片说明

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM