简体   繁体   中英

List All objects in S3 with given Prefix in scala

I am trying to list all objects in an AWS S3 bucket, given a bucket name and a filter prefix, using the following code.

import scala.collection.JavaConverters._
import com.amazonaws.services.s3.AmazonS3Client
import com.amazonaws.services.s3.model.ListObjectsV2Request

// Bucket passed to withBucketName when the listing request is built.
val bucket_name = "Mybucket"
// Key prefix handed to list_objects in the call below.
val fiter_prefix = "Test/a/"

// Pages through listObjectsV2 for keys in `bucket_name` that start with `str`.
// NOTE(review): `mutable` and `s3_client` are not defined or imported anywhere in this snippet.
def list_objects(str: String): mutable.Buffer[String] = {
        val request : ListObjectsV2Request = new ListObjectsV2Request().withBucketName(bucket_name).withPrefix(str)
        var result: ListObjectsV2Result = new ListObjectsV2Result()
        do {
         // Each iteration overwrites `result`; nothing from the previous page is kept.
         result = s3_client.listObjectsV2(request)
         val token = result.getNextContinuationToken
         System.out.println("Next Continuation Token: " + token)
         // Feed the token back so the next call returns the following page.
         request.setContinuationToken(token)
        }while(result.isTruncated)
        // Only the final page's summaries are read here, after the loop has finished.
        // NOTE(review): the expression ends in `.size` (an Int) while the declared
        // return type is mutable.Buffer[String].
        result.getObjectSummaries.asScala.map(_.getKey).size
}

// Run the listing for the configured prefix.
list_objects(fiter_prefix)

I have applied the continuation-token method, but I am only getting the last page of objects. For example, if the prefix has 2210 objects, I get back only 210 objects.

Regards Mahi

listObjectsV2 returns some or all (up to 1,000) of the objects in a bucket per call, as stated here. You need to use the continuation token to iterate over the rest of the objects in the bucket, collecting the keys from every page rather than only the last one.

There is example code for Java here.

This is the code which worked for me.

import scala.collection.JavaConverters._
import com.amazonaws.services.s3.AmazonS3Client
import com.amazonaws.services.s3.model.ListObjectsV2Request

// Bucket passed to withBucketName when the listing request is built.
val bucket_name = "Mybucket"
// Key prefix handed to list_objects in the call below.
val fiter_prefix = "Test/a/"

/** Lists the keys of every object in `bucket_name` whose key starts with `str`.
  *
  * A single `listObjectsV2` call returns at most one page of results, so the
  * request is re-issued with the continuation token from the previous response
  * until the service reports that the listing is no longer truncated.
  *
  * @param str key prefix used to filter the listing
  * @return all matching keys, in the order the pages were returned
  */
def list_objects(str: String): List[String] = {
  val s3_client = new AmazonS3Client
  val request   = new ListObjectsV2Request().withBucketName(bucket_name).withPrefix(str)

  // A builder appends each page in time proportional to that page only;
  // `acc ::: page` would re-copy the whole accumulated list on every iteration.
  val keys      = List.newBuilder[String]
  var morePages = true
  while (morePages) {
    val page = s3_client.listObjectsV2(request)
    keys ++= page.getObjectSummaries.asScala.map(_.getKey)
    // The same request object is reused; only its continuation token advances.
    request.setContinuationToken(page.getNextContinuationToken)
    morePages = page.isTruncated
  }
  keys.result()
}

// Run the listing for the configured prefix.
list_objects(fiter_prefix)

A solution using vanilla Scala (AWS SDK for Java v2) that avoids vars by using tail recursion:

  import software.amazon.awssdk.regions.Region
  import software.amazon.awssdk.services.s3.S3Client
  import software.amazon.awssdk.services.s3.model.{ListObjectsV2Request, 
  ListObjectsV2Response}

  import scala.annotation.tailrec
  import scala.collection.JavaConverters.asScalaBufferConverter
  import scala.collection.mutable
  import scala.collection.mutable.ListBuffer

  // Bucket set on the listing request.
  val sourceBucket    = "yourbucket"
  // First part of the listing prefix; joined with subFolderPrefix using "/".
  val sourceKey       = "yourKey"
  // Second part of the listing prefix.
  val subFolderPrefix = "yourprefix"


  /** Collects the keys of every object matched by `initReq`, following
    * continuation tokens until the service stops returning one.
    *
    * Keys are returned in the order the pages arrive: each page is appended
    * to the accumulator rather than prepended.
    *
    * @param s3Client client used to issue the `listObjectsV2` calls
    * @param initReq  request carrying the bucket/prefix; it is never mutated,
    *                 each follow-up page uses a copy with the token set
    * @return all keys from all pages
    */
  def getAllPaths(s3Client: S3Client, initReq: ListObjectsV2Request): List[String] = {
    @tailrec
    def collectPages(tokenOpt: Option[String], acc: Vector[String]): List[String] = {
      // No token means this is the first page: send the caller's request as-is.
      val pageReq  = tokenOpt.fold(initReq)(token => initReq.toBuilder.continuationToken(token).build())
      val response = s3Client.listObjectsV2(pageReq)
      val keysSoFar = acc ++ response.contents().asScala.map(_.key())
      // The SDK returns null when there is no further page; Option(...) maps that to None.
      Option(response.nextContinuationToken()) match {
        case Some(next) => collectPages(Some(next), keysSoFar)
        case None       => keysSoFar.toList
      }
    }
    collectPages(None, Vector.empty[String])
  }
  // Client bound to the US_WEST_2 region.
  val s3Client = S3Client.builder().region(Region.US_WEST_2).build()

  // Listing request: the prefix is sourceKey and subFolderPrefix joined by "/".
  val request: ListObjectsV2Request = ListObjectsV2Request
    .builder()
    .bucket(sourceBucket)
    .prefix(s"$sourceKey/$subFolderPrefix")
    .build()

  // Fetch every key under the prefix.
  val listofAllKeys: List[String] = getAllPaths(s3Client, request)

The technical post webpages of this site are licensed under CC BY-SA 4.0. If you need to reprint, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM