![](/img/trans.png)
[英]Download secure file from S3 server using accesskey and secretkey
[英]Unable to get accessKey and SecretKey from Hadoop conf
我有以下代碼-
import java.io.InputStream
import java.net.URI
import java.util
import com.amazonaws.auth.AWSCredentialsProviderChain
import com.amazonaws.{ClientConfiguration, Protocol}
import com.amazonaws.services.s3.AmazonS3Client
import com.amazonaws.services.s3.model._
import org.apache.hadoop.conf.{Configuration => HadoopConfiguration}
import org.apache.hadoop.fs.{Path => HadoopPath}
import org.apache.hadoop.fs.s3a.{BasicAWSCredentialsProvider, S3AFileSystem}
import com.amazonaws.services.s3.model.ObjectListing
import scala.annotation.tailrec
object FileOperation {

  // Single object this helper operates on; scheme must be s3a.
  val uri = new URI("s3a://bucket-name/prefixKey/file.json")
  val fs: S3AFileSystem = new S3AFileSystem()

  /** Object key portion of `uri` (the path without its leading '/'), or "" when the path is empty. */
  private def objectKey: String =
    if (!(uri.getScheme != null && uri.getPath.isEmpty)) uri.getPath.substring(1) else ""

  /**
   * Builds a low-level S3 client from the Hadoop configuration keys
   * fs.s3a.access.key / fs.s3a.secret.key / fs.s3a.endpoint / fs.s3a.connection.ssl.enabled.
   *
   * NOTE(review): `new HadoopConfiguration(true)` only picks up core-site.xml when that file
   * is on the current thread's classpath; if a caller (e.g. asStream) runs under a different
   * classloader/thread the keys come back null. Prefer `fs.getConf` on an initialized
   * FileSystem so the same configuration the FS was created with is reused — TODO confirm.
   */
  def getAWSClient: AmazonS3Client = {
    val conf: HadoopConfiguration = new HadoopConfiguration(true)
    val awsConf: ClientConfiguration = new ClientConfiguration()
    val secureConnections: Boolean = conf.getBoolean("fs.s3a.connection.ssl.enabled", false)
    awsConf.setProtocol(if (secureConnections) Protocol.HTTPS else Protocol.HTTP)
    // Configuration.get(key) already returns null for a missing key; the
    // null.asInstanceOf[String] default was redundant.
    val accessKey: String = conf.get("fs.s3a.access.key")
    val secretKey: String = conf.get("fs.s3a.secret.key")
    // Do not log credentials: the previous println leaked the secret key into stdout/logs.
    val credentials = new AWSCredentialsProviderChain(new BasicAWSCredentialsProvider(accessKey, secretKey))
    val s3: AmazonS3Client = new AmazonS3Client(credentials, awsConf)
    s3.setEndpoint(conf.get("fs.s3a.endpoint"))
    s3
  }

  /**
   * Lists every object under `uri`'s key prefix and returns their URIs.
   *
   * @param recursive currently unused: listObjects with a bare prefix already returns all
   *                  descendant keys — TODO confirm intended semantics with the caller.
   * @throws java.nio.file.NoSuchFileException when no object matches the prefix
   */
  def getEntries(recursive: Boolean): Seq[URI] = {
    // Accumulate by prepending (O(1)) and reverse once at the end, instead of the
    // original O(n^2) `collected :+ elem` append on Seq.
    @tailrec
    def collectEntries(summaries: util.Iterator[S3ObjectSummary], acc: List[HadoopPath]): List[HadoopPath] =
      if (summaries.hasNext) {
        val summary: S3ObjectSummary = summaries.next()
        val newPath: String = "s3a://" + summary.getBucketName + "/" + summary.getKey
        collectEntries(summaries, new HadoopPath(new URI(newPath)) :: acc)
      } else acc.reverse

    val objects: ObjectListing = getAWSClient.listObjects(uri.getHost, objectKey)
    if (objects.getObjectSummaries.isEmpty) throw new java.nio.file.NoSuchFileException(uri.toString)
    else {
      // Initialize the FileSystem once for the bucket. The original re-initialized `fs`
      // for every listed object, leaking the thread pools / transfer managers that
      // S3AFileSystem.initialize creates (they are only released by FileSystem.close()).
      fs.initialize(new URI("s3a://" + uri.getHost), new HadoopConfiguration(true))
      collectEntries(objects.getObjectSummaries.iterator(), Nil).map(_.toUri)
    }
  }

  /** Opens the content stream of the single object addressed by `uri`. Caller must close it. */
  def asStream: InputStream = {
    val s3object: S3Object = getAWSClient.getObject(new GetObjectRequest(uri.getHost, objectKey))
    s3object.getObjectContent
  }
}
在函數getEntries中對函數getAWSClient的調用可以工作,但是在asStream函數中的調用獲取空的訪問密鑰和秘密密鑰。 getEntries函數用於列出文件夾下的文件,而asStream函數返回此類文件的輸入流,該文件流用於創建BufferedSource,然后讀取內容。
以下是hadoop core-site.xml文件。 有人可以幫助我理解為什麼訪問密鑰和秘密密鑰在asStream函數中顯示為空。
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Sample config for Hadoop S3A client. -->
<configuration>
<property>
<name>fs.s3a.access.key</name>
<value>xxxxxxxx</value>
</property>
<property>
<name>fs.s3a.secret.key</name>
<value>yyyyyyy</value>
</property>
<property>
<name>fs.s3a.connection.ssl.enabled</name>
<value>false</value>
</property>
<property>
<name>fs.s3a.endpoint</name>
<value>dev:80</value>
</property>
<property>
<name>fs.s3a.imp</name>
<value>org.apache.hadoop.fs.s3a.S3A</value>
</property>
<property>
<name>fs.s3a.path.style.access</name>
<value>true</value>
</property>
</configuration>
initialize() 被多次調用。它會創建諸如線程池和 AWS Transfer Manager 之類的昂貴資源,而這些資源只有在調用 FileSystem.close() 時才會被清理,因此您的代碼會泄漏這些資源。如果我們早知道有人會這樣用(現在我們知道了!),我們會加上檢查並讓它快速失敗。如果改為調用 FileSystem.listFiles(path, true),就能得到該路徑下所有對象的遞歸列表,且每一千個後代條目只需向 S3 發出一個 HTTP 請求——這似乎正是您使用 listObjects 想達到的效果。
哦,如果您想要FS使用的配置,請調用fs.getConf()
。 如果這些秘密是通過 XML 文件傳遞的,該配置中就會包含它們。 如果將它們保存在 JCEKS 文件或其他安全存儲中,取出來會稍微麻煩一些。
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.