-
PySpark: read from S3
카테고리 없음 · 2022. 11. 12. 14:56
# Read a text file from S3 through the Hadoop S3A filesystem.
# `sc` is not defined in this snippet; presumably it is an existing
# SparkContext (e.g. the one the pyspark shell provides) -- confirm before use.

# Placeholders: substitute real values before running, and keep real
# credentials out of source control.
access_key = "<AWS Access Key ID>"
secret_key = "<AWS Secret Key>"

# NOTE: `_jsc` is an underscore-prefixed (non-public) attribute of `sc`; it is
# used here to reach the Hadoop configuration object.
hadoop_conf = sc._jsc.hadoopConfiguration()
hadoop_conf.set("fs.s3a.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem")
hadoop_conf.set("fs.s3a.access.key", access_key)
hadoop_conf.set("fs.s3a.secret.key", secret_key)

# Replace <bucket-name> with the target bucket; the s3a:// scheme matches the
# fs.s3a.* settings configured above.
s3RDD = sc.textFile("s3a://<bucket-name>/weblogs/weblog.log")