diff --git a/assets/js/search-data.json b/assets/js/search-data.json index 4baf65a04..939a552c1 100644 --- a/assets/js/search-data.json +++ b/assets/js/search-data.json @@ -323,7 +323,7 @@ },"46": { "doc": "AWS", "title": "Prepare your S3 bucket", - "content": ". | From the S3 Administration console, choose Create Bucket. | Use the following as your bucket policy, filling in the placeholders: . | Standard Permissions | Minimal Permissions (Advanced) | . { \"Id\": \"lakeFSPolicy\", \"Version\": \"2012-10-17\", \"Statement\": [ { \"Sid\": \"lakeFSObjects\", \"Action\": [ \"s3:GetObject\", \"s3:PutObject\", \"s3:AbortMultipartUpload\", \"s3:ListMultipartUploadParts\" ], \"Effect\": \"Allow\", \"Resource\": [\"arn:aws:s3:::[BUCKET_NAME_AND_PREFIX]/*\"], \"Principal\": { \"AWS\": [\"arn:aws:iam::[ACCOUNT_ID]:role/[IAM_ROLE]\"] } }, { \"Sid\": \"lakeFSBucket\", \"Action\": [ \"s3:ListBucket\", \"s3:GetBucketLocation\", \"s3:ListBucketMultipartUploads\" ], \"Effect\": \"Allow\", \"Resource\": [\"arn:aws:s3:::[BUCKET]\"], \"Principal\": { \"AWS\": [\"arn:aws:iam::[ACCOUNT_ID]:role/[IAM_ROLE]\"] } } ] } . | Replace [BUCKET_NAME], [ACCOUNT_ID] and [IAM_ROLE] with values relevant to your environment. | [BUCKET_NAME_AND_PREFIX] can be the bucket name. If you want to minimize the bucket policy permissions, use the bucket name together with a prefix (e.g. example-bucket/a/b/c). This way, lakeFS will be able to create repositories only under this specific path (see: Storage Namespace). | lakeFS will try to assume the role [IAM_ROLE]. | . If required lakeFS can operate without accessing the data itself, this permission section is useful if you are using presigned URLs mode or the lakeFS Hadoop FileSystem Spark integration. Since this FileSystem performs many operations directly on the storage, lakeFS requires less permissive permissions, resulting in increased security. 
lakeFS always requires permissions to access the _lakefs prefix under your storage namespace, in which metadata is stored (learn more). By setting this policy without presign mode you’ll be able to perform only metadata operations through lakeFS, meaning that you’ll not be able to use lakeFS to upload or download objects. Specifically you won’t be able to: . | Upload objects using the lakeFS GUI (Works with presign mode) | Upload objects through Spark using the S3 gateway | Run lakectl fs commands (unless using presign mode with --pre-sign flag) | Use Actions and Hooks | . { \"Id\": \"[POLICY_ID]\", \"Version\": \"2012-10-17\", \"Statement\": [ { \"Sid\": \"lakeFSObjects\", \"Action\": [ \"s3:GetObject\", \"s3:PutObject\" ], \"Effect\": \"Allow\", \"Resource\": [ \"arn:aws:s3:::[STORAGE_NAMESPACE]/_lakefs/*\" ], \"Principal\": { \"AWS\": [\"arn:aws:iam::[ACCOUNT_ID]:role/[IAM_ROLE]\"] } }, { \"Sid\": \"lakeFSBucket\", \"Action\": [ \"s3:ListBucket\", \"s3:GetBucketLocation\" ], \"Effect\": \"Allow\", \"Resource\": [\"arn:aws:s3:::[BUCKET]\"], \"Principal\": { \"AWS\": [\"arn:aws:iam::[ACCOUNT_ID]:role/[IAM_ROLE]\"] } } ] } . We can use presigned URLs mode without allowing access to the data from the lakeFS server directly. We can achieve this by using condition keys such as aws:referer, aws:SourceVpc and aws:SourceIp. For example, assume the following scenario: . | lakeFS is deployed outside the company (i.e lakeFS cloud or other VPC not vpc-123) | We don’t want lakeFS to be able to access the data, so we use presign URL, we still need lakeFS role to be able to sign the URL. | We want to allow access from the internal company VPC: vpc-123. | . { \"Sid\": \"allowLakeFSRoleFromCompanyOnly\", \"Effect\": \"Allow\", \"Principal\": { \"AWS\": \"arn:aws:iam::[ACCOUNT_ID]:role/[IAM_ROLE]\" }, \"Action\": [ \"s3:GetObject\", \"s3:PutObject\", ], \"Resource\": [ \"arn:aws:s3:::[BUCKET]/*\", ], \"Condition\": { \"StringEquals\": { \"aws:SourceVpc\": \"vpc-123\" } } } . | . 
Alternative: use an AWS user . lakeFS can authenticate with your AWS account using an AWS user, using an access key and secret. To allow this, change the policy’s Principal accordingly: . \"Principal\": { \"AWS\": [\"arn:aws:iam::<ACCOUNT_ID>:user/<IAM_USER>\"] } . ", + "content": ". | Take note of the bucket name you want to use with lakeFS | Use the following as your bucket policy, filling in the placeholders: . | Standard Permissions | Standard Permissions (with s3express) | Minimal Permissions (Advanced) | . { \"Id\": \"lakeFSPolicy\", \"Version\": \"2012-10-17\", \"Statement\": [ { \"Sid\": \"lakeFSObjects\", \"Action\": [ \"s3:GetObject\", \"s3:PutObject\", \"s3:AbortMultipartUpload\", \"s3:ListMultipartUploadParts\" ], \"Effect\": \"Allow\", \"Resource\": [\"arn:aws:s3:::[BUCKET_NAME_AND_PREFIX]/*\"], \"Principal\": { \"AWS\": [\"arn:aws:iam::[ACCOUNT_ID]:role/[IAM_ROLE]\"] } }, { \"Sid\": \"lakeFSBucket\", \"Action\": [ \"s3:ListBucket\", \"s3:GetBucketLocation\", \"s3:ListBucketMultipartUploads\" ], \"Effect\": \"Allow\", \"Resource\": [\"arn:aws:s3:::[BUCKET]\"], \"Principal\": { \"AWS\": [\"arn:aws:iam::[ACCOUNT_ID]:role/[IAM_ROLE]\"] } } ] } . | Replace [BUCKET_NAME], [ACCOUNT_ID] and [IAM_ROLE] with values relevant to your environment. | [BUCKET_NAME_AND_PREFIX] can be the bucket name. If you want to minimize the bucket policy permissions, use the bucket name together with a prefix (e.g. example-bucket/a/b/c). This way, lakeFS will be able to create repositories only under this specific path (see: Storage Namespace). | lakeFS will try to assume the role [IAM_ROLE]. | . To use an S3 Express One Zone directory bucket, use the following policy. Note the lakeFSDirectoryBucket statement which is specifically required for using a directory bucket. 
{ \"Id\": \"lakeFSPolicy\", \"Version\": \"2012-10-17\", \"Statement\": [ { \"Sid\": \"lakeFSObjects\", \"Action\": [ \"s3:GetObject\", \"s3:PutObject\", \"s3:AbortMultipartUpload\", \"s3:ListMultipartUploadParts\" ], \"Effect\": \"Allow\", \"Resource\": [\"arn:aws:s3:::[BUCKET_NAME_AND_PREFIX]/*\"], \"Principal\": { \"AWS\": [\"arn:aws:iam::[ACCOUNT_ID]:role/[IAM_ROLE]\"] } }, { \"Sid\": \"lakeFSBucket\", \"Action\": [ \"s3:ListBucket\", \"s3:GetBucketLocation\", \"s3:ListBucketMultipartUploads\" ], \"Effect\": \"Allow\", \"Resource\": [\"arn:aws:s3:::[BUCKET]\"], \"Principal\": { \"AWS\": [\"arn:aws:iam::[ACCOUNT_ID]:role/[IAM_ROLE]\"] } }, { \"Sid\": \"lakeFSDirectoryBucket\", \"Action\": [ \"s3express:CreateSession\" ], \"Effect\": \"Allow\", \"Resource\": \"arn:aws:s3express:[REGION]:[ACCOUNT_ID]:bucket/[BUCKET_NAME]\" } ] } . | Replace [BUCKET_NAME], [ACCOUNT_ID] and [IAM_ROLE] with values relevant to your environment. | [BUCKET_NAME_AND_PREFIX] can be the bucket name. If you want to minimize the bucket policy permissions, use the bucket name together with a prefix (e.g. example-bucket/a/b/c). This way, lakeFS will be able to create repositories only under this specific path (see: Storage Namespace). | lakeFS will try to assume the role [IAM_ROLE]. | . If required lakeFS can operate without accessing the data itself, this permission section is useful if you are using presigned URLs mode or the lakeFS Hadoop FileSystem Spark integration. Since this FileSystem performs many operations directly on the storage, lakeFS requires less permissive permissions, resulting in increased security. lakeFS always requires permissions to access the _lakefs prefix under your storage namespace, in which metadata is stored (learn more). By setting this policy without presign mode you’ll be able to perform only metadata operations through lakeFS, meaning that you’ll not be able to use lakeFS to upload or download objects. Specifically you won’t be able to: . 
| Upload objects using the lakeFS GUI (Works with presign mode) | Upload objects through Spark using the S3 gateway | Run lakectl fs commands (unless using presign mode with --pre-sign flag) | Use Actions and Hooks | . { \"Id\": \"[POLICY_ID]\", \"Version\": \"2012-10-17\", \"Statement\": [ { \"Sid\": \"lakeFSObjects\", \"Action\": [ \"s3:GetObject\", \"s3:PutObject\" ], \"Effect\": \"Allow\", \"Resource\": [ \"arn:aws:s3:::[STORAGE_NAMESPACE]/_lakefs/*\" ], \"Principal\": { \"AWS\": [\"arn:aws:iam::[ACCOUNT_ID]:role/[IAM_ROLE]\"] } }, { \"Sid\": \"lakeFSBucket\", \"Action\": [ \"s3:ListBucket\", \"s3:GetBucketLocation\" ], \"Effect\": \"Allow\", \"Resource\": [\"arn:aws:s3:::[BUCKET]\"], \"Principal\": { \"AWS\": [\"arn:aws:iam::[ACCOUNT_ID]:role/[IAM_ROLE]\"] } } ] } . We can use presigned URLs mode without allowing access to the data from the lakeFS server directly. We can achieve this by using condition keys such as aws:referer, aws:SourceVpc and aws:SourceIp. For example, assume the following scenario: . | lakeFS is deployed outside the company (i.e lakeFS cloud or other VPC not vpc-123) | We don’t want lakeFS to be able to access the data, so we use presign URL, we still need lakeFS role to be able to sign the URL. | We want to allow access from the internal company VPC: vpc-123. | . { \"Sid\": \"allowLakeFSRoleFromCompanyOnly\", \"Effect\": \"Allow\", \"Principal\": { \"AWS\": \"arn:aws:iam::[ACCOUNT_ID]:role/[IAM_ROLE]\" }, \"Action\": [ \"s3:GetObject\", \"s3:PutObject\", ], \"Resource\": [ \"arn:aws:s3:::[BUCKET]/*\", ], \"Condition\": { \"StringEquals\": { \"aws:SourceVpc\": \"vpc-123\" } } } . | . Alternative: use an AWS user . lakeFS can authenticate with your AWS account using an AWS user, using an access key and secret. To allow this, change the policy’s Principal accordingly: . \"Principal\": { \"AWS\": [\"arn:aws:iam::<ACCOUNT_ID>:user/<IAM_USER>\"] } . 
", "url": "/howto/deploy/aws.html#prepare-your-s3-bucket", "relUrl": "/howto/deploy/aws.html#prepare-your-s3-bucket" diff --git a/howto/deploy/aws.html b/howto/deploy/aws.html index f4a4ea279..625231316 100644 --- a/howto/deploy/aws.html +++ b/howto/deploy/aws.html @@ -744,11 +744,14 @@

    -
  1. From the S3 Administration console, choose Create Bucket.
  2. -
  3. Use the following as your bucket policy, filling in the placeholders: +
  4. Take note of the bucket name you want to use with lakeFS
  5. +
  6. +

    Use the following as your bucket policy, filling in the placeholders:

    + + +
      +
    • Replace [BUCKET_NAME], [ACCOUNT_ID] and [IAM_ROLE] with values relevant to your environment.
    • +
    • [BUCKET_NAME_AND_PREFIX] can be the bucket name. If you want to minimize the bucket policy permissions, use the bucket name together with a prefix (e.g. example-bucket/a/b/c). +This way, lakeFS will be able to create repositories only under this specific path (see: Storage Namespace).
    • +
    • lakeFS will try to assume the role [IAM_ROLE].
    • +
    + +
    + +

    To use an S3 Express One Zone directory bucket, use the following policy. Note the lakeFSDirectoryBucket statement, which is specifically required when using a directory bucket.

    + +
    {
    +   "Id": "lakeFSPolicy",
    +   "Version": "2012-10-17",
    +   "Statement": [
    +      {
    +         "Sid": "lakeFSObjects",
    +         "Action": [
    +            "s3:GetObject",
    +            "s3:PutObject",
    +            "s3:AbortMultipartUpload",
    +            "s3:ListMultipartUploadParts"
    +         ],
    +         "Effect": "Allow",
    +         "Resource": ["arn:aws:s3:::[BUCKET_NAME_AND_PREFIX]/*"],
    +         "Principal": {
    +            "AWS": ["arn:aws:iam::[ACCOUNT_ID]:role/[IAM_ROLE]"]
    +         }
    +      },
    +      {
    +         "Sid": "lakeFSBucket",
    +         "Action": [
    +            "s3:ListBucket",
    +            "s3:GetBucketLocation",
    +            "s3:ListBucketMultipartUploads"
    +         ],
    +         "Effect": "Allow",
    +         "Resource": ["arn:aws:s3:::[BUCKET]"],
    +         "Principal": {
    +            "AWS": ["arn:aws:iam::[ACCOUNT_ID]:role/[IAM_ROLE]"]
    +         }
    +      },
    +      {
    +         "Sid": "lakeFSDirectoryBucket",
    +         "Action": [
    +            "s3express:CreateSession"
    +         ],
    +         "Effect": "Allow",
    +         "Resource": "arn:aws:s3express:[REGION]:[ACCOUNT_ID]:bucket/[BUCKET_NAME]"
    +      }
    +   ]
    +}