AW
Storage
Aws core v1.0.0
AWS Storage Services
Overview
This skill covers AWS storage services including S3, EBS, EFS, and database options (DynamoDB, RDS). Choosing the right storage service impacts performance, cost, and application architecture.
Key Concepts
Storage Decision Matrix
┌─────────────────────────────────────────────────────────────┐
│ Storage Service Selection │
├─────────────────────────────────────────────────────────────┤
│ │
│ Use Case │ Recommended Service │
│ ─────────────────────────────┼─────────────────────────────│
│ Object storage (files, imgs) │ S3 │
│ Block storage (EC2 volumes) │ EBS │
│ Shared file system │ EFS / FSx │
│ Key-value, high throughput │ DynamoDB │
│ Relational, complex queries │ RDS / Aurora │
│ In-memory caching │ ElastiCache │
│ Time-series data │ Timestream │
│ Document database │ DocumentDB │
│ Graph database │ Neptune │
│ │
│ S3 Storage Classes: │
│ ┌───────────────────────────────────────────────────────┐ │
│ │ S3 Standard ← Frequent access │ │
│ │ S3 Intelligent ← Auto-tiering │ │
│ │ S3 Standard-IA ← Infrequent, quick retrieval │ │
│ │ S3 One Zone-IA ← Single AZ, cost savings │ │
│ │ S3 Glacier IR ← Archive, milliseconds │ │
│ │ S3 Glacier Flex ← Archive, minutes to hours │ │
│ │ S3 Glacier Deep ← Long-term archive, retrieval within 12-48 hours │ │
│ └───────────────────────────────────────────────────────┘ │
│ │
└─────────────────────────────────────────────────────────────┘
Best Practices
1. Use S3 Lifecycle Policies
Automatically transition objects to cheaper storage classes.
2. Enable Versioning and Replication
Protect against accidental deletion and regional failures.
3. Encrypt Data at Rest
Use AWS KMS for managed encryption keys.
4. Use VPC Endpoints
Access S3 and DynamoDB privately from inside your VPC, without routing traffic through an internet gateway or NAT device.
5. Design DynamoDB for Access Patterns
Model tables around the queries the application will run, not around entity relationships as you would in a relational schema.
Code Examples
Example 1: S3 Bucket with Best Practices
import * as s3 from 'aws-cdk-lib/aws-s3';
import * as kms from 'aws-cdk-lib/aws-kms';
/**
 * Provisions the primary data bucket together with everything it depends on:
 * a customer-managed KMS key, a bucket that receives its server access logs,
 * and a replica bucket that S3 replicates new object versions into.
 */
export class StorageStack extends cdk.Stack {
  /** Primary data bucket, exposed so other constructs can reference it. */
  public readonly dataBucket: s3.Bucket;

  /**
   * @param scope Parent construct.
   * @param id Logical ID of the stack.
   * @param props Standard stack properties.
   */
  constructor(scope: Construct, id: string, props?: cdk.StackProps) {
    super(scope, id, props);

    // KMS key for encryption
    const encryptionKey = new kms.Key(this, 'DataKey', {
      enableKeyRotation: true,
      alias: 'alias/data-bucket-key',
      description: 'Key for data bucket encryption',
    });

    // Destination for the data bucket's server access logs. It has to exist
    // before the data bucket references it, and it gets the same public-access
    // and TLS hardening. Adjust the log retention to your audit requirements.
    const logBucket = new s3.Bucket(this, 'AccessLogBucket', {
      encryption: s3.BucketEncryption.S3_MANAGED,
      blockPublicAccess: s3.BlockPublicAccess.BLOCK_ALL,
      enforceSSL: true,
      lifecycleRules: [
        {
          id: 'ExpireAccessLogs',
          enabled: true,
          expiration: cdk.Duration.days(365),
        },
      ],
      removalPolicy: cdk.RemovalPolicy.RETAIN,
    });

    // S3 Bucket with security best practices
    this.dataBucket = new s3.Bucket(this, 'DataBucket', {
      bucketName: `data-${this.account}-${this.region}`,

      // Encryption
      encryption: s3.BucketEncryption.KMS,
      encryptionKey: encryptionKey,

      // Security
      blockPublicAccess: s3.BlockPublicAccess.BLOCK_ALL,
      enforceSSL: true,

      // Versioning and protection
      versioned: true,
      objectLockEnabled: true,

      // Access logging
      serverAccessLogsBucket: logBucket,
      serverAccessLogsPrefix: 'data-bucket/',

      // Lifecycle rules
      lifecycleRules: [
        {
          id: 'TransitionToIA',
          enabled: true,
          transitions: [
            {
              storageClass: s3.StorageClass.INFREQUENT_ACCESS,
              transitionAfter: cdk.Duration.days(30),
            },
            {
              storageClass: s3.StorageClass.GLACIER,
              transitionAfter: cdk.Duration.days(90),
            },
          ],
        },
        {
          // NOTE(review): noncurrent versions reach Glacier at day 30 and are
          // deleted at day 90, i.e. after 60 days in a class with a 90-day
          // minimum storage charge. Confirm the early-deletion cost is intended.
          id: 'ExpireOldVersions',
          enabled: true,
          noncurrentVersionExpiration: cdk.Duration.days(90),
          noncurrentVersionTransitions: [
            {
              storageClass: s3.StorageClass.GLACIER,
              transitionAfter: cdk.Duration.days(30),
            },
          ],
        },
        {
          id: 'AbortIncompleteUploads',
          enabled: true,
          abortIncompleteMultipartUploadAfter: cdk.Duration.days(7),
        },
      ],

      // CORS for web uploads
      cors: [
        {
          allowedMethods: [s3.HttpMethods.PUT, s3.HttpMethods.POST],
          allowedOrigins: ['https://app.example.com'],
          allowedHeaders: ['*'],
          maxAge: 3000,
        },
      ],

      // Cleanup policy
      removalPolicy: cdk.RemovalPolicy.RETAIN,
      autoDeleteObjects: false,
    });

    // Replica bucket.
    // NOTE(review): a bucket declared in this stack is created in the stack's
    // own region, whatever its name says. For true cross-region replication,
    // declare it in a stack deployed to the target region and pass it in.
    const replicaKey = new kms.Key(this, 'ReplicaKey', {
      enableKeyRotation: true,
      description: 'Key for replica bucket encryption',
    });
    const replicaBucket = new s3.Bucket(this, 'ReplicaBucket', {
      bucketName: `data-replica-${this.account}-us-west-2`,
      encryption: s3.BucketEncryption.KMS,
      encryptionKey: replicaKey,
      blockPublicAccess: s3.BlockPublicAccess.BLOCK_ALL,
      enforceSSL: true,
      versioned: true,
      // A destination must have Object Lock enabled when the source does.
      objectLockEnabled: true,
    });

    // Role S3 assumes to copy objects from the data bucket to the replica.
    const replicationRole = new iam.Role(this, 'ReplicationRole', {
      assumedBy: new iam.ServicePrincipal('s3.amazonaws.com'),
    });
    // The grants also cover use of each bucket's KMS key.
    this.dataBucket.grantRead(replicationRole);
    replicaBucket.grantWrite(replicationRole);
    // Replication uses dedicated actions that the generic grants do not include.
    replicationRole.addToPolicy(
      new iam.PolicyStatement({
        actions: ['s3:GetReplicationConfiguration', 's3:ListBucket'],
        resources: [this.dataBucket.bucketArn],
      }),
    );
    replicationRole.addToPolicy(
      new iam.PolicyStatement({
        actions: [
          's3:GetObjectVersionForReplication',
          's3:GetObjectVersionAcl',
          's3:GetObjectVersionTagging',
          // Needed because the source bucket has Object Lock enabled.
          's3:GetObjectRetention',
          's3:GetObjectLegalHold',
        ],
        resources: [this.dataBucket.arnForObjects('*')],
      }),
    );
    replicationRole.addToPolicy(
      new iam.PolicyStatement({
        actions: ['s3:ReplicateObject', 's3:ReplicateDelete', 's3:ReplicateTags'],
        resources: [replicaBucket.arnForObjects('*')],
      }),
    );

    // Attach the replication rule itself. Without it the role and the replica
    // bucket exist but nothing is ever copied. The rule is set on the
    // underlying CloudFormation resource.
    const cfnDataBucket = this.dataBucket.node.defaultChild as s3.CfnBucket;
    cfnDataBucket.replicationConfiguration = {
      role: replicationRole.roleArn,
      rules: [
        {
          id: 'ReplicateAllObjects',
          status: 'Enabled',
          priority: 1,
          filter: { prefix: '' }, // empty prefix = every object
          deleteMarkerReplication: { status: 'Disabled' },
          // Objects are SSE-KMS encrypted, so replication must opt in to them
          // and name the key used to re-encrypt at the destination.
          sourceSelectionCriteria: {
            sseKmsEncryptedObjects: { status: 'Enabled' },
          },
          destination: {
            bucket: replicaBucket.bucketArn,
            encryptionConfiguration: { replicaKmsKeyId: replicaKey.keyArn },
          },
        },
      ],
    };
  }
}
Example 2: DynamoDB Single Table Design
import * as dynamodb from 'aws-cdk-lib/aws-dynamodb';
/** Properties accepted by {@link OrderTableStack}. */
export interface OrderTableStackProps extends cdk.StackProps {
  /**
   * Customer-managed KMS key used to encrypt the table. When omitted, CDK
   * creates a dedicated key for the table.
   */
  readonly encryptionKey?: dynamodb.TableProps['encryptionKey'];
}

/**
 * Provisions the single `orders` table: a generic PK/SK primary key, two
 * global secondary indexes with overloaded keys, and one local secondary
 * index on `createdAt`.
 */
export class OrderTableStack extends cdk.Stack {
  /**
   * @param scope Parent construct.
   * @param id Logical ID of the stack.
   * @param props Stack properties, optionally carrying the table's KMS key.
   */
  constructor(scope: Construct, id: string, props?: OrderTableStackProps) {
    super(scope, id, props);

    // Single table design for orders
    const orderTable = new dynamodb.Table(this, 'OrderTable', {
      tableName: 'orders',
      partitionKey: { name: 'PK', type: dynamodb.AttributeType.STRING },
      sortKey: { name: 'SK', type: dynamodb.AttributeType.STRING },

      // Billing
      billingMode: dynamodb.BillingMode.PAY_PER_REQUEST,

      // Encryption. The key comes from props rather than an undeclared
      // variable; leaving it undefined lets CDK create one.
      encryption: dynamodb.TableEncryption.CUSTOMER_MANAGED,
      encryptionKey: props?.encryptionKey,

      // Point-in-time recovery
      pointInTimeRecovery: true,

      // Stream for event-driven processing
      stream: dynamodb.StreamViewType.NEW_AND_OLD_IMAGES,

      // TTL for cleanup
      timeToLiveAttribute: 'expiresAt',

      removalPolicy: cdk.RemovalPolicy.RETAIN,
    });

    // GSI for customer queries
    orderTable.addGlobalSecondaryIndex({
      indexName: 'GSI1',
      partitionKey: { name: 'GSI1PK', type: dynamodb.AttributeType.STRING },
      sortKey: { name: 'GSI1SK', type: dynamodb.AttributeType.STRING },
      projectionType: dynamodb.ProjectionType.ALL,
    });

    // GSI for status queries
    orderTable.addGlobalSecondaryIndex({
      indexName: 'GSI2',
      partitionKey: { name: 'GSI2PK', type: dynamodb.AttributeType.STRING },
      sortKey: { name: 'GSI2SK', type: dynamodb.AttributeType.STRING },
      projectionType: dynamodb.ProjectionType.ALL,
    });

    // Local secondary index: same partition key as the table, sorted by
    // `createdAt` instead of `SK`.
    orderTable.addLocalSecondaryIndex({
      indexName: 'LSI1',
      sortKey: { name: 'createdAt', type: dynamodb.AttributeType.STRING },
      projectionType: dynamodb.ProjectionType.ALL,
    });
  }
}
// Data access patterns implementation
/**
 * Data-access layer for orders stored in the single-table layout.
 *
 * Key scheme:
 * - order row: `PK = SK = ORDER#<id>`
 * - order line item: `PK = ORDER#<id>`, `SK = ITEM#<zero-padded index>`
 * - `GSI1` groups orders by customer, `GSI2` by status; both sort by creation time
 */
class OrderRepository {
  /**
   * @param client Document client used for every request.
   * @param tableName Table to read and write; defaults to `orders`.
   */
  constructor(
    private readonly client: DynamoDBDocumentClient,
    private readonly tableName = 'orders',
  ) {}

  /**
   * Writes a new order row.
   * The conditional write makes the call fail instead of overwriting an
   * order that already exists under the same id.
   */
  async createOrder(order: Order): Promise<void> {
    await this.client.send(new PutCommand({
      TableName: this.tableName,
      Item: this.toOrderItem(order),
      ConditionExpression: 'attribute_not_exists(PK)',
    }));
  }

  /**
   * Loads one order by id.
   * @returns The stored order, or `null` when no such order exists.
   */
  async getOrder(orderId: string): Promise<Order | null> {
    const key = `ORDER#${orderId}`;
    const result = await this.client.send(new GetCommand({
      TableName: this.tableName,
      Key: { PK: key, SK: key },
    }));
    // A miss leaves `Item` undefined; normalise it to the declared `null`.
    return (result.Item as Order | undefined) ?? null;
  }

  /**
   * Lists a customer's orders, newest first.
   * @param limit Maximum number of orders to return (default 20).
   */
  async getCustomerOrders(customerId: string, limit = 20): Promise<Order[]> {
    const result = await this.client.send(new QueryCommand({
      TableName: this.tableName,
      IndexName: 'GSI1',
      KeyConditionExpression: 'GSI1PK = :pk',
      ExpressionAttributeValues: { ':pk': `CUSTOMER#${customerId}` },
      Limit: limit,
      ScanIndexForward: false, // Most recent first
    }));
    return (result.Items as Order[] | undefined) ?? [];
  }

  /**
   * Lists orders in the given status created on or after `startDate`.
   * `startDate` is compared as a string against `<createdAt>#<id>`, so it must
   * use the same format as `Order.createdAt`.
   * Only the first page of the query result is returned.
   */
  async getOrdersByStatus(status: string, startDate: string): Promise<Order[]> {
    const result = await this.client.send(new QueryCommand({
      TableName: this.tableName,
      IndexName: 'GSI2',
      KeyConditionExpression: 'GSI2PK = :pk AND GSI2SK >= :sk',
      ExpressionAttributeValues: {
        ':pk': `STATUS#${status}`,
        ':sk': startDate,
      },
    }));
    return (result.Items as Order[] | undefined) ?? [];
  }

  /**
   * Writes an order and its line items atomically.
   * The order row is built exactly as in {@link createOrder}, so it carries
   * the index keys and the existence guard. DynamoDB caps a transaction at
   * 100 actions, so at most 99 items fit next to the order row.
   */
  async createOrderWithItems(order: Order, items: OrderItem[]): Promise<void> {
    const partitionKey = `ORDER#${order.id}`;
    const itemPuts = items.map((item, index) => ({
      Put: {
        TableName: this.tableName,
        // Keys come last so the payload cannot overwrite them.
        Item: {
          ...item,
          PK: partitionKey,
          SK: `ITEM#${String(index).padStart(4, '0')}`,
        },
      },
    }));

    await this.client.send(new TransactWriteCommand({
      TransactItems: [
        {
          Put: {
            TableName: this.tableName,
            Item: this.toOrderItem(order),
            ConditionExpression: 'attribute_not_exists(PK)',
          },
        },
        ...itemPuts,
      ],
    }));
  }

  /** Builds the stored order row: the order's own fields plus table and index keys. */
  private toOrderItem(order: Order) {
    const orderKey = `ORDER#${order.id}`;
    return {
      // Spread first so computed keys always win over same-named order fields.
      ...order,
      PK: orderKey,
      SK: orderKey,
      GSI1PK: `CUSTOMER#${order.customerId}`,
      GSI1SK: `ORDER#${order.createdAt}#${order.id}`,
      GSI2PK: `STATUS#${order.status}`,
      GSI2SK: `${order.createdAt}#${order.id}`,
      entityType: 'ORDER',
    };
  }
}
Example 3: Aurora Serverless with CDK
import * as rds from 'aws-cdk-lib/aws-rds';
/** Properties accepted by {@link DatabaseStack}. All are optional. */
export interface DatabaseStackProps extends cdk.StackProps {
  /**
   * VPC to place the cluster in; it must contain isolated subnets.
   * When omitted, the stack creates a two-AZ VPC with isolated subnets only.
   */
  readonly vpc?: ec2.IVpc;
  /** Security group of the application that may connect on port 5432. */
  readonly appSecurityGroup?: ec2.ISecurityGroup;
  /** KMS key for storage encryption; defaults to the account's RDS key. */
  readonly encryptionKey?: rds.DatabaseClusterProps['storageEncryptionKey'];
  /** Existing secret holding the master credentials; CDK generates one when omitted. */
  readonly dbSecret?: Parameters<typeof rds.Credentials.fromSecret>[0];
}

/**
 * Provisions an Aurora PostgreSQL Serverless v2 cluster with one writer and
 * one reader, encrypted storage, IAM authentication, 14-day backups and
 * log export to CloudWatch.
 */
export class DatabaseStack extends cdk.Stack {
  /**
   * @param scope Parent construct.
   * @param id Logical ID of the stack.
   * @param props Stack properties plus the cluster's external dependencies.
   */
  constructor(scope: Construct, id: string, props?: DatabaseStackProps) {
    super(scope, id, props);

    // The cluster is pinned to isolated subnets, so the fallback VPC contains
    // nothing else and needs no NAT gateway.
    const vpc =
      props?.vpc ??
      new ec2.Vpc(this, 'DatabaseVpc', {
        maxAzs: 2,
        natGateways: 0,
        subnetConfiguration: [
          {
            name: 'database',
            subnetType: ec2.SubnetType.PRIVATE_ISOLATED,
            cidrMask: 24,
          },
        ],
      });

    // Declared once so the cluster and its parameter group cannot drift apart.
    const engine = rds.DatabaseClusterEngine.auroraPostgres({
      version: rds.AuroraPostgresEngineVersion.VER_15_4,
    });

    // Parameters
    const parameterGroup = new rds.ParameterGroup(this, 'DbParams', {
      engine,
      parameters: {
        'log_statement': 'ddl',
        'log_min_duration_statement': '1000',
      },
    });

    // Aurora Serverless v2
    const cluster = new rds.DatabaseCluster(this, 'OrderDatabase', {
      engine,
      serverlessV2MinCapacity: 0.5,
      serverlessV2MaxCapacity: 8,
      writer: rds.ClusterInstance.serverlessV2('writer', {
        autoMinorVersionUpgrade: true,
      }),
      readers: [
        rds.ClusterInstance.serverlessV2('reader', {
          scaleWithWriter: true,
        }),
      ],
      vpc,
      vpcSubnets: { subnetType: ec2.SubnetType.PRIVATE_ISOLATED },

      // Security
      storageEncrypted: true,
      storageEncryptionKey: props?.encryptionKey,
      iamAuthentication: true,

      // Credentials: use the supplied secret, otherwise let CDK generate one.
      credentials:
        props?.dbSecret === undefined
          ? undefined
          : rds.Credentials.fromSecret(props.dbSecret),

      // Backup
      backup: {
        retention: cdk.Duration.days(14),
        preferredWindow: '03:00-04:00',
      },

      // Monitoring
      cloudwatchLogsExports: ['postgresql'],
      cloudwatchLogsRetention: logs.RetentionDays.ONE_MONTH,
      monitoringInterval: cdk.Duration.seconds(60),

      parameterGroup,
      deletionProtection: true,
    });

    // Security group: only opened when the caller names the application's group.
    if (props?.appSecurityGroup !== undefined) {
      cluster.connections.allowFrom(
        props.appSecurityGroup,
        ec2.Port.tcp(5432),
        'Allow from application'
      );
    }

    // Read replica endpoint for reads
    new cdk.CfnOutput(this, 'ReaderEndpoint', {
      value: cluster.clusterReadEndpoint.hostname,
    });
  }
}
Example 4: EFS for Shared Storage
import * as efs from 'aws-cdk-lib/aws-efs';
/** Properties accepted by {@link SharedStorageStack}. All are optional. */
export interface SharedStorageStackProps extends cdk.StackProps {
  /**
   * VPC to create the file system's mount targets in; it must contain
   * private subnets with egress. When omitted, the stack creates a two-AZ
   * VPC with a single NAT gateway.
   */
  readonly vpc?: ec2.IVpc;
  /** KMS key for encryption at rest; defaults to the account's EFS key. */
  readonly encryptionKey?: efs.FileSystemProps['kmsKey'];
}

/**
 * Provisions an encrypted EFS file system, an access point rooted at
 * `/app-data`, and a Fargate task definition that mounts it at `/data`.
 */
export class SharedStorageStack extends cdk.Stack {
  /**
   * @param scope Parent construct.
   * @param id Logical ID of the stack.
   * @param props Stack properties plus the file system's external dependencies.
   */
  constructor(scope: Construct, id: string, props?: SharedStorageStackProps) {
    super(scope, id, props);

    // The default Vpc layout provides the private-with-egress subnets selected below.
    const vpc =
      props?.vpc ??
      new ec2.Vpc(this, 'SharedStorageVpc', {
        maxAzs: 2,
        natGateways: 1,
      });

    // EFS File System
    const fileSystem = new efs.FileSystem(this, 'SharedStorage', {
      vpc,
      vpcSubnets: { subnetType: ec2.SubnetType.PRIVATE_WITH_EGRESS },

      // Performance
      performanceMode: efs.PerformanceMode.GENERAL_PURPOSE,
      throughputMode: efs.ThroughputMode.ELASTIC,

      // Lifecycle
      lifecyclePolicy: efs.LifecyclePolicy.AFTER_30_DAYS,
      outOfInfrequentAccessPolicy: efs.OutOfInfrequentAccessPolicy.AFTER_1_ACCESS,

      // Security
      encrypted: true,
      kmsKey: props?.encryptionKey,

      // Backup
      enableAutomaticBackups: true,

      removalPolicy: cdk.RemovalPolicy.RETAIN,
    });

    // Access point for application: pins callers to /app-data as uid/gid 1000.
    const accessPoint = fileSystem.addAccessPoint('AppAccessPoint', {
      path: '/app-data',
      createAcl: {
        ownerGid: '1000',
        ownerUid: '1000',
        permissions: '755',
      },
      posixUser: {
        gid: '1000',
        uid: '1000',
      },
    });

    // Mount in ECS task
    const taskDefinition = new ecs.FargateTaskDefinition(this, 'Task', {
      memoryLimitMiB: 512,
      cpu: 256,
      volumes: [
        {
          name: 'shared-data',
          efsVolumeConfiguration: {
            fileSystemId: fileSystem.fileSystemId,
            transitEncryption: 'ENABLED',
            authorizationConfig: {
              accessPointId: accessPoint.accessPointId,
              iam: 'ENABLED',
            },
          },
        },
      ],
    });

    // The volume uses IAM authorization and is mounted read-write, so the
    // task role needs the matching EFS client actions.
    fileSystem.grant(
      taskDefinition.taskRole,
      'elasticfilesystem:ClientMount',
      'elasticfilesystem:ClientWrite',
    );

    const container = taskDefinition.addContainer('App', {
      image: ecs.ContainerImage.fromRegistry('app:latest'),
    });

    container.addMountPoints({
      sourceVolume: 'shared-data',
      containerPath: '/data',
      readOnly: false,
    });

    // NOTE(review): no service is defined here. Whatever runs this task still
    // needs network access to the file system on the NFS port, e.g.
    // fileSystem.connections.allowDefaultPortFrom(service).
  }
}
Example 5: S3 Pre-signed URLs
import {
  S3Client,
  GetObjectCommand,
  PutObjectCommand,
  CreateMultipartUploadCommand,
  UploadPartCommand,
} from '@aws-sdk/client-s3';
import { getSignedUrl } from '@aws-sdk/s3-request-presigner';
/**
 * Issues pre-signed S3 URLs so clients can upload to and download from one
 * bucket directly, including multipart uploads for large files.
 */
class FileService {
  /** Lifetime of every pre-signed URL issued by this service, in seconds (1 hour). */
  private static readonly URL_TTL_SECONDS = 3600;

  /**
   * @param s3Client Client used for signing and for starting multipart uploads.
   * @param bucketName Bucket every key is resolved against.
   */
  constructor(private readonly s3Client: S3Client, private readonly bucketName: string) {}

  /**
   * Creates a pre-signed PUT URL for a single-request upload.
   * @param key Object key the upload will be stored under.
   * @param contentType Content type bound into the signed request.
   * @returns The URL, the key, and the ISO timestamp at which the URL expires.
   */
  async generateUploadUrl(key: string, contentType: string): Promise<UploadUrlResponse> {
    // Captured before signing so the reported expiry is never later than the
    // moment the signature actually stops being valid.
    const issuedAt = Date.now();
    const command = new PutObjectCommand({
      Bucket: this.bucketName,
      Key: key,
      ContentType: contentType,
      // Metadata
      // NOTE(review): 'user-id' is a literal placeholder, not the caller's identity.
      Metadata: {
        'uploaded-by': 'user-id',
        'upload-time': new Date(issuedAt).toISOString(),
      },
    });

    const uploadUrl = await getSignedUrl(this.s3Client, command, {
      expiresIn: FileService.URL_TTL_SECONDS,
    });

    return {
      uploadUrl,
      key,
      expiresAt: new Date(issuedAt + FileService.URL_TTL_SECONDS * 1000).toISOString(),
    };
  }

  /**
   * Creates a pre-signed GET URL for the object stored under `key`.
   * @returns The URL, valid for one hour.
   */
  async generateDownloadUrl(key: string): Promise<string> {
    const command = new GetObjectCommand({
      Bucket: this.bucketName,
      Key: key,
    });

    return getSignedUrl(this.s3Client, command, {
      expiresIn: FileService.URL_TTL_SECONDS,
    });
  }

  /**
   * Starts a multipart upload for large files.
   * @returns The upload id to pass to {@link generatePartUploadUrls}.
   * @throws Error when the response carries no upload id.
   */
  async initiateMultipartUpload(key: string): Promise<string> {
    const response = await this.s3Client.send(
      new CreateMultipartUploadCommand({
        Bucket: this.bucketName,
        Key: key,
      }),
    );

    if (response.UploadId === undefined) {
      throw new Error(`S3 did not return an UploadId for key "${key}"`);
    }
    return response.UploadId;
  }

  /**
   * Creates one pre-signed URL per part of a multipart upload.
   * @param uploadId Id returned by {@link initiateMultipartUpload}.
   * @param partCount Number of parts; part numbers run from 1 to `partCount`.
   * @returns The URLs in part order (index 0 is part 1).
   */
  async generatePartUploadUrls(key: string, uploadId: string, partCount: number): Promise<string[]> {
    // Each signature is independent, so sign them concurrently; Promise.all
    // keeps the results in part order.
    return Promise.all(
      Array.from({ length: partCount }, (_, index) =>
        getSignedUrl(
          this.s3Client,
          new UploadPartCommand({
            Bucket: this.bucketName,
            Key: key,
            UploadId: uploadId,
            PartNumber: index + 1,
          }),
          { expiresIn: FileService.URL_TTL_SECONDS },
        ),
      ),
    );
  }
}
Anti-Patterns
❌ Wrong Storage Class Selection
// WRONG - archive data is uploaded without a StorageClass, so it is stored as
// Standard even though it will rarely be read.
// NOTE(review): `s3`, `bucket` and `data` are not defined in this snippet;
// `s3` is presumably an S3 SDK client, not the CDK `s3` module imported in
// Example 1 - confirm.
await s3.putObject({
Bucket: bucket,
Key: 'archive/old-logs.gz',
Body: data,
// No storage class specified = Standard (expensive)
});
// ✅ CORRECT - the same upload, with the storage class chosen explicitly so the
// object is written straight into an archive class.
await s3.putObject({
Bucket: bucket,
Key: 'archive/old-logs.gz',
Body: data,
StorageClass: 'GLACIER_IR',
});