From 948b917f7792f6cd12cf145aa7c95082251ae60e Mon Sep 17 00:00:00 2001 From: Anusha-janardhan Date: Wed, 17 Jun 2026 18:46:49 +0000 Subject: [PATCH 1/5] feat(cft-templates): enhance EC2 and S3 configurations --- cft-templates/Rstudio.yml | 81 +++++++++++------- cft-templates/ec2-EIP.yml | 58 +++++++++---- cft-templates/s3.yml | 149 +++++++++++++++++++++++---------- dump/standardcatalogitems.json | 22 +++-- 4 files changed, 210 insertions(+), 100 deletions(-) diff --git a/cft-templates/Rstudio.yml b/cft-templates/Rstudio.yml index 12bb9df..30b738f 100644 --- a/cft-templates/Rstudio.yml +++ b/cft-templates/Rstudio.yml @@ -17,19 +17,14 @@ Parameters: Description: >- An S3 URI (starting with "s3://") that specifies the location of files to be copied to the environment instance, including any bootstrap scripts - EBSVolumeSize: - Description: The initial size of the volume (in GBs) EBS will use for storage. - Type: Number - Default: 8 InstanceType: Type: String - Description: Choose the instance type e.g t3.medium (2vCPU , 2GiB RAM), t3.large (2vCPU, 8GiB RAM), t3.xlarge(4vCPU, 16GiB RAM) - AllowedValues: - - t3.medium - - t3.large - - t3.xlarge + Description: Instance type for RStudio. ConstraintDescription: Valid instance type in the t3 families - Default: t3.medium + EBSVolumeSize: + Description: The initial size of the volume (in GBs) EBS will use for storage. + Type: Number + Default: 32 InitialUser: Type: String Description: User Name for RStudio. Do not use 'root' and 'ec2-user' @@ -37,10 +32,7 @@ Parameters: KeyPair: Type: "AWS::EC2::KeyPair::KeyName" Description: Name of an existing EC2 KeyPair to enable SSH access to the instance. If no key pairs exist, please create one from the button next to the dropdown. Please contact your Administrator if you are unable to create one. - AvailabilityZone: - Description: Select the availability zone in which to create the instance. If you plan to attach a secondary volume to the instance, create this instance in the same AvailabilityZone as the volume you created. - Type: AWS::EC2::AvailabilityZone::Name - + Conditions: IamPolicyEmpty: !Equals [!Ref IamPolicyDocument, '{}'] @@ -61,12 +53,31 @@ Resources: - 'sts:AssumeRole' ManagedPolicyArns: - arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore + - arn:aws:iam::aws:policy/AmazonS3FilesClientFullAccess Policies: - !If - IamPolicyEmpty - !Ref 'AWS::NoValue' - PolicyName: !Join ['-', [Ref: Namespace, 's3-studydata-policy']] PolicyDocument: !Ref IamPolicyDocument + - PolicyName: !Join ['-', [Ref: Namespace, 'Product-policy']] + PolicyDocument: + Version: "2012-10-17" + Statement: + - Effect: Allow + Action: + - 's3:*' + - 'sts:*' + - 'kms:*' + - 'ssm:*' + - 'cloudwatch:*' + - 'ec2:*' + - 'logs:*' + - 'xray:*' + - 'ssmmessages:*' + - 'ec2messages:*' + - 's3files:*' + Resource: "*" - PolicyName: param-store-access PolicyDocument: Version: '2012-10-17' @@ -75,7 +86,11 @@ Resources: Action: - 'ssm:GetParameter' - 'ssm:PutParameter' - Resource: !Sub 'arn:${AWS::Partition}:ssm:${AWS::Region}:${AWS::AccountId}:parameter/*' + Resource: !Sub 'arn:${AWS::Partition}:ssm:${AWS::Region}:${AWS::AccountId}:parameter/*' + - Effect: 'Allow' + Action: + - 'kms:Decrypt' + Resource: '*' InstanceProfile: Type: 'AWS::IAM::InstanceProfile' @@ -103,7 +118,7 @@ Resources: Type: AWS::EC2::Instance CreationPolicy: ResourceSignal: - Timeout: PT10M + Timeout: PT15M Metadata: AWS::CloudFormation::Init: configSets: @@ -125,16 +140,13 @@ Resources: group: 'root' content: !Sub | #!/usr/bin/env bash - # Get the session token - TOKEN=$(curl -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 21600") - - # Get the region to build the parameter name - instance_region=$(curl -H "X-aws-ec2-metadata-token: $TOKEN" http://169.254.169.254/latest/meta-data/placement/region) - echo "Retrieved region ${instance_region} from metadata service" - - # Get the instance id to build the parameter name - instance_id=$(curl -H "X-aws-ec2-metadata-token: $TOKEN" http://169.254.169.254/latest/meta-data/instance-id) + set -euo pipefail + # Get IMDSv2 token + TOKEN=$(curl -s --fail \ + -X PUT "http://169.254.169.254/latest/api/token" \ + -H "X-aws-ec2-metadata-token-ttl-seconds: 21600") + instance_id=$(curl -s -H "X-aws-ec2-metadata-token: $TOKEN" "http://169.254.169.254/latest/meta-data/instance-id") secret=`uuidgen` echo "setting ${InitialUser} password and starting rstudio" password=$(echo -n "$instance_id$secret" | sha256sum | awk '{print $1}') @@ -143,8 +155,9 @@ Resources: sleep 10 public_key=$(curl http://localhost:8787/auth-public-key) - - aws ssm put-parameter --name "/RL/RG/rstudio/public-key/$instance_id" --value '{"secret":"'$secret'","public_key":"'$public_key'"}' --region $instance_region --type SecureString --overwrite + # instance_region=$(curl -s "http://169.254.169.254/latest/meta-data/placement/region + instance_region=$(curl -s -H "X-aws-ec2-metadata-token: $TOKEN" http://169.254.169.254/latest/meta-data/placement/region) + aws ssm put-parameter --name "/RL/RG/rstudio/public-key/$instance_id" --value '{"secret":"'$secret'","public_key":"'$public_key'"}' --region $instance_region --type SecureString --overwrite echo "Stored rstudio public key in SSM" '/var/log/rstudio.log': content: "\n" @@ -164,7 +177,6 @@ Resources: Properties: ImageId : '{{resolve:ssm:/RL/RG/StandardCatalog/RStudio}}' InstanceType: !Ref 'InstanceType' - AvailabilityZone: !Ref AvailabilityZone SecurityGroups: [!Ref 'RstudioEC2SecurityGroup'] KeyName: !Ref 'KeyPair' IamInstanceProfile: !Ref InstanceProfile @@ -185,9 +197,14 @@ Resources: Fn::Base64: !Sub | #!/bin/bash exec > >(tee /var/log/user-data.log|logger -t user-data -s 2>/dev/console) 2>&1 - trap '/opt/aws/bin/cfn-signal --exit-code 1 --resource RstudioEC2Instance --region ${AWS::Region} --stack ${AWS::StackName}' ERR - sudo yum update -y --security - + #trap '/opt/aws/bin/cfn-signal --exit-code 1 --resource RstudioEC2Instance --region ${AWS::Region} --stack ${AWS::StackName}' ERR + sudo dnf update -y --security + sudo dnf install -y amazon-efs-utils aws-cfn-bootstrap cronie + systemctl enable crond.service + systemctl start crond.service + sudo systemctl status amazon-ssm-agent + ls -l /opt/aws/bin/cfn-signal + #add user(s) sudo useradd -m -s /bin/bash ${InitialUser} #Add user to the sudo group @@ -221,4 +238,4 @@ Outputs: Value: '443' AvailabilityZone: Description: AvailabilityZone of newly created Rstudio EC2Instance - Value: !Ref AvailabilityZone + Value: !GetAtt [RstudioEC2Instance, AvailabilityZone] diff --git a/cft-templates/ec2-EIP.yml b/cft-templates/ec2-EIP.yml index b81b2d5..4c2411a 100644 --- a/cft-templates/ec2-EIP.yml +++ b/cft-templates/ec2-EIP.yml @@ -21,10 +21,10 @@ Parameters: An S3 URI (starting with "s3://") that specifies the location of files to be copied to the environment instance, including any bootstrap scripts InstanceType: - Description: Choose the instance type e.g t3.small (2vCPU , 2GiB RAM) t3.medium (2vCPU , 4GiB RAM), t3.large (2vCPU, 8GiB RAM). + Description: Choose the instance type # e.g t3.medium (2vCPU , 4GiB RAM), t3.large (2vCPU, 8GiB RAM), t3.xlarge (4vCPU, 16GiB RAM). Type: String - Default: t3.small - AllowedValues: [t3.small, t3.medium, t3.large] + # Default: t3.medium + # AllowedValues: [t3.medium, t3.large,t3.xlarge] ConstraintDescription: must be a valid EC2 instance type. EBSVolumeSize: Description: The initial size of the volume (in GBs) EBS will use for storage. @@ -34,17 +34,17 @@ Parameters: Description: Name of an existing EC2 KeyPair to enable SSH access to the instance. If no key pairs exist, please create one from the button next to the dropdown. Please contact your Administrator if you are unable to create one. Type: AWS::EC2::KeyPair::KeyName ConstraintDescription: must be the name of an existing EC2 KeyPair. - AllowedSSHLocation: - Description: The IP address range that can be used to SSH to the EC2 instances - Type: String - MinLength: '9' - MaxLength: '18' - Default: 0.0.0.0/0 - AllowedPattern: (\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})/(\d{1,2}) - ConstraintDescription: must be a valid IP CIDR range of the form x.x.x.x/x. + # AllowedSSHLocation: + # Description: The IP address range that can be used to SSH to the EC2 instances + # Type: String + # MinLength: '9' + # MaxLength: '18' + # Default: 0.0.0.0/0 + # AllowedPattern: (\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})/(\d{1,2}) + # ConstraintDescription: must be a valid IP CIDR range of the form x.x.x.x/x. LatestAmiId: Type: 'AWS::SSM::Parameter::Value' - Default: '/aws/service/ami-amazon-linux-latest/amzn2-ami-hvm-x86_64-gp2' + Default: '/aws/service/ami-amazon-linux-latest/al2023-ami-kernel-default-x86_64' AvailabilityZone: Description: Select the availability zone in which to create the instance. If you plan to attach a secondary volume to the instance, create this instance in the same AvailabilityZone as the volume you created. Type: AWS::EC2::AvailabilityZone::Name @@ -52,6 +52,30 @@ Parameters: Conditions: IamPolicyEmpty: !Equals [!Ref IamPolicyDocument, '{}'] +Resources: + SSMPolicy: + Type: AWS::IAM::ManagedPolicy + Properties: + ManagedPolicyName: !Join ["-", [Ref: Namespace, "SSM-Policy"]] + PolicyDocument: + Version: "2012-10-17" + Statement: + - Sid: AllowSSMParamActions + Effect: Allow + Action: + - ssm:GetParameter + - ssm:PutParameter + - ssm:DescribeParameters + - s3files:* + Resource: "*" + - Sid: AllowAccessToEncryptionKeys + Effect: Allow + Action: + - kms:Decrypt + - kms:Encrypt + - kms:GenerateDataKey + - kms:DescribeKey + Resource: "*" Resources: IAMRole: Type: 'AWS::IAM::Role' @@ -67,14 +91,16 @@ Resources: - 'ec2.amazonaws.com' Action: - 'sts:AssumeRole' - ManagedPolicyArns: - - arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore Policies: - !If - IamPolicyEmpty - !Ref 'AWS::NoValue' - PolicyName: !Join ['-', [Ref: Namespace, 's3-studydata-policy']] PolicyDocument: !Ref IamPolicyDocument + ManagedPolicyArns: + - Ref: SSMPolicy + - arn:aws:iam::aws:policy/amazonSSMManagedInstanceCore + - arn:aws:iam::aws:policy/AmazonS3FilesClientFullAccess InstanceProfile: Type: 'AWS::IAM::InstanceProfile' @@ -91,6 +117,7 @@ Resources: Fn::Base64: !Sub | #!/usr/bin/env bash exec > >(tee /var/log/user-data.log|logger -t user-data -s 2>/dev/console) 2>&1 + sudo dnf install -y amazon-efs-utils # Download and execute bootstrap script aws s3 cp "${EnvironmentInstanceFiles}/get_bootstrap.sh" "/tmp" chmod 500 "/tmp/get_bootstrap.sh" @@ -99,7 +126,6 @@ Resources: # Signal result to CloudFormation /opt/aws/bin/cfn-signal -e $? --stack "${AWS::StackName}" --resource "EC2Instance" --region "${AWS::Region}" InstanceType: !Ref 'InstanceType' - AvailabilityZone: !Ref AvailabilityZone SecurityGroups: [!Ref 'InstanceSecurityGroup'] KeyName: !Ref 'KeyPair' ImageId: !Ref 'LatestAmiId' @@ -126,7 +152,7 @@ Resources: - IpProtocol: tcp FromPort: '22' ToPort: '22' - CidrIp: !Ref 'AllowedSSHLocation' + CidrIp: 0.0.0.0/0 Outputs: InstanceId: diff --git a/cft-templates/s3.yml b/cft-templates/s3.yml index 58bdfe4..c7d6163 100644 --- a/cft-templates/s3.yml +++ b/cft-templates/s3.yml @@ -16,17 +16,17 @@ Parameters: AllowedValues: - aws:kms - AES256 - AccessLoggingBucketName: + EnableS3Files: Type: String - Description: "Pass the bucket name where Access Logs for this S3 bucket should be sent" - + Default: "false" + AllowedValues: + - "true" + - "false" + Description: "Enable Amazon S3 Files for this bucket" + Conditions: AES256: !Equals [!Ref SSEAlgorithm, "AES256"] - AccessLoggingEnabled: - Fn::Not: - - Fn::Equals: - - Ref: AccessLoggingBucketName - - "" + EnableS3FilesCondition: !Equals [!Ref EnableS3Files, "true"] Resources: SampleBucket: Type: AWS::S3::Bucket @@ -42,46 +42,103 @@ Resources: - ServerSideEncryptionByDefault: SSEAlgorithm: !Ref SSEAlgorithm KMSMasterKeyID: !Ref KMSKeyArn - BucketKeyEnabled: true + BucketKeyEnabled: true + VersioningConfiguration: !If + - EnableS3FilesCondition + - Status: Enabled + - !Ref AWS::NoValue + Tags: + - Key: cost_resource + Value: !Sub ${AWS::StackName} + - Key: Name + Value: !Sub ${Namespace} + + S3FilesAccessRole: + Type: AWS::IAM::Role + Condition: EnableS3FilesCondition + Properties: + AssumeRolePolicyDocument: + Version: "2012-10-17" + Statement: + - Effect: Allow + Principal: + Service: elasticfilesystem.amazonaws.com + Action: sts:AssumeRole + Policies: + - PolicyName: S3FilesBucketAccess + PolicyDocument: + Version: "2012-10-17" + Statement: + - Sid: S3BucketAccess + Effect: Allow + Action: + - s3:ListBucket + - s3:GetBucketLocation + Resource: !GetAtt SampleBucket.Arn + - Sid: S3ObjectAccess + Effect: Allow + Action: + - s3:GetObject + - s3:PutObject + - s3:DeleteObject + Resource: !Sub "${SampleBucket.Arn}/*" + - !If + - AES256 + - !Ref AWS::NoValue + - Sid: KmsAccess + Effect: Allow + Action: + - kms:Decrypt + - kms:GenerateDataKey + Resource: !Ref KMSKeyArn + + S3FileSystem: + Type: AWS::S3Files::FileSystem + Condition: EnableS3FilesCondition + DependsOn: + - S3FilesAccessRole + - SampleBucket + Properties: + AcceptBucketWarning: true + Bucket: !GetAtt SampleBucket.Arn + RoleArn: !GetAtt S3FilesAccessRole.Arn + KmsKeyId: !If + - AES256 + - !Ref AWS::NoValue + - !Ref KMSKeyArn Tags: - Key: cost_resource Value: !Sub ${AWS::StackName} - Key: Name - Value: !Sub ${Namespace} - LoggingConfiguration: - Fn::If: - - AccessLoggingEnabled - - DestinationBucketName: !Ref AccessLoggingBucketName - LogFilePrefix: !Ref Namespace - - !Ref "AWS::NoValue" + Value: !Sub ${Namespace}-s3files -BucketPolicy: - Type: "AWS::S3::BucketPolicy" - Properties: - Bucket: !Ref SampleBucket - PolicyDocument: - Version: "2012-10-17" - Statement: - - Sid: EnforceTLS12 - Effect: Deny - Principal: "*" - Action: "s3:*" - Resource: - - !Sub "${SampleBucket.Arn}" - - !Sub "${SampleBucket.Arn}/*" - Condition: - Bool: - "aws:SecureTransport": "false" - - Sid: EnforceTLS12OrHigher - Effect: Deny - Principal: "*" - Action: "s3:*" - Resource: - - !Sub "${SampleBucket.Arn}" - - !Sub "${SampleBucket.Arn}/*" - Condition: - NumericLessThan: - "s3:TLSVersion": "1.2" + BucketPolicy: + Type: "AWS::S3::BucketPolicy" + Properties: + Bucket: !Ref SampleBucket + PolicyDocument: + Version: "2012-10-17" + Statement: + - Sid: EnforceTLS12 + Effect: Deny + Principal: "*" + Action: "s3:*" + Resource: + - !Sub "${SampleBucket.Arn}" + - !Sub "${SampleBucket.Arn}/*" + Condition: + Bool: + "aws:SecureTransport": "false" + - Sid: EnforceTLS12OrHigher + Effect: Deny + Principal: "*" + Action: "s3:*" + Resource: + - !Sub "${SampleBucket.Arn}" + - !Sub "${SampleBucket.Arn}/*" + Condition: + NumericLessThan: + "s3:TLSVersion": "1.2" Outputs: BucketArn: @@ -89,4 +146,8 @@ Outputs: Description: URL for website hosted on S3 BucketName: Value: !Ref SampleBucket - Description: Name of S3 bucket \ No newline at end of file + Description: Name of S3 bucket + S3FileSystemId: + Condition: EnableS3FilesCondition + Value: !GetAtt S3FileSystem.FileSystemId + Description: Amazon S3 Files file system ID for bootstrap and mount scripts \ No newline at end of file diff --git a/dump/standardcatalogitems.json b/dump/standardcatalogitems.json index fda684c..4bc7cea 100644 --- a/dump/standardcatalogitems.json +++ b/dump/standardcatalogitems.json @@ -168,7 +168,14 @@ } ], "post_provisioning": [], - "checks_before_assigning_product": [], + "checks_before_assigning_product": [ + { + "Key": "projectTypeCompatibility", + "Value": [ + "Standard" + ] + } + ], "checks_after_assigning_product": [], "permission_required": {}, "cost_resource": true @@ -324,6 +331,12 @@ } ], "ami_path": "/RL/RG/StandardCatalog/RStudio" + }, + { + "Key": "projectTypeCompatibility", + "Value": [ + "Standard" + ] } ], "checks_after_assigning_product": [], @@ -536,7 +549,6 @@ "availableRegions" : [ ], "assignedOU" : [ ], "metaData" : { - "cost_resource" : true, "pre_provisioning" : [ { "code" : "CFT_PARAMS", @@ -1334,12 +1346,6 @@ } ], "checks_before_assigning_product": [ - { - "Key": "projectTypeCompatibility", - "Value": [ - "Standard" - ] - }, { "Key": "projectTypeCompatibility", "Value": [ From a99426bcb9f53ff2559b1a9f8507ea165be7df68 Mon Sep 17 00:00:00 2001 From: Anusha-janardhan Date: Wed, 17 Jun 2026 18:55:27 +0000 Subject: [PATCH 2/5] refactor(cft-templates, config): remove KeyPair parameter and SSH/RDP connection options from RStudio configuration --- cft-templates/Rstudio.yml | 4 ---- config/settings-config.json | 20 -------------------- 2 files changed, 24 deletions(-) diff --git a/cft-templates/Rstudio.yml b/cft-templates/Rstudio.yml index 30b738f..e94f0d2 100644 --- a/cft-templates/Rstudio.yml +++ b/cft-templates/Rstudio.yml @@ -29,9 +29,6 @@ Parameters: Type: String Description: User Name for RStudio. Do not use 'root' and 'ec2-user' Default: rstudio - KeyPair: - Type: "AWS::EC2::KeyPair::KeyName" - Description: Name of an existing EC2 KeyPair to enable SSH access to the instance. If no key pairs exist, please create one from the button next to the dropdown. Please contact your Administrator if you are unable to create one. Conditions: IamPolicyEmpty: !Equals [!Ref IamPolicyDocument, '{}'] @@ -178,7 +175,6 @@ Resources: ImageId : '{{resolve:ssm:/RL/RG/StandardCatalog/RStudio}}' InstanceType: !Ref 'InstanceType' SecurityGroups: [!Ref 'RstudioEC2SecurityGroup'] - KeyName: !Ref 'KeyPair' IamInstanceProfile: !Ref InstanceProfile PropagateTagsToVolumeOnCreation: true BlockDeviceMappings: diff --git a/config/settings-config.json b/config/settings-config.json index dc53dec..292f33f 100644 --- a/config/settings-config.json +++ b/config/settings-config.json @@ -519,14 +519,6 @@ "actions": { "running": { "connect": [ - { - "menu": "SSH/RDP", - "imageUrl": "../../assets/images/technology@2x.png", - "outputsRequired": [ - "InstanceIPAddress", - "InstanceId" - ] - }, { "menu": "Open Link", "imageUrl": "../../assets/images/link@2x.png", @@ -617,10 +609,6 @@ "actions": { "running": { "connect": [ - { - "menu": "SSH/RDP", - "imageUrl": "../../assets/images/technology@2x.png" - }, { "menu": "Remote Desktop", "imageUrl": "../../assets/images/Screen-icon.png" @@ -871,14 +859,6 @@ "actions": { "running": { "connect": [ - { - "menu": "SSH/RDP", - "imageUrl": "../../assets/images/technology@2x.png", - "outputsRequired": [ - "InstanceIPAddress", - "InstanceId" - ] - }, { "menu": "Open Link", "imageUrl": "../../assets/images/link@2x.png", From 4456afda8c842eade41317b0abd7d4647aec1d73 Mon Sep 17 00:00:00 2001 From: Anusha-janardhan Date: Thu, 18 Jun 2026 04:02:29 +0000 Subject: [PATCH 3/5] feat(cft-templates): update EC2 templates with enhanced IAM policies and improved user data scripts --- cft-templates/ec2-dcv.yml | 417 +++++++++++++++-------------- cft-templates/ec2-jupyterLab.yml | 156 ++++++----- cft-templates/ec2-linux-docker.yml | 72 ++--- 3 files changed, 329 insertions(+), 316 deletions(-) diff --git a/cft-templates/ec2-dcv.yml b/cft-templates/ec2-dcv.yml index e890228..4867b82 100644 --- a/cft-templates/ec2-dcv.yml +++ b/cft-templates/ec2-dcv.yml @@ -1,9 +1,7 @@ -Metadata: +Metadata: License: Apache-2.0 AWSTemplateFormatVersion: '2010-09-09' Description: 'AWS CloudFormation Template to create an GPU based EC2 instance with NICE DCV pre installed.' - - Parameters: Namespace: Type: String @@ -22,51 +20,86 @@ Parameters: InstanceType: Description: Choose the instance type e.g t3.medium (2vCPU , 2GiB RAM), t3.large (2vCPU, 8GiB RAM), t3.xlarge(4vCPU, 16GiB RAM) Type: String - Default: t3.medium - AllowedValues: - [ - t3.medium, - t3.large, - t3.xlarge - ] ConstraintDescription: must be a valid EC2 instance type. EBSVolumeSize: Description: The initial size of the volume (in GBs) EBS will use for storage. Type: Number - Default: 32 - KeyPair: - Description: Name of an existing EC2 KeyPair to enable SSH access to the instance. If no key pairs exist, please create one from the button next to the dropdown. Please contact your Administrator if you are unable to create one. - Type: AWS::EC2::KeyPair::KeyName - ConstraintDescription: must be the name of an existing EC2 KeyPair. - AllowedIpAddress: - Description: The IP address range that can be used to SSH to instance and Connect to DCV - Type: String - MinLength: '9' - MaxLength: '18' - Default: 0.0.0.0/0 - AllowedPattern: (\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})/(\d{1,2}) - ConstraintDescription: must be a valid IP CIDR range of the form x.x.x.x/x. + Default: 70 AvailabilityZone: Description: Select the availability zone in which to create the instance. If you plan to attach a secondary volume to the instance, create this instance in the same AvailabilityZone as the volume you created. - Type: AWS::EC2::AvailabilityZone::Name + Type: AWS::EC2::AvailabilityZone::Name + # Password: + # Type: String + # NoEcho: True + # Description: Password for the default ec2-user account. Must be at least 8 characters long. + # MinLength: 8 + # ConstraintDescription: Password must be at least 8 characters long. Conditions: IamPolicyEmpty: !Equals [!Ref IamPolicyDocument, '{}'] -Resources: +Resources: SSMPolicy: Type: AWS::IAM::ManagedPolicy Properties: - ManagedPolicyName: !Join ["-", [Ref: Namespace, "SSM-Policy"]] + ManagedPolicyName: !Join ["-", [!Ref Namespace, "SSM-Policy"]] PolicyDocument: Version: "2012-10-17" Statement: - - Effect: Allow + - Sid: AllowSSMParameterAccess + Effect: Allow Action: - ssm:PutParameter - ssm:GetParameter + - ssm:GetParameters - ssm:DescribeParameters Resource: "*" + - Sid: AllowKMSDecrypt + Effect: Allow + Action: + - kms:Decrypt + - kms:Encrypt + - kms:GenerateDataKey + - kms:DescribeKey + Resource: "*" + InstanceRolePermissionBoundary: + Type: AWS::IAM::ManagedPolicy + Properties: + Description: Permission boundary for EC2 instance role + ManagedPolicyName: !Join ['-', [Ref: Namespace, 'ec2-linux-permission-boundary']] + PolicyDocument: + Version: '2012-10-17' + Statement: + - Effect: Allow + Action: + - 's3:*' + - 's3files:*' + Resource: '*' + - Effect: Allow + Action: + - 'sts:*' + Resource: '*' + - Effect: Allow + Action: + - 'kms:*' + Resource: '*' + - Effect: Allow + Action: + - 'ssm:*' + Resource: '*' + - Effect: Allow + Action: + - 'ssmmessages:*' + Resource: '*' + - Effect: Allow + Action: + - 'ec2messages:*' + Resource: '*' + - Effect: Allow + Action: + - 'sns:Publish' + Resource: '*' + IAMRole: Type: 'AWS::IAM::Role' Properties: @@ -90,6 +123,10 @@ Resources: ManagedPolicyArns: - Ref: SSMPolicy - arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore + - arn:aws:iam::aws:policy/AmazonS3FilesClientFullAccess + PermissionsBoundary: !Ref InstanceRolePermissionBoundary + + InstanceProfile: Type: 'AWS::IAM::InstanceProfile' @@ -109,7 +146,6 @@ Resources: configSets: default: - config1 - - config2 config1: files: "/home/ec2-user/.config/autostart/studymount.desktop": @@ -126,84 +162,47 @@ Resources: Terminal=true Name=Study-Mounting Comment=Study Mounting - "/home/ec2-user/.config/autostart/jupyter.desktop": - mode: "000644" - owner: "ec2-user" - group: "ec2-user" - content: | - [Desktop Entry] - Type=Application - Exec="/home/ec2-user/Jupyter.sh" - Hidden=false - NoDisplay=false - X-GNOME-Autostart-enabled=true - Terminal=true - Name=JupyterLab - Comment=Icon for Jupyter - "/home/ec2-user/.config/autostart/rstudio.desktop": - mode: "000644" - owner: "ec2-user" - group: "ec2-user" - content: | - [Desktop Entry] - Type=Application - Exec="/home/ec2-user/rstudio.sh" - Hidden=false - NoDisplay=false - X-GNOME-Autostart-enabled=true - Terminal=true - Name=rstudio-server - Comment=Icon for rstudio - - "/home/ec2-user/rstudio.sh": mode: "000755" owner: "ec2-user" group: "ec2-user" content: !Sub | #!/bin/bash - url="http://localhost:8787/" - link_name="RStudio" - custom_icon="/home/ec2-user/logos/black.png" - mkdir -p $HOME/Desktop - desktop_file="$HOME/Desktop/rstudio.desktop" - cat > "$desktop_file" </dev/null 2>&1 || true + pids=$(ss -ltnp 'sport = :8787' 2>/dev/null | sed -n 's/.*pid=\([0-9]\+\).*/\1/p' | sort -u) + if [ -n "$pids" ]; then + echo "$pids" | xargs -r kill -9 || true + fi + if [ -z "$(docker ps -f "name=rstudio" -f "status=running" -q)" ]; then + docker rm -f rstudio >/dev/null 2>&1 || true + docker run --rm -d --name rstudio -e DISABLE_AUTH=true -v /home/ec2-user:/home/ec2-user -p 8787:8787 relevancelab/rstudio_4.2.1:1.0.3 + sleep 3 + fi + xdg-open "http://localhost:8787/" >/dev/null 2>&1 || true + "/home/ec2-user/jupyter.sh": mode: "000755" owner: "ec2-user" group: "ec2-user" content: !Sub | #!/bin/bash - - sleep 2 - jtoken=$(docker exec jupyterlab /bin/bash -c "jupyter server list" 2>&1 | grep token | awk '{print $2}' | sed 's/.*=//') - + set -euo pipefail + if [ -z "$(docker ps -f "name=jupyterlab" -f "status=running" -q)" ]; then + docker rm -f jupyterlab >/dev/null 2>&1 || true + docker run --rm -d --name jupyterlab -p 8888:8888 -v /home/ec2-user:/home/ec2-user relevancelab/jupiterlab_3.5.0:1.0.3 + sleep 3 + fi + jtoken=$(docker exec jupyterlab /bin/bash -lc "jupyter server list" 2>/dev/null | sed -n 's/.*token=\([^& ]*\).*/\1/p' | head -n1) + if [ -z "$jtoken" ]; then + jtoken=$(docker logs jupyterlab 2>&1 | sed -n 's/.*token=\([^& ]*\).*/\1/p' | head -n1) + fi if [ -n "$jtoken" ]; then - - echo "Token value: $jtoken" + xdg-open "http://127.0.0.1:8888/?token=$jtoken" >/dev/null 2>&1 || true else - echo "Error: Unable to retrieve the Jupyter token." + xdg-open "http://127.0.0.1:8888/" >/dev/null 2>&1 || true fi - url="http://127.0.0.1:8888/?token=$jtoken" - link_name="JupyterLab" - custom_icon="/home/ec2-user/logos/main-logo.svg" - mkdir -p $HOME/Desktop - desktop_file="$HOME/Desktop/jupyter.desktop" - cat > "$desktop_file" < >(tee /var/log/user-data.log|logger -t user-data -s 2>/dev/console) 2>&1 # trap '/opt/aws/bin/cfn-signal --exit-code 1 --resource EC2Instance --region ${AWS::Region} --stack ${AWS::StackName}' ERR + sudo dnf install -y amazon-efs-utils # Download and execute bootstrap script aws s3 cp --region "${AWS::Region}" "${EnvironmentInstanceFiles}/get_bootstrap.sh" "/tmp" chmod 500 "/tmp/get_bootstrap.sh" - /tmp/get_bootstrap.sh "${EnvironmentInstanceFiles}" '${S3Mounts}' "${AWS::Region}" - - # Install supervisor and start on boot - pip3 install supervisor crudini - - # Route auth request to external authenticator and restart dcv - /usr/local/bin/crudini --set /etc/dcv/dcv.conf security auth-token-verifier \"http://127.0.0.1:8445\" - #/usr/local/bin/crudini --set /etc/dcv/dcv.conf clipboard primary-selection-copy false - systemctl restart dcvserver + /tmp/get_bootstrap.sh "${EnvironmentInstanceFiles}" '${S3Mounts}' "" "${AWS::Region}" # Create dcv session start script cat << 'EOF' > /usr/local/bin/start-dcv-session #!/bin/bash service_name="dcvserver" - timeout=20 + timeout=60 elapsed_time=0 - echo "Waiting for $service_name to become active..." - - while true; do + echo "Waiting for $service_name to become active..." + while true; do status=$(systemctl is-active $service_name) if [[ $status == "active" ]]; then echo "$service_name is now active." break fi - if [[ $elapsed_time -ge $timeout ]]; then echo "Timeout: $service_name did not become active within $timeout seconds." break fi - sleep 1 elapsed_time=$((elapsed_time+1)) done - dcv create-session rg-session --name rg-session --user ec2-user --owner ec2-user + dcv create-session rg-session --name rg-session --type=virtual --user ec2-user --owner ec2-user dcv list-sessions date EOF # Create dcv session on reboot - chown root: "/usr/local/bin/start-dcv-session" - chmod 775 "/usr/local/bin/start-dcv-session" - sh "/usr/local/bin/start-dcv-session" - - # # Download and execute bootstrap script - # aws s3 cp --region "${AWS::Region}" "${EnvironmentInstanceFiles}/get_bootstrap.sh" "/tmp" - # chmod 500 "/tmp/get_bootstrap.sh" - # /tmp/get_bootstrap.sh "${EnvironmentInstanceFiles}" '${S3Mounts}' "${AWS::Region}" - + # chown root: "/usr/local/bin/start-dcv-session" + # chmod 775 "/usr/local/bin/start-dcv-session" sleep 5 - docker run -d --restart always -e PASSWORD=Pass@123 -v /home/ec2-user:/home/ec2-user -p 8787:8787 relevancelab/rstudio_4.2.1:1.0.3 - docker run -d --restart always --name jupyterlab -p 8888:8888 -v /home/ec2-user:/home/ec2-user relevancelab/jupiterlab_3.5.0:1.0.3 + # Clean existing containers/ports so this block is safe on reruns + docker stop rstudio jupyterlab 2>/dev/null || true + docker rm -f rstudio jupyterlab 2>/dev/null || true # Run init script to create files /opt/aws/bin/cfn-init --verbose --stack ${AWS::StackName} --resource EC2Instance --region ${AWS::Region} - # Add command to start on reboot - crontab -l 2>/dev/null > "/tmp/crontab" + + # ----------------------------- + # CREATE TOKEN BEFORE AUTH START + # ----------------------------- + + echo "Creating DCV auth token..." + sudo -u ec2-user /home/ec2-user/set_user_token.sh + + echo "Waiting for token propagation..." + + IMDS_TOKEN=$(curl -s -X PUT "http://169.254.169.254/latest/api/token" \ + -H "X-aws-ec2-metadata-token-ttl-seconds: 21600") + + INSTANCE_ID=$(curl -s -H "X-aws-ec2-metadata-token: $IMDS_TOKEN" \ + http://169.254.169.254/latest/meta-data/instance-id) + + REGION=$(curl -s -H "X-aws-ec2-metadata-token: $IMDS_TOKEN" \ + http://169.254.169.254/latest/meta-data/placement/region) + + for i in {1..15} + do + TOKEN_VALUE=$(aws ssm get-parameter \ + --name "/RL/RG/nice-dcv/auth-token/$INSTANCE_ID" \ + --region "$REGION" \ + --query Parameter.Value \ + --output text 2>/dev/null) + + if [ ! -z "$TOKEN_VALUE" ]; then + echo "Token confirmed in SSM" + break + fi + + echo "Waiting..." + sleep 2 + done + + # ----------------------------- + # NOW START AUTH SERVICE + # ----------------------------- + + systemctl daemon-reload + systemctl enable dcv-auth + systemctl restart dcv-auth + + echo "Restarting dcvserver..." + systemctl restart dcvserver + + sleep 10 + + # Create dcv session on reboot + chown root: "/usr/local/bin/start-dcv-session" + chmod 775 "/usr/local/bin/start-dcv-session" sh "/usr/local/bin/start-dcv-session" + + # Add command to start on reboot + crontab -l 2>/dev/null > /tmp/crontab echo '@reboot systemctl start dcvserver' >> "/tmp/crontab" - echo '@reboot cd /home/ec2-user/supervisor && /usr/local/bin/supervisord 2>&1 >> /home/ec2-user/supervisor/cust_auth.log' >> "/tmp/crontab" - echo '@reboot sleep 5' >> "/tmp/crontab" echo '@reboot /usr/local/bin/start-dcv-session 2>&1 >> /var/log/start-dcv-session.log' >> "/tmp/crontab" echo '@reboot /home/ec2-user/set_user_token.sh 2>&1 >> /var/log/set_user_token.log' >> "/tmp/crontab" + echo '@reboot systemctl start dcv-auth' >> "/tmp/crontab" + crontab "/tmp/crontab" - # Restart DCV session - #dcv close-session rg-session - #sh "/usr/local/bin/start-dcv-session" - # Remove password and lock for ec2-user passwd -l ec2-user - dcv create-session lock-session --name lock-session --user ec2-user --owner ec2-user --init /home/ec2-user/disable_lock.sh - sudo -u ec2-user /home/ec2-user/themes.sh + sudo -u ec2-user /home/ec2-user/rstudio.sh - sudo -u ec2-user /home/ec2-user/Jupyter.sh + sudo -u ec2-user /home/ec2-user/jupyter.sh dcv list-sessions - + echo "Session ready. Sending signal." /opt/aws/bin/cfn-signal -e $? --stack ${AWS::StackName} --resource EC2Instance --region ${AWS::Region} InstanceType: !Ref 'InstanceType' SecurityGroups: [!Ref 'InstanceSecurityGroup'] - KeyName: !Ref 'KeyPair' AvailabilityZone: !Ref AvailabilityZone ImageId: "{{resolve:ssm:/RL/RG/StandardCatalog/linux-nice-dcv-ami}}" IamInstanceProfile: !Ref InstanceProfile @@ -405,8 +408,8 @@ Resources: Ebs: VolumeSize: !Ref EBSVolumeSize Encrypted: true - - + + PropagateTagsToVolumeOnCreation: true Tags: @@ -416,7 +419,7 @@ Resources: Value: EC2 workspace instance - Key: cost_resource Value: !Sub ${AWS::StackName} - + InstanceSecurityGroup: Type: AWS::EC2::SecurityGroup Properties: @@ -425,11 +428,11 @@ Resources: - IpProtocol: tcp FromPort: '22' ToPort: '22' - CidrIp: !Ref 'AllowedIpAddress' + CidrIp: 0.0.0.0/0 - IpProtocol: tcp FromPort: '8443' ToPort: '8443' - CidrIp: !Ref 'AllowedIpAddress' + CidrIp: 0.0.0.0/0 Outputs: InstanceId: @@ -444,6 +447,6 @@ Outputs: ApplicationPort: Description: The Port in which the application is running Value: '8443' - AvailabilityZone: + AvailabilityZone: Description: AvailabilityZone of newly created EC2 instance - Value: !Ref AvailabilityZone + Value: !Ref AvailabilityZone \ No newline at end of file diff --git a/cft-templates/ec2-jupyterLab.yml b/cft-templates/ec2-jupyterLab.yml index c8fc259..755ceed 100644 --- a/cft-templates/ec2-jupyterLab.yml +++ b/cft-templates/ec2-jupyterLab.yml @@ -17,30 +17,18 @@ Parameters: Description: >- An S3 URI (starting with "s3://") that specifies the location of files to be copied to the environment instance, including any bootstrap scripts - EBSVolumeSize: - Description: The initial size of the volume (in GBs),Select volume size must be 32 or above - Type: Number - Default: 32 InstanceType: Type: String - Description: Choose the instance type e.g t3.medium (2vCPU , 2GiB RAM), t3.large (2vCPU, 8GiB RAM), t3.xlarge(4vCPU, 16GiB RAM) - AllowedValues: - - t3.medium - - t3.large - - t3.xlarge - ConstraintDescription: Valid instance type in the t3 families - Default: t3.medium + Description: Instance type for JupyterLab. + ConstraintDescription: Valid instance type ContainerName: Type: String Description: Docker image of the application to be launched - Default: relevancelab/jupiterlab_3.5.0:1.0.3 - KeyPair: - Type: "AWS::EC2::KeyPair::KeyName" - Description: Name of an existing EC2 KeyPair to enable SSH access to the instance. If no key pairs exist, please create one from the button next to the dropdown. Please contact your Administrator if you are unable to create one. - AvailabilityZone: - Description: Select the availability zone in which to create the instance. If you plan to attach a secondary volume to the instance, create this instance in the same AvailabilityZone as the volume you created. - Type: AWS::EC2::AvailabilityZone::Name - + Default: relevancelab/jupyterlab-4.5.3:1.0.0 + LatestAmiId: + Type: AWS::SSM::Parameter::Value + Default: /aws/service/ecs/optimized-ami/amazon-linux-2023/recommended/image_id + Conditions: IamPolicyEmpty: !Equals [!Ref IamPolicyDocument, '{}'] @@ -59,8 +47,6 @@ Resources: - 'ec2.amazonaws.com' Action: - 'sts:AssumeRole' - ManagedPolicyArns: - - arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore Policies: - !If - IamPolicyEmpty @@ -75,7 +61,11 @@ Resources: Action: - 'ssm:GetParameter' - 'ssm:PutParameter' - Resource: !Sub 'arn:${AWS::Partition}:ssm:${AWS::Region}:${AWS::AccountId}:parameter/*' + - 's3files:*' + Resource: !Sub 'arn:${AWS::Partition}:ssm:${AWS::Region}:${AWS::AccountId}:parameter/*' + ManagedPolicyArns: + - arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore + - arn:aws:iam::aws:policy/AmazonS3FilesClientFullAccess InstanceProfile: Type: 'AWS::IAM::InstanceProfile' @@ -125,7 +115,7 @@ Resources: image: nginx:latest ports: - "80:80" - - "443:443" + - "443:443" volumes: - /etc/nginx/nginx.conf:/etc/nginx/nginx.conf - /etc/nginx/dhparam.pem:/etc/nginx/dhparam.pem @@ -162,28 +152,46 @@ Resources: group: 'ec2-user' content: !Sub | #!/usr/bin/env bash - echo "fetching token and starting jupyterlab" - `docker-compose up -d` - sleep 5 - # Get the session token - TOKEN=$(curl -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 21600") - - # Get the region to build the parameter name - instance_region=$(curl -H "X-aws-ec2-metadata-token: $TOKEN" http://169.254.169.254/latest/meta-data/placement/region) - echo "Retrieved region ${region} from metadata service" - - # Get the instance id to build the parameter name - instance_id=$(curl -H "X-aws-ec2-metadata-token: $TOKEN" http://169.254.169.254/latest/meta-data/instance-id) - echo "Retrieved instance_id $instance_id from metadata service" - - #access_token=$((docker exec jupyterlab /bin/bash -c "jupyter server list" | grep token | awk '{print $1}') | sed 's/.*=//') - #echo "$access_token" - docker exec jupyterlab /bin/bash -c "jupyter server list" > access_token.txt 2>&1 - sudo chmod 777 access_token.txt - access_token=$(grep -oP '(?<=token=)[^ ]+' access_token.txt) - echo "token:$access_token" - aws ssm put-parameter --name "/RL/RG/jupyterLab/auth-token/$instance_id" --value $access_token --region $instance_region --type SecureString --overwrite - echo "Stored jupyterlab token in SSM" + echo "Starting JupyterLab" + cd /home/ec2-user + until docker info >/dev/null 2>&1 + do + echo "Waiting for Docker..." + sleep 5 + done + + docker-compose up -d + + sleep 20 + TOKEN=$(curl -X PUT \ + "http://169.254.169.254/latest/api/token" \ + -H "X-aws-ec2-metadata-token-ttl-seconds: 21600") + + instance_id=$(curl -s \ + -H "X-aws-ec2-metadata-token: $TOKEN" \ + http://169.254.169.254/latest/meta-data/instance-id) + + instance_region=$(curl -s \ + -H "X-aws-ec2-metadata-token: $TOKEN" \ + http://169.254.169.254/latest/meta-data/placement/region) + + access_token=$(docker exec jupyterlab jupyter server list | grep -o 'token=[^ ]*' | head -1 | cut -d= -f2 | cut -d: -f1) + + echo "TOKEN=$access_token" + + if [ -n "$access_token" ]; then + aws ssm put-parameter \ + --name "/RL/RG/jupyterLab/auth-token/$instance_id" \ + --value "$access_token" \ + --region "$instance_region" \ + --type SecureString \ + --overwrite + + echo "Stored Jupyter token in SSM" + else + echo "FAILED: token empty" + exit 1 + fi '/var/log/jupyterlab.log': content: "\n" mode: '000664' @@ -201,18 +209,16 @@ Resources: Properties: - ImageId : '{{resolve:ssm:/RL/RG/StandardCatalog/JupyterLab}}' + ImageId : !Ref LatestAmiId InstanceType: !Ref 'InstanceType' - SecurityGroups: [!Ref 'EC2SecurityGroup'] - KeyName: !Ref 'KeyPair' - IamInstanceProfile: !Ref InstanceProfile - PropagateTagsToVolumeOnCreation: true - AvailabilityZone: !Ref AvailabilityZone BlockDeviceMappings: - DeviceName: /dev/xvda Ebs: - VolumeSize: !Ref EBSVolumeSize - Encrypted: true + VolumeSize: 32 + Encrypted: false + SecurityGroups: [!Ref 'EC2SecurityGroup'] + IamInstanceProfile: !Ref InstanceProfile + PropagateTagsToVolumeOnCreation: true Tags: - Key: Name Value: !Join ['-', [Ref: Namespace, 'jupyterLab']] @@ -225,17 +231,37 @@ Resources: #!/bin/bash exec > >(tee /var/log/user-data.log|logger -t user-data -s 2>/dev/console) 2>&1 - # Install docker and pull necessary containers - sudo yum install -y docker - sudo systemctl enable docker.service - sudo systemctl enable containerd.service + # Install CFN bootstrap + sudo dnf install -y aws-cfn-bootstrap amazon-efs-utils + + # Enable Docker + sudo systemctl enable docker sudo systemctl start docker - sudo usermod -a -G docker ec2-user - sudo curl -L "https://github.com/docker/compose/releases/download/1.29.2/docker-compose-$(uname -s)-$(uname -m)" -o /usr/local/bin/docker-compose + sudo usermod -aG docker ec2-user + + # Enable SSM agent + sudo systemctl enable amazon-ssm-agent + sudo systemctl restart amazon-ssm-agent + + # Install Docker Compose v2 + # sudo curl -SL \ + # https://github.com/docker/compose/releases/download/v2.39.1/docker-compose-linux-x86_64 \ + # -o /usr/local/bin/docker-compose + + sudo curl -SL \ + https://github.com/docker/compose/releases/download/v5.1.4/docker-compose-linux-x86_64 \ + -o /usr/local/bin/docker-compose + sudo chmod +x /usr/local/bin/docker-compose - sudo ln -s /usr/local/bin/docker-compose /usr/bin/docker-compose + sudo ln -sf /usr/local/bin/docker-compose /usr/bin/docker-compose + sudo chmod +x /usr/bin/docker-compose + + docker-compose version + docker --version + + # Pull containers docker pull ${ContainerName} - docker pull nginx + docker pull nginx:latest # Copy environment instance files needed for the workspace aws s3 cp --region "${AWS::Region}" "${EnvironmentInstanceFiles}/jupyterlab/" "/tmp/jupyterlab" --recursive @@ -247,18 +273,18 @@ Resources: # Create self signed certificates chmod 500 "/tmp/create_self_signed_cert.sh" - /tmp/create_self_signed_cert.sh + /tmp/create_self_signed_cert.sh # Add command to start jupyterLab at reboot sudo crontab -l 2>/dev/null > "/tmp/crontab" echo '@reboot cd /home/ec2-user && sudo /home/ec2-user/mount_study.sh 2>&1 >> /home/ec2-user/mount_s3.log' >> "/tmp/crontab" - echo '@reboot cd /home/ec2-user && sudo /home/ec2-user/start_jupyterlab.sh 2>&1 >> /var/log/jupyterLab.log' >> "/tmp/crontab" + echo '@reboot cd /home/ec2-user && sudo /home/ec2-user/start_jupyterlab.sh 2>&1 >> /var/log/jupyterLab.log' >> "/tmp/crontab" sudo crontab "/tmp/crontab" # Mount study aws s3 cp --region "${AWS::Region}" "${EnvironmentInstanceFiles}/get_bootstrap.sh" "/tmp" chmod 500 "/tmp/get_bootstrap.sh" - /tmp/get_bootstrap.sh "${EnvironmentInstanceFiles}" '${S3Mounts}' "${AWS::Region}" + /tmp/get_bootstrap.sh "${EnvironmentInstanceFiles}" '${S3Mounts}' "${AWS::Region}" # init command to create files from config /opt/aws/bin/cfn-init --verbose --stack ${AWS::StackName} --resource EC2Instance --region ${AWS::Region} @@ -279,4 +305,4 @@ Outputs: Value: '443' AvailabilityZone: Description: AvailabilityZone of newly created JupyterLab EC2Instance - Value: !Ref AvailabilityZone + Value: !GetAtt [EC2Instance, AvailabilityZone] diff --git a/cft-templates/ec2-linux-docker.yml b/cft-templates/ec2-linux-docker.yml index 9b1ddb4..da3a487 100644 --- a/cft-templates/ec2-linux-docker.yml +++ b/cft-templates/ec2-linux-docker.yml @@ -1,4 +1,4 @@ -Metadata: +Metadata: License: Apache-2.0 AWSTemplateFormatVersion: '2010-09-09' Description: 'AWS CloudFormation Template to create an EC2 instance @@ -20,36 +20,22 @@ Parameters: Description: >- An S3 URI (starting with "s3://") that specifies the location of files to be copied to the environment instance, including any bootstrap scripts - EBSVolumeSize: - Description: The initial size of the volume (in GBs) EBS will use for storage. - Type: Number - Default: 8 InstanceType: - Description: Choose the instance type e.g t3.small (2vCPU , 2GiB RAM) t3.medium (2vCPU , 4GiB RAM), t3.large (2vCPU, 8GiB RAM). + Description: Choose the instance type for this instance. Type: String - Default: t3.small - AllowedValues: [t3.small, t3.medium, t3.large] ConstraintDescription: must be a valid EC2 instance type. KeyPair: Description: Name of an existing EC2 KeyPair to enable SSH access to the instance. If no key pairs exist, please create one from the button next to the dropdown. Please contact your Administrator if you are unable to create one. Type: AWS::EC2::KeyPair::KeyName ConstraintDescription: must be the name of an existing EC2 KeyPair. - AllowedSSHLocation: - Description: The IP address range that can be used to SSH to the EC2 instances - Type: String - MinLength: '9' - MaxLength: '18' - Default: 0.0.0.0/0 - AllowedPattern: (\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})/(\d{1,2}) - ConstraintDescription: must be a valid IP CIDR range of the form x.x.x.x/x. LatestAmiId: Type: 'AWS::SSM::Parameter::Value' - Default: '/aws/service/ecs/optimized-ami/amazon-linux-2/recommended/image_id' + Default: '/aws/service/ecs/optimized-ami/amazon-linux-2023/recommended/image_id' Conditions: IamPolicyEmpty: !Equals [!Ref IamPolicyDocument, '{}'] -Resources: +Resources: IAMRole: Type: 'AWS::IAM::Role' Properties: @@ -64,15 +50,23 @@ Resources: - 'ec2.amazonaws.com' Action: - 'sts:AssumeRole' - ManagedPolicyArns: - - arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore Policies: - !If - IamPolicyEmpty - !Ref 'AWS::NoValue' - PolicyName: !Join ['-', [Ref: Namespace, 's3-studydata-policy']] PolicyDocument: !Ref IamPolicyDocument - + - PolicyName: ssm-and-s3files-access + Effect: Allow + Action: + - ssm:GetParameter + - ssm:PutParameter + - ssm:DescribeParameters + - s3files:* + Resource: '*' + ManagedPolicyArns: + - arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore + - arn:aws:iam::aws:policy/AmazonS3FilesClientFullAccess InstanceProfile: Type: 'AWS::IAM::InstanceProfile' Properties: @@ -85,25 +79,20 @@ Resources: Type: AWS::EC2::Instance CreationPolicy: ResourceSignal: - Timeout: PT3M + Timeout: PT5M Properties: UserData: Fn::Base64: !Sub | #!/usr/bin/env bash - sudo yum install zip -y - sudo yum install unzip -y - # Install AWS CLI version2 - curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" - unzip awscliv2.zip - sudo ./aws/install - # Install Mysql Shell - sudo yum install mysql -y - # Install cfn - yum install -y aws-cfn-bootstrap + exec > >(tee /var/log/user-data.log|logger -t user-data -s 2>/dev/console) 2>&1 + # Update packages + sudo dnf update -y + # Install required packages + sudo dnf install -y aws-cfn-bootstrap amazon-efs-utils # Download and execute shell script - aws s3 cp "${EnvironmentInstanceFiles}/get_bootstrap_mysql.sh" "/tmp" - chmod 500 "/tmp/get_bootstrap_mysql.sh" - /tmp/get_bootstrap_mysql.sh "${EnvironmentInstanceFiles}" '${S3Mounts}' + aws s3 cp "${EnvironmentInstanceFiles}/get_bootstrap.sh" "/tmp" + chmod 500 "/tmp/get_bootstrap.sh" + /tmp/get_bootstrap.sh "${EnvironmentInstanceFiles}" '${S3Mounts}' # Signal result to CloudFormation /opt/aws/bin/cfn-signal --exit-code 0 --resource EC2Instance --region ${AWS::Region} --stack ${AWS::StackName} InstanceType: !Ref 'InstanceType' @@ -112,11 +101,6 @@ Resources: ImageId: !Ref 'LatestAmiId' PropagateTagsToVolumeOnCreation: true IamInstanceProfile: !Ref InstanceProfile - BlockDeviceMappings: - - DeviceName: /dev/xvda - Ebs: - VolumeSize: !Ref EBSVolumeSize - Encrypted: true Tags: - Key: Name Value: !Join ['-', [Ref: Namespace, 'ec2-linux']] @@ -124,7 +108,7 @@ Resources: Value: EC2 workspace instance - Key: cost_resource Value: !Sub ${AWS::StackName} - + InstanceSecurityGroup: Type: AWS::EC2::SecurityGroup Properties: @@ -133,7 +117,7 @@ Resources: - IpProtocol: tcp FromPort: '22' ToPort: '22' - CidrIp: !Ref 'AllowedSSHLocation' + CidrIp: 0.0.0.0/0 Outputs: InstanceId: @@ -145,6 +129,6 @@ Outputs: InstanceDNSName: Description: DNS name of the newly created EC2 instance Value: !GetAtt [EC2Instance, PublicDnsName] - AvailabilityZone: + AvailabilityZone: Description: AvailabilityZone of newly created EC2 instance - Value: !GetAtt [EC2Instance, AvailabilityZone] + Value: !GetAtt [EC2Instance, AvailabilityZone] \ No newline at end of file From f722b892c058a6efc61c11057fe81c460e962103 Mon Sep 17 00:00:00 2001 From: Anusha-janardhan Date: Thu, 18 Jun 2026 04:18:58 +0000 Subject: [PATCH 4/5] chore(standardcatalogitems): remove deprecated catalog items and update AMI IDs for various regions --- dump/standardcatalogitems.json | 443 +++++++-------------------------- 1 file changed, 87 insertions(+), 356 deletions(-) diff --git a/dump/standardcatalogitems.json b/dump/standardcatalogitems.json index 4bc7cea..c3321fd 100644 --- a/dump/standardcatalogitems.json +++ b/dump/standardcatalogitems.json @@ -53,66 +53,6 @@ ] } }, - { - "_id": { - "$oid": "60504955135663522b880edb" - }, - "name": "MySQL", - "description": "MySQL container on Docker EC2", - "fileName": "ec2-linux-docker-mysql.yml", - "tags": [ - { - "Key": "EstimatedTimeToProvision", - "Value": "10 Minutes" - }, - { - "Key": "DetailsLink", - "Value": "https://researchgateway.readthedocs.io/en/latest/MySql.html" - }, - { - "Key": "Service", - "Value": "EC2" - }, - { - "Key": "TypeOfProduct", - "Value": "Research" - } - ], - "owner": "RL", - "portfolio": "RGPortfolio", - "availableRegions": [], - "assignedOU": [], - "metaData": { - "pre_provisioning": [ - { - "code": "CFT_PARAMS", - "params": [ - { - "name": "EnvironmentInstanceFiles", - "type": "RL::SC::PARAM::HD" - }, - { - "name": "IamPolicyDocument", - "type": "RL::SC::PARAM::HD" - }, - { - "name": "S3Mounts", - "type": "RL::SC::PARAM::HD" - }, - { - "name": "Namespace", - "type": "RL::SC::PARAM::HD" - } - ] - } - ], - "post_provisioning": [], - "checks_before_assigning_product": [], - "checks_after_assigning_product": [], - "permission_required": {}, - "cost_resource": true - } - }, { "_id": { @@ -181,66 +121,6 @@ "cost_resource": true } }, - { - "_id": { - "$oid": "60504955135663522b880cff" - }, - "name": "Amazon Sagemaker Notebook", - "description": "Amazon SageMaker is a fully managed service that provides the ability to build, train, and deploy ML models", - "fileName": "sagemaker-template-with-url.yml", - "tags": [ - { - "Key": "EstimatedTimeToProvision", - "Value": "10 Minutes" - }, - { - "Key": "DetailsLink", - "Value": "https://researchgateway.readthedocs.io/en/latest/sagemaker.html" - }, - { - "Key": "Service", - "Value": "Sagemaker" - }, - { - "Key": "TypeOfProduct", - "Value": "Research" - } - ], - "owner": "RL", - "portfolio": "RGPortfolio", - "availableRegions": [], - "assignedOU": [], - "metaData": { - "pre_provisioning": [ - { - "code" : "CFT_PARAMS", - "params" : [ - { - "name" : "EnvironmentInstanceFiles", - "type" : "RL::SC::PARAM::HD" - }, - { - "name" : "IamPolicyDocument", - "type" : "RL::SC::PARAM::HD" - }, - { - "name" : "S3Mounts", - "type" : "RL::SC::PARAM::HD" - }, - { - "name" : "Namespace", - "type" : "RL::SC::PARAM::HD" - } - ] - } - ], - "post_provisioning": [], - "checks_before_assigning_product": [], - "checks_after_assigning_product": [], - "permission_required": {}, - "cost_resource": true - } - }, { "_id": { "$oid": "605db54abd5cfd4d85b01079" @@ -312,7 +192,7 @@ "us-east-1": "ami-0a4d426780e0f105a" }, { - "us-east-2": "ami-013010453aaa13c96" + "us-east-2": "ami-0172009bcf1f4f5c1" }, { "us-west-1": "ami-08cfdcb0af9270b0a" @@ -324,7 +204,7 @@ "ap-south-1": "ami-07db46df254bc693b" }, { - "ap-southeast-1": "ami-068731557c511b935" + "ap-southeast-1": "ami-02026d5c6acfee635" }, { "ap-southeast-2": "ami-0a6dfa4f63da1830e" @@ -344,113 +224,6 @@ "cost_resource": true } }, - { - "_id" : { - "$oid":"62839f80e82248514eba03b6" - }, - "name" : "PCluster", - "description" : "Easy to deploy High Performance Computing based on AWS ParallelCluster 3.0 which supports AWS Batch and Slurm schedulers", - "fileName" : "pcluster.yaml", - "tags" : [ - { - "Key" : "EstimatedTimeToProvision", - "Value" : "40 Minutes" - }, - { - "Key" : "DetailsLink", - "Value" : "https://researchgateway.readthedocs.io/en/latest/PCluster.html" - }, - { - "Key" : "TypeOfProduct", - "Value" : "Research" - }, - { - "Key" : "ALB", - "Value" : "true" - }, - { - "Key" : "ApplicationPort", - "Value" : "8443" - }, - { - "Key" : "Service", - "Value" : "Pcluster" - }, - { - "Key" : "ApplicationProtocol", - "Value" : "HTTPS" - } - ], - "owner" : "RL", - "portfolio" : "RGPortfolio", - "availableRegions" : [ ], - "assignedOU" : [], - "metaData" : { - "cost_resource" : true, - "pre_provisioning" : [ - { - "code" : "CFT_PARAMS", - "params" : [ - { - "name" : "ResearcherName", - "type" : "RL::SC::PARAM::HD" - }, - { - "name" : "ProjectId", - "type" : "RL::SC::PARAM::HD" - }, - { - "name" : "ComputeEnvMinvCpus", - "type" : "RL::SC::PARAM::HD" - }, - { - "name" : "ComputeEnvMaxvCpus", - "type" : "RL::SC::PARAM::HD" - }, - { - "name" : "ComputeEnvDesiredvCpus", - "type" : "RL::SC::PARAM::HD" - } - ] - } - ], - "post_provisioning" : [ ], - "checks_before_assigning_product" : [ - { - "code" : "AMI_ID_REQUIRED", - "is_ami_required" : true, - "ami_id_list" : [ - { - "us-east-1" : "ami-0c118ae8fa3db70b0" - }, - { - "us-east-2" : "ami-011e5bed8ea1b616c" - }, - { - "us-west-1" : "ami-0d10e7a4a2017963d" - }, - { - "us-west-2" : "ami-02872787f6c9fc51d" - }, - { - "ap-south-1" : "ami-0345b99c54d574522" - }, - { - "ap-southeast-1" : "ami-08113a8abda2dbd3c" - }, - { - "ap-southeast-2" : "ami-03f4743208c938213" - } - ], - "ami_path" : "/RL/RG/StandardCatalog/ParallelCluster-linux-ami" - } - ], - "checks_after_assigning_product" : [ ], - "permission_required" : { - - } - } - }, { "_id": { "$oid": "60504955135663522b880cfe" @@ -511,8 +284,8 @@ "_id": { "$oid": "60504955135663522b880abe" }, - "name" : "NICE DCV on Amazon EC2 Linux", - "description" : "NICE DCV is a high-performance remote display protocol using which the customers can run graphics-intensive applications remotely on EC2 instances, and stream their user interface to simpler client machines, eliminating the need for expensive dedicated workstations. The user can connect to the DCV server by using the RG Connect URL button. The user name is ec2-user and password is the Instance ID.", + "name" : "Linux Remote Desktop", + "description" : "A browser-based remote desktop with Amazon Linux 2023 and GNOME UI. Pre-installed tools include RStudio, JupyterLab, VS Code and LibreOffice.", "fileName" : "ec2-dcv.yml", "tags" : [ { @@ -579,7 +352,7 @@ "is_ami_required" : true, "ami_id_list" : [ { - "us-east-2" : "ami-0bcd819dd88585c32" + "us-east-2" : "ami-0bda5589861be0d07" }, { "us-east-1" : "ami-0b5fdc4291f61615f" @@ -594,7 +367,7 @@ "ap-south-1" : "ami-0f81ea31ecefeba4f" }, { - "ap-southeast-1" : "ami-069d31c5036ef2bef" + "ap-southeast-1" : "ami-00141e832c0467574" }, { "ap-southeast-2" : "ami-03a848fa192a4342a" @@ -607,6 +380,12 @@ } ], "ami_path" : "/RL/RG/StandardCatalog/linux-nice-dcv-ami" + }, + { + "Key": "projectTypeCompatibility", + "Value": [ + "Standard" + ] } ], "checks_after_assigning_product" : [ ], @@ -834,6 +613,12 @@ } ], "ami_path" : "/RL/RG/StandardCatalog/VS-Code" + }, + { + "Key": "projectTypeCompatibility", + "Value": [ + "Standard" + ] } ], "checks_after_assigning_product" : [], @@ -940,6 +725,12 @@ } ], "ami_path" : "/RL/RG/StandardCatalog/JupyterLab" + }, + { + "Key": "projectTypeCompatibility", + "Value": [ + "Standard" + ] } ], "checks_after_assigning_product" : [], @@ -947,80 +738,6 @@ "cost_resource" : true } }, - { - "_id": { - "$oid": "60504955135663522b880dac" - }, - "name" : "Integrated Genomics Viewer", - "description" : "The Integrative Genomics Viewer (IGV) is a high-performance, easy-to-use, interactive tool for the visual exploration of genomic data.", - "fileName" : "igv.yml", - "tags" : [ - { - "Key" : "EstimatedTimeToProvision", - "Value" : "10 Minutes" - }, - { - "Key" : "DetailsLink", - "Value" : "https://researchgateway.readthedocs.io/en/latest/IGV.html" - }, - { - "Key" : "Service", - "Value" : "EC2-IGV" - }, - { - "Key" : "TypeOfProduct", - "Value" : "Research" - }, - { - "Key" : "ALB", - "Value" : "true" - }, - { - "Key" : "ApplicationPort", - "Value" : "8443" - }, - { - "Key" : "ApplicationProtocol", - "Value" : "HTTPS" - } - ], - "owner" : "RL", - "portfolio" : "RGPortfolio", - "availableRegions" : [ ], - "assignedOU" : [], - "metaData" : { - "pre_provisioning" : [ - { - "code" : "CFT_PARAMS", - "params" : [ - { - "name" : "EnvironmentInstanceFiles", - "type" : "RL::SC::PARAM::HD" - }, - { - "name" : "IamPolicyDocument", - "type" : "RL::SC::PARAM::HD" - }, - { - "name" : "S3Mounts", - "type" : "RL::SC::PARAM::HD" - }, - { - "name" : "Namespace", - "type" : "RL::SC::PARAM::HD" - } - ] - } - ], - "post_provisioning" : [ ], - "checks_before_assigning_product" : [ ], - "checks_after_assigning_product" : [ ], - "permission_required" : { - - }, - "cost_resource" : true - } -}, { "_id": { "$oid": "60504955135663522b880cde" @@ -1103,45 +820,47 @@ } ], "post_provisioning" : [ ], - "checks_before_assigning_product" : [ - { - "Key" : "Secure", - "Value" : true - }, - { - "code" : "AMI_ID_REQUIRED", - "is_ami_required" : true, - "ami_id_list" : [ - { - "us-east-2" : "ami-0dee29a828867a412" - }, - { - "us-east-1" : "ami-011ebe24258e615ba" - }, - { - "us-west-2" : "ami-0ac56f9f8b19a73bf" - }, - { - "us-west-1" : "ami-0e0231a98d0ca2679" - }, - { - "ap-south-1" : "ami-0f81ea31ecefeba4f" - }, - { - "ap-southeast-1" : "ami-069d31c5036ef2bef" - }, - { - "ap-southeast-2" : "ami-03a848fa192a4342a" - }, - { - "ap-east-1" : "ami-01819f556745b5d1d" - }, - { - "eu-west-1" : "ami-03dab069555190662" - } - ], - "ami_path" : "/RL/RG/StandardCatalog/linux-nice-dcv-ami" - } + "checks_before_assigning_product": [ + { + "Key": "projectTypeCompatibility", + "Value": [ + "Secure" + ] + }, + { + "code": "AMI_ID_REQUIRED", + "is_ami_required": true, + "ami_id_list": [ + { + "us-east-2": "ami-0dee29a828867a412" + }, + { + "us-east-1": "ami-011ebe24258e615ba" + }, + { + "us-west-2": "ami-0ac56f9f8b19a73bf" + }, + { + "us-west-1": "ami-0e0231a98d0ca2679" + }, + { + "ap-south-1": "ami-0f81ea31ecefeba4f" + }, + { + "ap-southeast-1": "ami-069d31c5036ef2bef" + }, + { + "ap-southeast-2": "ami-03a848fa192a4342a" + }, + { + "ap-east-1": "ami-01819f556745b5d1d" + }, + { + "eu-west-1": "ami-03dab069555190662" + } + ], + "ami_path": "/RL/RG/StandardCatalog/linux-nice-dcv-ami" + } ], "checks_after_assigning_product" : [ ], "permission_required" : { @@ -1232,11 +951,13 @@ } ], "post_provisioning" : [], - "checks_before_assigning_product" : [ + "checks_before_assigning_product" : [ { - "Key" : "Secure", - "Value" : true - }, + "Key": "projectTypeCompatibility", + "Value": [ + "Secure" + ] + }, { "code" : "AMI_ID_REQUIRED", "is_ami_required" : true, @@ -1346,6 +1067,22 @@ } ], "checks_before_assigning_product": [ + { + "code": "AMI_ID_REQUIRED", + "is_ami_required": true, + "ami_id_list": [ + { + "ap-southeast-1": "ami-05f2bfad8eb50e321" + }, + { + "us-east-2": "ami-0393cfb3beacf7f84" + }, + { + "us-east-1": "ami-0f0fe8a34d1ac91f6" + } + ], + "ami_path": "/RL/RG/StandardCatalog/windowsRemoteDesktop-ami" + }, { "Key": "projectTypeCompatibility", "Value": [ @@ -1589,12 +1326,6 @@ ], "post_provisioning": [], "checks_before_assigning_product": [ - { - "Key": "projectTypeCompatibility", - "Value": [ - "Standard" - ] - }, { "Key": "projectTypeCompatibility", "Value": [ From cac0e4af010e5f6d941006d987c26afe1568a757 Mon Sep 17 00:00:00 2001 From: Anusha-janardhan Date: Thu, 18 Jun 2026 05:12:23 +0000 Subject: [PATCH 5/5] refactor(bootstrap-scripts): streamline S3 mounting process and enhance script functionality --- scripts/bootstrap-scripts/bin/mount_s3.sh | 210 ++++++++++++++++++--- scripts/bootstrap-scripts/bootstrap.sh | 175 +++++++---------- scripts/bootstrap-scripts/get_bootstrap.sh | 14 +- 3 files changed, 248 insertions(+), 151 deletions(-) diff --git a/scripts/bootstrap-scripts/bin/mount_s3.sh b/scripts/bootstrap-scripts/bin/mount_s3.sh index 0fe5e60..e0b4031 100644 --- a/scripts/bootstrap-scripts/bin/mount_s3.sh +++ b/scripts/bootstrap-scripts/bin/mount_s3.sh @@ -12,6 +12,7 @@ # }, ...] CONFIG="/usr/local/etc/s3-mounts.json" MOUNT_DIR="${HOME}/studies" +S3FILES_ROOT="${MOUNT_DIR}/.s3files" AWS_CONFIG_DIR="${HOME}/.aws" # Exit if CONFIG doesn't exist or is 0 bytes @@ -33,6 +34,54 @@ env_type() { fi } +ensure_fuse() { + if [ "$(uname -s)" != "Linux" ]; then + return 0 + fi + if lsmod 2>/dev/null | grep -q '^fuse '; then + return 0 + fi + if command -v yum >/dev/null 2>&1; then + sudo yum install -y fuse fuse-common >/dev/null 2>&1 || true + elif command -v dnf >/dev/null 2>&1; then + sudo dnf install -y fuse fuse-common >/dev/null 2>&1 || true + fi + sudo modprobe fuse 2>/dev/null || true +} + +s3files_client_installed() { + command -v mount.s3files >/dev/null 2>&1 \ + || [ -x /sbin/mount.s3files ] \ + || [ -x /usr/sbin/mount.s3files ] +} + +ensure_s3files_client() { + if s3files_client_installed; then + return 0 + fi + + printf 'Installing amazon-efs-utils (S3 Files client)...\n' + ensure_fuse + + if command -v yum >/dev/null 2>&1; then + if yum list available amazon-efs-utils 2>/dev/null | grep -q '3\.'; then + sudo yum install -y 'amazon-efs-utils-3.*' fuse fuse-common + else + sudo yum install -y amazon-efs-utils fuse fuse-common + fi + elif command -v dnf >/dev/null 2>&1; then + sudo dnf install -y amazon-efs-utils fuse fuse-common + else + curl -fsSL https://amazon-efs-utils.aws.com/efs-utils-installer.sh | sudo sh -s -- --install + fi + + if ! s3files_client_installed; then + printf 'ERROR: mount.s3files is not available; install amazon-efs-utils 3.x for S3 Files mounts\n' >&2 + return 1 + fi + return 0 +} + # Add roleArn for a study to credentials file if not present already append_role_to_credentials() { study_id=$1 @@ -48,6 +97,66 @@ append_role_to_credentials() { fi } +add_fstab_entry() { + local filesystem_id="$1" + local mount_point="$2" + local fstab_entry + + fstab_entry="${filesystem_id}:/ ${mount_point} s3files _netdev 0 0" + + if ! grep -qF "$fstab_entry" /etc/fstab; then + echo "$fstab_entry" | sudo tee -a /etc/fstab >/dev/null + fi +} + +mount_s3files_filesystem() { + local filesystem_id="$1" + local mount_point="$2" + + ensure_s3files_client || return 1 + + mkdir -p "$mount_point" + if ! mountpoint -q "$mount_point"; then + if ! sudo mount \ + -t s3files \ + "${filesystem_id}:/" \ + "$mount_point" + then + printf 'ERROR: failed to mount S3Files filesystem "%s" at "%s"\n' \ + "$filesystem_id" "$mount_point" >&2 + return 1 + fi + fi + add_fstab_entry "${filesystem_id}" "$mount_point" +} + +s3files_link_target() { + local fs_mount_point="$1" + local s3_prefix="$2" + + if [ -n "$s3_prefix" ] && [ "$s3_prefix" != "null" ] && [ "$s3_prefix" != "/" ]; then + printf "%s/%s" "$fs_mount_point" "$s3_prefix" + else + printf "%s" "$fs_mount_point" + fi +} + +link_study_to_s3files_mount() { + local study_id="$1" + local s3_prefix="$2" + local filesystem_id="$3" + local study_dir="${MOUNT_DIR}/${study_id}" + local fs_mount_point="${S3FILES_ROOT}/${filesystem_id}" + local link_target + + link_target="$(s3files_link_target "$fs_mount_point" "$s3_prefix")" + mkdir -p "$MOUNT_DIR" + if [ -e "$study_dir" ] && [ ! -L "$study_dir" ]; then + rm -rf "$study_dir" + fi + ln -sfn "$link_target" "$study_dir" +} + # Use STS regional endpoint instead of global one. This allows external studies to connect with local interface endpoint # if it exists. Refer https://docs.aws.amazon.com/sdkref/latest/guide/setting-global-sts_regional_endpoints.html token=`curl -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 21600"` @@ -55,10 +164,39 @@ region=`curl http://169.254.169.254/latest/meta-data/placement/availability-zone export AWS_STS_REGIONAL_ENDPOINTS=regional export AWS_DEFAULT_REGION=$region export AWS_SDK_LOAD_CONFIG=1 +is_linux="false" +if [ "$(uname -s)" = "Linux" ]; then + is_linux="true" +fi + +mkdir -p "$S3FILES_ROOT" # Mount S3 buckets mounts="$(cat "$CONFIG")" num_mounts=$(printf "%s" "$mounts" | jq ". | length" -) + +# First pass: mount each unique S3 Files filesystem once +if [ "$is_linux" = "true" ]; then + for ((study_idx=0; study_idx<$num_mounts; study_idx++)) + do + mount_source="$(printf "%s" "$mounts" | jq -r ".[$study_idx].source // \"S3\"" -)" + filesystem_id="$(printf "%s" "$mounts" | jq -r ".[$study_idx].\"filesystem-id\" // .[$study_idx].filesystemId // \"\"" -)" + if [ "${filesystem_id}" = "" ] || [ "${filesystem_id}" = "null" ]; then + continue + fi + if [ "$mount_source" != "S3Files" ]; then + continue + fi + fs_mount_point="${S3FILES_ROOT}/${filesystem_id}" + if ! mountpoint -q "$fs_mount_point" 2>/dev/null; then + printf 'Mounting S3Files filesystem "%s" at "%s"\n' \ + "$filesystem_id" "$fs_mount_point" + mount_s3files_filesystem "${filesystem_id}" "$fs_mount_point" + fi + done +fi + +# Second pass: per-study paths (symlink to shared S3 Files mount or goofys) for ((study_idx=0; study_idx<$num_mounts; study_idx++)) do # Parse bucket/key info @@ -68,39 +206,53 @@ do s3_role_arn="$(printf "%s" "$mounts" | jq -r ".[$study_idx].roleArn" -)" kms_arn="$(printf "%s" "$mounts" | jq -r ".[$study_idx].kmsArn" -)" bucket_region="$(printf "%s" "$mounts" | jq -r ".[$study_idx].region" -)" - - # Mount S3 location if not already mounted + mount_source="$(printf "%s" "$mounts" | jq -r ".[$study_idx].source // \"S3\"" -)" + filesystem_id="$(printf "%s" "$mounts" | jq -r ".[$study_idx].\"filesystem-id\" // .[$study_idx].filesystemId // \"\"" -)" study_dir="${MOUNT_DIR}/${study_id}" - ps -U "$LOGNAME" -o "command" | egrep -q "goofys .* ${study_dir}$" - if [ $? -ne 0 ] - then - mkdir -p "$study_dir" - if [ "$s3_role_arn" == "null" ] + + if [ "${filesystem_id}" = "" ] || [ "${filesystem_id}" = "null" ]; then + mount_source="S3" + fi + + if [ "$mount_source" = "S3Files" ] && [ "$is_linux" = "true" ]; then + fs_mount_point="${S3FILES_ROOT}/${filesystem_id}" + printf 'Linking study "%s" to S3Files mount at "%s"\n' \ + "$study_id" "$study_dir" + link_study_to_s3files_mount "$study_id" "$s3_prefix" "$filesystem_id" + else + # Mount S3 location if not already mounted + study_dir="${MOUNT_DIR}/${study_id}" + ps -U "$LOGNAME" -o "command" | egrep -q "goofys .* ${study_dir}$" + if [ $? -ne 0 ] then - printf 'Mounting internal study "%s" at "%s"\n' "$study_id" "$study_dir" - goofys --region $bucket_region --acl "bucket-owner-full-control" "${s3_bucket}:${s3_prefix}" "$study_dir" - else - bucket_region="$(printf "%s" "$mounts" | jq -r ".[$study_idx].region" -)" - # BYOB studies have a region specified, but in case it isn't use the default region - if [[ $bucket_region == "null" ]]; then - printf 'Bucket region is not specified. Defaulting to "%s" for mounting \n' "$region" - bucket_region=$region - fi; - - # make .aws dir if it doesn't already exist and add credentials - mkdir -p $AWS_CONFIG_DIR - append_role_to_credentials $study_id $s3_role_arn - if [ "$kms_arn" == "null" ] + mkdir -p "$study_dir" + if [ "$s3_role_arn" == "null" ] then - printf 'Mounting external study "%s" at "%s" using role "%s" and region "%s" \n' "$study_id" "$study_dir" \ - "$s3_role_arn" "$bucket_region" - goofys --region $bucket_region --profile $study_id --acl "bucket-owner-full-control" \ - "${s3_bucket}:${s3_prefix}" "$study_dir" + printf 'Mounting internal study "%s" at "%s"\n' "$study_id" "$study_dir" + goofys --region $bucket_region --acl "bucket-owner-full-control" "${s3_bucket}:${s3_prefix}" "$study_dir" else - printf 'Mounting external study "%s" at "%s" using role "%s", kms arn "%s" and region "%s" \n' "$study_id" "$study_dir" \ - "$s3_role_arn" "$kms_arn" "$bucket_region" - goofys --region $bucket_region --profile $study_id --sse-kms $kms_arn --acl "bucket-owner-full-control" \ - "${s3_bucket}:${s3_prefix}" "$study_dir" + bucket_region="$(printf "%s" "$mounts" | jq -r ".[$study_idx].region" -)" + # BYOB studies have a region specified, but in case it isn't use the default region + if [[ $bucket_region == "null" ]]; then + printf 'Bucket region is not specified. Defaulting to "%s" for mounting \n' "$region" + bucket_region=$region + fi; + + # make .aws dir if it doesn't already exist and add credentials + mkdir -p $AWS_CONFIG_DIR + append_role_to_credentials $study_id $s3_role_arn + if [ "$kms_arn" == "null" ] + then + printf 'Mounting external study "%s" at "%s" using role "%s" and region "%s" \n' "$study_id" "$study_dir" \ + "$s3_role_arn" "$bucket_region" + goofys --region $bucket_region --profile $study_id --acl "bucket-owner-full-control" \ + "${s3_bucket}:${s3_prefix}" "$study_dir" + else + printf 'Mounting external study "%s" at "%s" using role "%s", kms arn "%s" and region "%s" \n' "$study_id" "$study_dir" \ + "$s3_role_arn" "$kms_arn" "$bucket_region" + goofys --region $bucket_region --profile $study_id --sse-kms $kms_arn --acl "bucket-owner-full-control" \ + "${s3_bucket}:${s3_prefix}" "$study_dir" + fi fi fi fi diff --git a/scripts/bootstrap-scripts/bootstrap.sh b/scripts/bootstrap-scripts/bootstrap.sh index db60120..edd3287 100644 --- a/scripts/bootstrap-scripts/bootstrap.sh +++ b/scripts/bootstrap-scripts/bootstrap.sh @@ -1,140 +1,93 @@ #!/usr/bin/env bash -# This script bootstraps a workspace instance by preparing S3 study data to be -# mounted via the mount_s3.sh environment script. -# Note that mounting cannot be performed during initial bootstrapping -# because the instance's role will not yet have access to S3 study -# data since the associated resource policies aren't updated until after the -# CFN stack has been completed created. +# Prepares S3 / S3 Files study mounts on a workspace instance. +# Mounting runs from ec2-user login (.bash_profile) after IAM policies are active. S3_MOUNTS="$1" -RSTUDIO_USER="$2" +[ -z "$S3_MOUNTS" -o "$S3_MOUNTS" = "[]" ] && exit 0 -# Get directory in which this script is stored and define URL from which to download goofys FILES_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" GOOFYS_URL="https://github.com/kahing/goofys/releases/download/v0.24.0/goofys" -# Define a function to determine what type of environment this is (RStudio, or EC2 Linux) env_type() { - if [ -d "/var/log/rstudio-server" ] - then - printf "rstudio" - elif [ -f "/usr/bin/nextflow" ] + if [ -d "/usr/share/aws/emr" ] then - printf "nextflow" + printf "emr" elif [ -d "/home/ec2-user/SageMaker" ] then - printf "sagemaker" + printf "sagemaker" + elif [ -d "/var/log/rstudio-server" ] + then + printf "rstudio" else printf "ec2-linux" fi } -# Define a function to update Jupyter configuration files -update_jupyter_config() { - - config_file="$1" - - # HACK: Update the default SessionManager class used by Jupyter notebooks - # so that it runs the S3 mount script the first time sessions are listed - cat << EOF | cut -b5- >> "$config_file" - - import subprocess - from notebook.services.sessions.sessionmanager import SessionManager as BaseSessionManager - - class SessionManager(BaseSessionManager): - def list_sessions(self, *args, **kwargs): - """Override default list_sessions() method""" - self.mount_studies() - result = super(SessionManager, self).list_sessions(*args, **kwargs) - return result - - def mount_studies(self): - """Execute mount_s3.sh if it hasn't already been run""" - if not hasattr(self, 'studies_mounted'): - mounting_result = subprocess.run( - "mount_s3.sh", - stdout=subprocess.PIPE, stderr=subprocess.STDOUT - ) - - # Log results - if mounting_result.stdout: - for line in mounting_result.stdout.decode("utf-8").split("\n"): - if line: # Skip empty lines - self.log.info(line) - - self.studies_mounted = True - - c.NotebookApp.session_manager_class = SessionManager -EOF +install_jq() { + if command -v jq >/dev/null 2>&1; then + return 0 + fi + if [ -x "${FILES_DIR}/offline-packages/jq-1.5-linux64" ]; then + sudo mv "${FILES_DIR}/offline-packages/jq-1.5-linux64" "/usr/local/bin/jq" + sudo chmod +x "/usr/local/bin/jq" + return 0 + fi + sudo yum install -y jq } -# Install dependencies -echo "Installing JQ" -sudo mv "${FILES_DIR}/offline-packages/jq-1.5-linux64" "/usr/local/bin/jq" -chmod +x "/usr/local/bin/jq" -echo "Finish installing jq" - -echo "Copying Goofys from bootstrap.sh" -cp "${FILES_DIR}/offline-packages/goofys" /usr/local/bin/goofys -chmod +x "/usr/local/bin/goofys" +install_fuse() { + if lsmod 2>/dev/null | grep -q '^fuse '; then + return 0 + fi + if [ -f "${FILES_DIR}/offline-packages/ec2-linux/fuse-2.9.2-11.amzn2.x86_64.rpm" ]; then + sudo yum localinstall -y "${FILES_DIR}/offline-packages/ec2-linux/fuse-2.9.2-11.amzn2.x86_64.rpm" + return 0 + fi + sudo yum install -y fuse fuse-common 2>/dev/null || sudo yum install -y fuse +} -# Install ec2 instance connect agent -sudo yum install ec2-instance-connect-1.1 +install_goofys() { + if command -v goofys >/dev/null 2>&1; then + return 0 + fi + if [ -x "${FILES_DIR}/offline-packages/goofys" ]; then + sudo cp "${FILES_DIR}/offline-packages/goofys" /usr/local/bin/goofys + sudo chmod +x /usr/local/bin/goofys + return 0 + fi + curl -fsSL -o /tmp/goofys "$GOOFYS_URL" + sudo mv /tmp/goofys /usr/local/bin/goofys + sudo chmod +x /usr/local/bin/goofys +} -# Create S3 mount script and config file -echo "Mounting S3" -chmod +x "${FILES_DIR}/bin/mount_s3.sh" -ln -s "${FILES_DIR}/bin/mount_s3.sh" "/usr/local/bin/mount_s3.sh" -# Exit if no S3 mounts were specified -[ -z "$S3_MOUNTS" -o "$S3_MOUNTS" = "[]" ] && exit 0 -printf "%s" "$S3_MOUNTS" > "/usr/local/etc/s3-mounts.json" -echo "Finish mounting S3" +case "$(env_type)" in + "ec2-linux"|"rstudio") + install_jq + install_fuse + install_goofys + ;; +esac -OS_VERSION=`cat /etc/os-release | grep VERSION= | sed 's/VERSION="//' | sed 's/"//'` +sudo mkdir -p /usr/local/etc +sudo chmod +x "${FILES_DIR}/bin/mount_s3.sh" +sudo ln -sf "${FILES_DIR}/bin/mount_s3.sh" "/usr/local/bin/mount_s3.sh" +printf "%s" "$S3_MOUNTS" | sudo tee /usr/local/etc/s3-mounts.json >/dev/null +sudo chmod 644 /usr/local/etc/s3-mounts.json -# Apply updates to environments based on environment type case "$(env_type)" in - "ec2-linux") # Add mount script to bash profile - yum install -y fuse fuse-common - modprobe fuse - printf "\n# Mount S3 study data\nmount_s3.sh\n\n" >> "/home/ec2-user/.bash_profile" - ;; - "rstudio") # Add mount script to bash profile - yum install -y fuse fuse-common - modprobe fuse - printf "\n# Mount S3 study data\nmount_s3.sh\n\n" >> "/home/${RSTUDIO_USER}/.bash_profile" - ;; - "nextflow") # Add mount script to bash profile - yum install -y fuse fuse-common - modprobe fuse - printf "\n# Mount S3 study data\nmount_s3.sh\n\n" >> "/home/ec2-user/.bash_profile" - ;; - "sagemaker") # Update config and restart Jupyter - if [ $OS_VERSION = '2' ] - then - echo "Installing fuse for AL2" - cd "${FILES_DIR}/offline-packages/sagemaker/fuse-2.9.4_AL2" - sudo yum --disablerepo=* localinstall -y *.rpm - echo "Finish installing fuse" - echo "Installing boto3 for AL2" - cd "${FILES_DIR}/offline-packages/sagemaker/boto3" - sudo yum --disablerepo=* localinstall -y python2-boto3-1.4.4-1.amzn2.noarch.rpm - echo "Finish installing boto3" - else - echo "Installing fuse for AL1" - cd "${FILES_DIR}/offline-packages/sagemaker/fuse-2.9.4" - sudo yum --disablerepo=* localinstall -y *.rpm - echo "Finish installing fuse" + "ec2-linux") + if ! grep -q 'mount_s3.sh' /home/ec2-user/.bash_profile 2>/dev/null; then + printf '\n# Mount S3 study data\nmount_s3.sh\n\n' >> /home/ec2-user/.bash_profile fi - update_jupyter_config "/home/ec2-user/.jupyter/jupyter_notebook_config.py" - if [ $OS_VERSION = '2' ] - then - systemctl restart jupyter-server - else - initctl restart jupyter-server --no-wait + chown ec2-user:ec2-user /home/ec2-user/.bash_profile + ;; + "rstudio") + if ! grep -q 'mount_s3.sh' /home/rstudio-user/.bash_profile 2>/dev/null; then + printf '\n# Mount S3 study data\nmount_s3.sh\n\n' >> /home/rstudio-user/.bash_profile fi - ;; + chown rstudio-user:rstudio-user /home/rstudio-user/.bash_profile 2>/dev/null || true + ;; esac exit 0 diff --git a/scripts/bootstrap-scripts/get_bootstrap.sh b/scripts/bootstrap-scripts/get_bootstrap.sh index d999060..43e821c 100644 --- a/scripts/bootstrap-scripts/get_bootstrap.sh +++ b/scripts/bootstrap-scripts/get_bootstrap.sh @@ -1,26 +1,18 @@ #!/usr/bin/env bash bootstrap_s3_location="$1" s3_mounts="$2" -rstudio_user="$3" -region="$4" - -if [ -z "$region" ] -then -region=us-east-2 -fi INSTALL_DIR="/usr/local/share/workspace-environment" # Download instance files and execute bootstrap script -sudo mkdir "$INSTALL_DIR" -sudo aws s3 sync "$bootstrap_s3_location" "$INSTALL_DIR" --region $region +sudo mkdir -p "$INSTALL_DIR" +sudo aws s3 sync "$bootstrap_s3_location" "$INSTALL_DIR" bootstrap_script="$INSTALL_DIR/bootstrap.sh" if [ -s "$bootstrap_script" ] then sudo chmod 500 "$bootstrap_script" - sudo "$bootstrap_script" "$s3_mounts" "$rstudio_user" + sudo "$bootstrap_script" "$s3_mounts" fi exit 0 -