Skip to content

Commit 3b7be33

Browse files
authored
Batch of upgrades and cleanups (#173)
* Upgrade our EKS cluster to 1.20 and switch to a Managed Node Group * This lets us take advantage of higher pod:node ratios due to networking advances * Add S3-based logging to our ALBs so we can better catch errors when they alarm * Upgrade ArgoCD to the latest, 2.0.5 * Clean up Vertical Pod Autoscalers (VPAs) since we don't use them anymore Signed-off-by: Irving Popovetsky <irving@honeycomb.io>
1 parent 632939d commit 3b7be33

11 files changed

Lines changed: 2191 additions & 1670 deletions

File tree

kubernetes/argocd/application-crd.yaml

Lines changed: 1391 additions & 1318 deletions
Large diffs are not rendered by default.

kubernetes/argocd/appproject-crd.yaml

Lines changed: 227 additions & 196 deletions
Large diffs are not rendered by default.

kubernetes/argocd/install.yaml

Lines changed: 353 additions & 117 deletions
Large diffs are not rendered by default.
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
---
# Redis for Argo CD, run as a StatefulSet so its data directory (/data) is
# backed by a per-replica PersistentVolumeClaim (see volumeClaimTemplates).
apiVersion: apps/v1
kind: StatefulSet
metadata:
  labels:
    app.kubernetes.io/component: redis
    app.kubernetes.io/name: argocd-redis
    app.kubernetes.io/part-of: argocd
  name: argocd-redis
spec:
  selector:
    matchLabels:
      app.kubernetes.io/name: argocd-redis
  serviceName: argocd-redis
  template:
    metadata:
      labels:
        app.kubernetes.io/name: argocd-redis
    spec:
      terminationGracePeriodSeconds: 10
      containers:
        - name: redis
          resources:
            requests:
              memory: "100Mi"
              cpu: "100m" # equivalent to 0.1 of a CPU core
          # Persistence flags passed to redis-server: RDB snapshot rule
          # "60 1000" plus the append-only file.
          args:
            - --save
            - "60 1000"
            - --appendonly
            - "yes"
          image: redis:6.2.4-alpine
          imagePullPolicy: Always
          ports:
            - containerPort: 6379
          volumeMounts:
            - name: redis-data
              mountPath: /data
      # Pod-level security context (fsGroup is only valid at pod level).
      securityContext:
        fsGroup: 1000
        runAsGroup: 1000
        runAsNonRoot: true
        runAsUser: 1000
      serviceAccountName: argocd-redis
  volumeClaimTemplates:
    - metadata:
        name: redis-data
      spec:
        accessModes: [ "ReadWriteOnce" ]
        resources:
          requests:
            storage: 10Gi
Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,151 @@
1+
#!/bin/bash
2+
3+
# Strict mode: abort on any failing command, on use of an unset variable,
# and when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail
6+
7+
#######################################
# Report the line on which the script failed.
# Arguments: $1 - line number of the failing command
# Outputs:   writes the diagnostic to stderr, so it never mixes with the
#            max-pods value the script prints on stdout
#######################################
err_report() {
  echo "Exited with error on line $1" >&2
}
# Single quotes defer $LINENO expansion until the trap actually fires.
trap 'err_report $LINENO' ERR
11+
12+
#######################################
# Print the usage text for this script.
# Outputs: usage summary and the list of supported options on stdout
#######################################
print_help() {
  # Unquoted delimiter so that $0 expands to the invoked script name.
  cat <<EOF
usage: $0 <instance(s)> [options]
Calculates maxPods value to be used when starting up the kubelet.
-h,--help print this help.
--instance-type Specify the instance type to calculate max pods value.
--instance-type-from-imds Use this flag if the instance type should be fetched from IMDS.
--cni-version Specify the version of the CNI (example - 1.7.5).
--cni-custom-networking-enabled Use this flag to indicate if CNI custom networking mode has been enabled.
--cni-prefix-delegation-enabled Use this flag to indicate if CNI prefix delegation has been enabled.
--cni-max-eni specify how many ENIs should be used for prefix delegation. Defaults to using all ENIs per instance.
EOF
}
23+
24+
# Arguments that are not recognised options are collected here.
POSITIONAL=()

#######################################
# Abort with a readable message when a value-taking option is the last
# argument (otherwise `set -o nounset` dies with "$2: unbound variable").
# Arguments: $1 - option name, $2 - number of arguments still unparsed
# Outputs:   error message on stderr; exits 1 when the value is missing
#######################################
require_option_value() {
  if [[ "$2" -lt 2 ]]; then
    echo "Option $1 requires a value." >&2
    exit 1
  fi
}

#######################################
# Parse the command line into the script's configuration variables.
# Globals:   writes INSTANCE_TYPE, INSTANCE_TYPE_FROM_IMDS, CNI_VERSION,
#            CNI_CUSTOM_NETWORKING_ENABLED, CNI_PREFIX_DELEGATION_ENABLED,
#            CNI_MAX_ENI and appends to POSITIONAL
# Arguments: the script's command-line arguments
# Returns:   exits 1 after printing help for -h/--help or on a missing value
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -h|--help)
        print_help
        exit 1
        ;;
      --instance-type)
        require_option_value "$1" "$#"
        INSTANCE_TYPE=$2
        shift 2
        ;;
      --instance-type-from-imds)
        INSTANCE_TYPE_FROM_IMDS=true
        shift
        ;;
      --cni-version)
        require_option_value "$1" "$#"
        CNI_VERSION=$2
        shift 2
        ;;
      --cni-custom-networking-enabled)
        CNI_CUSTOM_NETWORKING_ENABLED=true
        shift
        ;;
      --cni-prefix-delegation-enabled)
        CNI_PREFIX_DELEGATION_ENABLED=true
        shift
        ;;
      --cni-max-eni)
        require_option_value "$1" "$#"
        CNI_MAX_ENI=$2
        shift 2
        ;;
      *) # unknown option
        POSITIONAL+=("$1") # save it in an array for later
        shift              # past argument
        ;;
    esac
  done
}

parse_args "$@"
66+
67+
# Give every option a defined value so later reads are safe under nounset.
# String options default to empty; boolean flags default to "false".
: "${CNI_VERSION:=}"
: "${CNI_MAX_ENI:=}"
: "${INSTANCE_TYPE:=}"
: "${CNI_CUSTOM_NETWORKING_ENABLED:=false}"
: "${CNI_PREFIX_DELEGATION_ENABLED:=false}"
: "${INSTANCE_TYPE_FROM_IMDS:=false}"

# Flipped to true later if the CNI version is new enough.
PREFIX_DELEGATION_SUPPORTED=false
# Multiplier applied per ENI slot in the prefix-delegation formula.
IPS_PER_PREFIX=16
76+
77+
# Resolve the instance type: either ask the EC2 instance metadata service, or
# require that it was passed on the command line.
if [ "$INSTANCE_TYPE_FROM_IMDS" = true ]; then
  # Fetch a metadata session token first; it is sent with each request below.
  TOKEN=$(curl -m 10 -X PUT -H "X-aws-ec2-metadata-token-ttl-seconds: 600" -s "http://169.254.169.254/latest/api/token")
  # NOTE(review): assigning inside `export` hides a curl/jq failure from
  # errexit (ShellCheck SC2155); left unchanged to keep the region lookup
  # best-effort.
  export AWS_DEFAULT_REGION=$(curl -s --retry 5 -H "X-aws-ec2-metadata-token: $TOKEN" http://169.254.169.254/latest/dynamic/instance-identity/document | jq .region -r)
  INSTANCE_TYPE=$(curl -m 10 -H "X-aws-ec2-metadata-token: $TOKEN" -s http://169.254.169.254/latest/meta-data/instance-type)
elif [ -z "$INSTANCE_TYPE" ]; then
  # There's no reasonable default for an instanceType so force one to be provided to the script.
  # Diagnostics go to stderr: stdout is reserved for the computed value.
  echo "You must specify an instance type to calculate max pods value." >&2
  exit 1
fi

if [ -z "$CNI_VERSION" ]; then
  echo "You must specify a CNI Version to use. Example - 1.7.5" >&2
  exit 1
fi
91+
92+
#######################################
# Max IP addresses when prefix delegation is used:
#   enis * ((ips_per_eni - 1) * IPS_PER_PREFIX) + 2
# Globals:   IPS_PER_PREFIX (read)
# Arguments: $1 - number of ENIs usable for pods
#            $2 - IPv4 addresses per ENI for the instance type
# Outputs:   the computed integer on stdout
#######################################
calculate_max_ip_addresses_prefix_delegation() {
  # Locals keep these names from leaking into the caller's scope.
  local enis=$1
  local instance_max_eni_ips=$2
  echo $(( enis * ((instance_max_eni_ips - 1) * IPS_PER_PREFIX) + 2 ))
}
97+
98+
#######################################
# Max IP addresses when plain secondary IPs are used:
#   enis * (ips_per_eni - 1) + 2
# Arguments: $1 - number of ENIs usable for pods
#            $2 - IPv4 addresses per ENI for the instance type
# Outputs:   the computed integer on stdout
#######################################
calculate_max_ip_addresses_secondary_ips() {
  # Locals keep these names from leaking into the caller's scope.
  local enis=$1
  local instance_max_eni_ips=$2
  echo $(( enis * (instance_max_eni_ips - 1) + 2 ))
}
103+
104+
#######################################
# Print the smallest of the given numbers.
# Arguments: one or more numeric values
# Outputs:   the minimum on stdout
#######################################
min_number() {
  # One candidate per line -> ascending general-numeric sort -> first line.
  printf '%s\n' "$@" \
    | sort -g \
    | head -n 1
}
107+
108+
109+
#######################################
# Decide whether a CNI version supports prefix delegation, i.e. whether it
# is newer than 1.8 (major > 1, or major == 1 and minor > 8).
# Accepts "1.9.0", "1.9", "1" (minor defaults to 0) and a leading "v".
# Arguments: $1 - CNI version string
# Outputs:   a warning on stderr when the version cannot be parsed
# Returns:   0 if supported, 1 if not supported or unparseable
#######################################
cni_version_supports_prefix_delegation() {
  local version=${1#v}
  local version_re='^([0-9]+)(\.([0-9]+))?'
  local major minor
  if [[ ! "$version" =~ $version_re ]]; then
    echo "Could not parse CNI version '$1'; assuming prefix delegation is unsupported." >&2
    return 1
  fi
  # Force base 10 so components such as "09" are not read as invalid octal.
  major=$(( 10#${BASH_REMATCH[1]} ))
  minor=$(( 10#${BASH_REMATCH[3]:-0} ))
  (( major > 1 || (major == 1 && minor > 8) ))
}

if cni_version_supports_prefix_delegation "${CNI_VERSION:-}"; then
  PREFIX_DELEGATION_SUPPORTED=true
fi
115+
116+
# Look up the instance type's hypervisor, ENI limit, IPv4-per-ENI limit and
# default vCPU count in a single EC2 API call.
# --output json is explicit because the result is parsed with jq below and the
# CLI's configured default output format may be something else.
DESCRIBE_INSTANCES_RESULT=$(aws ec2 describe-instance-types \
  --instance-types "$INSTANCE_TYPE" \
  --output json \
  --query 'InstanceTypes[0].{Hypervisor: Hypervisor, EniCount: NetworkInfo.MaximumNetworkInterfaces, PodsPerEniCount: NetworkInfo.Ipv4AddressesPerInterface, CpuCount: VCpuInfo.DefaultVCpus}')

HYPERVISOR_TYPE=$(jq -r '.Hypervisor' <<<"$DESCRIBE_INSTANCES_RESULT")
IS_NITRO=false
if [[ "$HYPERVISOR_TYPE" == "nitro" ]]; then
  IS_NITRO=true
fi
INSTANCE_MAX_ENIS=$(jq -r '.EniCount' <<<"$DESCRIBE_INSTANCES_RESULT")
INSTANCE_MAX_ENIS_IPS=$(jq -r '.PodsPerEniCount' <<<"$DESCRIBE_INSTANCES_RESULT")
125+
126+
# Number of ENIs available for pods: all of the instance's ENIs, or the
# smaller of --cni-max-eni and the instance limit when the flag was given.
if [ -z "$CNI_MAX_ENI" ]; then
  enis_for_pods=$INSTANCE_MAX_ENIS
else
  enis_for_pods=$(min_number "$CNI_MAX_ENI" "$INSTANCE_MAX_ENIS")
fi

# With custom networking enabled, one ENI fewer is counted for pods.
if [ "$CNI_CUSTOM_NETWORKING_ENABLED" = true ]; then
  enis_for_pods=$((enis_for_pods - 1))
fi

# Prefix delegation applies only when the instance is Nitro, the flag was
# passed, and the CNI version supports it; otherwise use secondary IPs.
# Arguments are quoted so an empty value cannot silently shift positions.
if [ "$IS_NITRO" = true ] && [ "$CNI_PREFIX_DELEGATION_ENABLED" = true ] && [ "$PREFIX_DELEGATION_SUPPORTED" = true ]; then
  max_pods=$(calculate_max_ip_addresses_prefix_delegation "$enis_for_pods" "$INSTANCE_MAX_ENIS_IPS")
else
  max_pods=$(calculate_max_ip_addresses_secondary_ips "$enis_for_pods" "$INSTANCE_MAX_ENIS_IPS")
fi
142+
143+
# Limit the total number of pods that can be launched on any instance type based on the vCPUs on that instance type.
MAX_POD_CEILING_FOR_LOW_CPU=110
MAX_POD_CEILING_FOR_HIGH_CPU=250
CPU_COUNT=$(jq -r '.CpuCount' <<<"$DESCRIBE_INSTANCES_RESULT")
# The script's result: the computed max_pods capped by the ceiling that
# matches the vCPU count (more than 30 vCPUs gets the higher ceiling).
# min_number already prints its result, so no wrapping echo is needed.
if [ "$CPU_COUNT" -gt 30 ]; then
  min_number "$MAX_POD_CEILING_FOR_HIGH_CPU" "$max_pods"
else
  min_number "$MAX_POD_CEILING_FOR_LOW_CPU" "$max_pods"
fi

kubernetes/eksctl/operationcode-backend.yaml

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6,25 +6,27 @@ metadata:
66
name: operationcode-backend
77
region: us-east-2
88

9-
nodeGroups:
10-
- name: eks-infra-spot
9+
managedNodeGroups:
10+
- name: eks-infra-spot-v2
11+
instanceTypes:
12+
- t3.small
13+
spot: true
1114
minSize: 3
1215
desiredCapacity: 3
1316
maxSize: 5
14-
# use Spot instance pricing
15-
instancesDistribution:
16-
instanceTypes:
17-
- t3.small
18-
onDemandBaseCapacity: 0
19-
onDemandPercentageAboveBaseCapacity: 0
2017
volumeSize: 20
18+
volumeType: gp3
19+
# For this to be valid, run:
20+
# kubectl set env daemonset aws-node -n kube-system ENABLE_PREFIX_DELEGATION=true
21+
# kubectl set env daemonset aws-node -n kube-system WARM_PREFIX_TARGET=1
22+
maxPodsPerNode: 30
2123
ssh:
2224
allow: true
2325
publicKeyName: oc-ops
2426
labels:
2527
nodegroup-type: infra
2628
tags:
27-
Name: eks-infra-spot
29+
Name: eks-infra-spot-v2
2830
iam:
2931
withAddonPolicies:
3032
imageBuilder: true

kubernetes/operationcode_python_backend/base/deployment.yaml

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,4 @@
11
---
2-
apiVersion: "autoscaling.k8s.io/v1beta2"
3-
kind: VerticalPodAutoscaler
4-
metadata:
5-
name: back-end-vpa
6-
spec:
7-
targetRef:
8-
apiVersion: "apps/v1"
9-
kind: Deployment
10-
name: back-end
11-
---
122
apiVersion: apps/v1
133
kind: Deployment
144
metadata:

kubernetes/operationcode_python_backend/overlays/prod/ingress.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@ metadata:
1111
alb.ingress.kubernetes.io/ssl-policy: ELBSecurityPolicy-TLS-1-2-2017-01
1212
alb.ingress.kubernetes.io/actions.ssl-redirect: '{"Type": "redirect", "RedirectConfig": { "Protocol": "HTTPS", "Port": "443", "StatusCode": "HTTP_301"}}'
1313
alb.ingress.kubernetes.io/actions.response-401: '{"Type":"fixed-response","FixedResponseConfig":{"ContentType":"text/plain","StatusCode":"401","MessageBody":"401 Not Authorized"}}'
14+
# All load balancer attributes share ONE annotation as a comma-separated list: repeating the key creates duplicate mapping keys, which are not merged.
alb.ingress.kubernetes.io/load-balancer-attributes: access_logs.s3.enabled=true,access_logs.s3.bucket=oc-alb-logs,access_logs.s3.prefix=oc-prod,routing.http2.enabled=true,idle_timeout.timeout_seconds=600
1417
labels:
1518
app: back-end
1619
spec:

kubernetes/operationcode_python_backend/overlays/staging/ingress.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@ metadata:
1111
alb.ingress.kubernetes.io/ssl-policy: ELBSecurityPolicy-TLS-1-2-2017-01
1212
alb.ingress.kubernetes.io/actions.ssl-redirect: '{"Type": "redirect", "RedirectConfig": { "Protocol": "HTTPS", "Port": "443", "StatusCode": "HTTP_301"}}'
1313
alb.ingress.kubernetes.io/actions.response-401: '{"Type":"fixed-response","FixedResponseConfig":{"ContentType":"text/plain","StatusCode":"401","MessageBody":"401 Not Authorized"}}'
14+
# All load balancer attributes share ONE annotation as a comma-separated list: repeating the key creates duplicate mapping keys, which are not merged.
alb.ingress.kubernetes.io/load-balancer-attributes: access_logs.s3.enabled=true,access_logs.s3.bucket=oc-alb-logs,access_logs.s3.prefix=oc-staging,routing.http2.enabled=true,idle_timeout.timeout_seconds=600
1417
labels:
1518
app: back-end
1619
spec:

kubernetes/resources_api/base/deployment.yaml

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,4 @@
11
---
2-
apiVersion: "autoscaling.k8s.io/v1beta2"
3-
kind: VerticalPodAutoscaler
4-
metadata:
5-
name: resources-api-vpa
6-
spec:
7-
targetRef:
8-
apiVersion: "apps/v1"
9-
kind: Deployment
10-
name: resources-api
11-
---
122
apiVersion: apps/v1
133
kind: Deployment
144
metadata:

0 commit comments

Comments
 (0)