Update kubernetes workers

Adds codebuild worker
Adds datadog cluster agent
Tweaks some things
This commit is contained in:
Mark Smith 2019-12-06 21:59:50 -08:00
parent a6e2e7c50d
commit 85a4562906
19 changed files with 290 additions and 50 deletions

View File

@ -0,0 +1,5 @@
# Datadog
These manifests won't run as-is; you'll need to follow most of the directions here:
https://docs.datadoghq.com/agent/cluster_agent/setup/

View File

@ -0,0 +1,38 @@
# Read-only (get) permissions for the Datadog agent.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: datadog-agent
rules:
  # This is required by the agent to query the Kubelet API.
  - apiGroups: [""]
    resources:
      - nodes/metrics
      - nodes/spec
      - nodes/proxy  # Required to get /pods
    verbs: [get]
  # Non-resource endpoints the agent is allowed to GET.
  - nonResourceURLs: ["/version", "/healthz", "/metrics"]
    verbs: [get]
---
# Identity named as the subject of the datadog-agent ClusterRoleBinding.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: datadog-agent
  namespace: default
---
# Grants the datadog-agent ClusterRole to the datadog-agent ServiceAccount
# in the default namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: datadog-agent
subjects:
  - kind: ServiceAccount
    name: datadog-agent
    namespace: default
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: datadog-agent

View File

@ -0,0 +1,68 @@
# Permissions for the dca identity (the ServiceAccount the Datadog Cluster
# Agent Deployment runs as).
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: dca
rules:
  # Read and watch core cluster objects.
  - apiGroups: [""]
    resources:
      - services
      - events
      - endpoints
      - pods
      - nodes
      - componentstatuses
    verbs: [get, list, watch]
  # List and watch HorizontalPodAutoscalers.
  - apiGroups: ["autoscaling"]
    resources: [horizontalpodautoscalers]
    verbs: [list, watch]
  # Read and update the two named state ConfigMaps.
  - apiGroups: [""]
    resources: [configmaps]
    resourceNames:
      - datadogtoken  # Kubernetes event collection state
      - datadog-leader-election  # Leader election token
    verbs: [get, update]
  # To create the leader election token. This rule has no resourceNames, so
  # its verbs apply to every ConfigMap.
  - apiGroups: [""]
    resources: [configmaps]
    verbs: [create, get, update]
  # Non-resource endpoints the cluster agent is allowed to GET.
  - nonResourceURLs: ["/version", "/healthz"]
    verbs: [get]
---
# Grants the dca ClusterRole to the dca ServiceAccount in the default
# namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: dca
subjects:
  - kind: ServiceAccount
    name: dca
    namespace: default
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: dca
---
# Identity named as the subject of the dca ClusterRoleBinding.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: dca
  namespace: default

View File

@ -0,0 +1,12 @@
# Service in front of the pods labelled app: datadog-cluster-agent.
apiVersion: v1
kind: Service
metadata:
  name: datadog-cluster-agent
  # Pinned explicitly: the datadog-cluster-agent Deployment is declared in the
  # default namespace, and a Service only selects pods in its own namespace.
  # Without this, applying with a different -n would split the two apart.
  namespace: default
  labels:
    app: datadog-cluster-agent
spec:
  ports:
    # Has to be the same as the one exposed in the DCA. Default is 5005.
    - port: 5005
      protocol: TCP
  selector:
    app: datadog-cluster-agent

View File

@ -0,0 +1,42 @@
# Runs the Datadog Cluster Agent under the dca ServiceAccount.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: datadog-cluster-agent
  namespace: default
spec:
  selector:
    matchLabels:
      app: datadog-cluster-agent
  template:
    metadata:
      labels:
        app: datadog-cluster-agent
      name: datadog-agent
      annotations:
        # Autodiscovery: run a prometheus check against this container's
        # http://%%host%%:5000/metrics endpoint and report the listed metrics
        # under the datadog.cluster_agent namespace.
        ad.datadoghq.com/datadog-cluster-agent.check_names: '["prometheus"]'
        ad.datadoghq.com/datadog-cluster-agent.init_configs: '[{}]'
        ad.datadoghq.com/datadog-cluster-agent.instances: '[{"prometheus_url": "http://%%host%%:5000/metrics","namespace": "datadog.cluster_agent","metrics": ["go_goroutines","go_memstats_*","process_*","api_requests","datadog_requests","external_metrics", "cluster_checks_*"]}]'
    spec:
      serviceAccountName: dca
      containers:
        - name: datadog-cluster-agent
          image: datadog/cluster-agent:latest
          imagePullPolicy: Always
          env:
            # Credentials come from Secrets; both must exist in this
            # Deployment's namespace.
            - name: DD_API_KEY
              valueFrom:
                secretKeyRef:
                  name: datadog-secret
                  key: api-key
            - name: DD_CLUSTER_AGENT_AUTH_TOKEN
              valueFrom:
                secretKeyRef:
                  name: datadog-auth-token
                  key: token
            # Feature switches; env values must be strings, hence the quotes.
            - name: DD_COLLECT_KUBERNETES_EVENTS
              value: "true"
            - name: DD_LEADER_ELECTION
              value: "true"
            - name: DD_EXTERNAL_METRICS_PROVIDER_ENABLED
              value: "true"

View File

@ -0,0 +1,21 @@
#
# You will need to insert the base64 encoded secrets below.
#
# Datadog API key, read by workloads through secretKeyRef (key: api-key).
apiVersion: v1
kind: Secret
metadata:
  name: datadog-secret
  # Pinned explicitly: a secretKeyRef only resolves Secrets in the pod's own
  # namespace, and the cluster agent Deployment is declared in default.
  namespace: default
  labels:
    app: "datadog"
type: Opaque
data:
  # Placeholder, not valid base64: replace with the base64-encoded API key
  # before applying.
  api-key: "some base64 api key"
---
# Shared auth token, read through secretKeyRef (key: token) as
# DD_CLUSTER_AGENT_AUTH_TOKEN.
apiVersion: v1
kind: Secret
metadata:
  name: datadog-auth-token
  # Same namespace as the workloads that reference this Secret.
  namespace: default
type: Opaque
data:
  # Placeholder: replace with a 32 character string, base64 encoded.
  token: "some 32 character string, base64 encoded"

View File

@ -36,9 +36,9 @@ spec:
name: traceport
protocol: TCP
env:
## Set the Datadog API Key related to your Organization
## If you use the Kubernetes Secret use the following env variable:
## Set the Datadog API Key related to your Organization, and auth token
- {name: DD_API_KEY, valueFrom: { secretKeyRef: { name: datadog-secret, key: api-key }}}
- {name: DD_CLUSTER_AGENT_AUTH_TOKEN, valueFrom: { secretKeyRef: { name: datadog-auth-token, key: token }}}
## Set DD_SITE to "datadoghq.eu" to send your Agent data to the Datadog EU site
- {name: DD_SITE, value: "datadoghq.com"}
@ -47,9 +47,11 @@ spec:
- {name: DD_DOGSTATSD_NON_LOCAL_TRAFFIC, value: "true" }
- {name: KUBERNETES, value: "true"}
- {name: DD_HEALTH_PORT, value: "5555"}
- {name: DD_COLLECT_KUBERNETES_EVENTS, value: "true" }
- {name: DD_LEADER_ELECTION, value: "true" }
- {name: DD_APM_ENABLED, value: "true" }
## We have a cluster agent, don't do these things
- {name: DD_COLLECT_KUBERNETES_EVENTS, value: "false" }
- {name: DD_LEADER_ELECTION, value: "false" }
- {name: DD_APM_ENABLED, value: "false" }
## Enable logs collection
- {name: DD_LOGS_ENABLED, value: "true" }

View File

@ -1,14 +0,0 @@
#
# You will need to insert the base64 encoded secret below.
#
apiVersion: v1
kind: Secret
metadata:
name: datadog-secret
labels:
app: "datadog"
type: Opaque
data:
api-key: "some base64 api key"
---

View File

@ -0,0 +1,57 @@
# Worker that runs bin/worker/codebuild-notifier via /opt/startup-prod.sh.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: worker-codebuild-notifier
spec:
  replicas: 1
  selector:
    matchLabels:
      app: worker-codebuild-notifier
  template:
    metadata:
      labels:
        app: worker-codebuild-notifier
      annotations:
        # Datadog log tagging for this container (source/service).
        ad.datadoghq.com/worker-codebuild-notifier.logs: '[{"source":"worker","service":"codebuild-notifier"}]'
    spec:
      volumes:
        # Site configuration, sourced from the dw-config Secret.
        - name: config
          secret:
            secretName: dw-config
      containers:
        - name: worker-codebuild-notifier
          image: 194396987458.dkr.ecr.us-east-1.amazonaws.com/dreamwidth/worker:latest
          command:
            - bash
          args:
            - /opt/startup-prod.sh
            - bin/worker/codebuild-notifier
            - "-v"
          env:
            # The node's IP, exposed to the process as DOGSTATSD_HOST_IP.
            - name: DOGSTATSD_HOST_IP
              valueFrom:
                fieldRef:
                  fieldPath: status.hostIP
          resources:
            requests:
              cpu: 50m
              memory: 300M
          volumeMounts:
            # Mounted read-only at /dw/etc.
            - name: config
              mountPath: /dw/etc
              readOnly: true
---
# Autoscaler for the worker-codebuild-notifier Deployment. minReplicas and
# maxReplicas are both 1, so the replica count stays at one.
apiVersion: autoscaling/v2beta2
kind: HorizontalPodAutoscaler
metadata:
  name: worker-codebuild-notifier
spec:
  minReplicas: 1
  maxReplicas: 1
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: worker-codebuild-notifier
  metrics:
    # Target: average CPU utilization of 100% of the requested CPU.
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 100

View File

@ -3,7 +3,7 @@ kind: Deployment
metadata:
name: worker-dw-esn-cluster-subs
spec:
replicas: 5
replicas: 3
selector:
matchLabels:
app: worker-dw-esn-cluster-subs
@ -21,7 +21,7 @@ spec:
args: ["/opt/startup-prod.sh", "bin/worker/dw-esn-cluster-subs", "-v"]
resources:
requests:
cpu: 100m
cpu: 300m
memory: 300M
volumeMounts:
- name: config
@ -46,7 +46,7 @@ spec:
apiVersion: apps/v1
kind: Deployment
name: worker-dw-esn-cluster-subs
minReplicas: 5
minReplicas: 3
maxReplicas: 20
metrics:
- type: Resource

View File

@ -3,7 +3,7 @@ kind: Deployment
metadata:
name: worker-dw-esn-filter-subs
spec:
replicas: 5
replicas: 3
selector:
matchLabels:
app: worker-dw-esn-filter-subs
@ -46,7 +46,7 @@ spec:
apiVersion: apps/v1
kind: Deployment
name: worker-dw-esn-filter-subs
minReplicas: 5
minReplicas: 3
maxReplicas: 20
metrics:
- type: Resource

View File

@ -3,7 +3,7 @@ kind: Deployment
metadata:
name: worker-dw-esn-fired-event
spec:
replicas: 5
replicas: 3
selector:
matchLabels:
app: worker-dw-esn-fired-event
@ -21,7 +21,7 @@ spec:
args: ["/opt/startup-prod.sh", "bin/worker/dw-esn-fired-event", "-v"]
resources:
requests:
cpu: 100m
cpu: 300m
memory: 300M
volumeMounts:
- name: config
@ -46,7 +46,7 @@ spec:
apiVersion: apps/v1
kind: Deployment
name: worker-dw-esn-fired-event
minReplicas: 5
minReplicas: 3
maxReplicas: 20
metrics:
- type: Resource

View File

@ -3,7 +3,7 @@ kind: Deployment
metadata:
name: worker-dw-esn-process-sub
spec:
replicas: 10
replicas: 3
selector:
matchLabels:
app: worker-dw-esn-process-sub
@ -21,7 +21,7 @@ spec:
args: ["/opt/startup-prod.sh", "bin/worker/dw-esn-process-sub", "-v"]
resources:
requests:
cpu: 50m
cpu: 300m
memory: 300M
volumeMounts:
- name: config
@ -46,7 +46,7 @@ spec:
apiVersion: apps/v1
kind: Deployment
name: worker-dw-esn-process-sub
minReplicas: 10
minReplicas: 3
maxReplicas: 50
metrics:
- type: Resource

View File

@ -3,7 +3,7 @@ kind: Deployment
metadata:
name: worker-esn-cluster-subs
spec:
replicas: 2
replicas: 1
selector:
matchLabels:
app: worker-esn-cluster-subs
@ -46,7 +46,7 @@ spec:
apiVersion: apps/v1
kind: Deployment
name: worker-esn-cluster-subs
minReplicas: 2
minReplicas: 1
maxReplicas: 20
metrics:
- type: Resource

View File

@ -3,7 +3,7 @@ kind: Deployment
metadata:
name: worker-esn-filter-subs
spec:
replicas: 2
replicas: 1
selector:
matchLabels:
app: worker-esn-filter-subs
@ -46,7 +46,7 @@ spec:
apiVersion: apps/v1
kind: Deployment
name: worker-esn-filter-subs
minReplicas: 2
minReplicas: 1
maxReplicas: 20
metrics:
- type: Resource

View File

@ -3,7 +3,7 @@ kind: Deployment
metadata:
name: worker-esn-fired-event
spec:
replicas: 2
replicas: 1
selector:
matchLabels:
app: worker-esn-fired-event
@ -46,7 +46,7 @@ spec:
apiVersion: apps/v1
kind: Deployment
name: worker-esn-fired-event
minReplicas: 2
minReplicas: 1
maxReplicas: 20
metrics:
- type: Resource

View File

@ -3,7 +3,7 @@ kind: Deployment
metadata:
name: worker-esn-process-sub
spec:
replicas: 10
replicas: 1
selector:
matchLabels:
app: worker-esn-process-sub
@ -46,7 +46,7 @@ spec:
apiVersion: apps/v1
kind: Deployment
name: worker-esn-process-sub
minReplicas: 10
minReplicas: 1
maxReplicas: 50
metrics:
- type: Resource

View File

@ -3,7 +3,7 @@ kind: Deployment
metadata:
name: worker-send-email-ses
spec:
replicas: 50
replicas: 40
selector:
matchLabels:
app: worker-send-email-ses
@ -46,7 +46,7 @@ spec:
apiVersion: apps/v1
kind: Deployment
name: worker-send-email-ses
minReplicas: 50
minReplicas: 40
maxReplicas: 100
metrics:
- type: Resource

View File

@ -4,17 +4,26 @@ use strict;
use v5.10;
my %workers = (
# Name MinCt, MaxCt, Memory, MilliCpu, TgtCpu
'esn-cluster-subs' => [ 2, 20, '300M', '100m', 100 ],
'dw-esn-cluster-subs' => [ 5, 20, '300M', '100m', 100 ],
'esn-filter-subs' => [ 2, 20, '300M', '300m', 100 ],
'dw-esn-filter-subs' => [ 5, 20, '300M', '300m', 100 ],
'esn-fired-event' => [ 2, 20, '300M', '100m', 100 ],
'dw-esn-fired-event' => [ 5, 20, '300M', '100m', 100 ],
'esn-process-sub' => [ 10, 50, '300M', '50m', 100 ],
'dw-esn-process-sub' => [ 10, 50, '300M', '50m', 100 ],
'send-email-ses' => [ 50, 100, '300M', '50m', 100 ],
# Name MinCt, MaxCt, Memory, MilliCpu, TgtCpu
# New SQS based workers
'dw-esn-cluster-subs' => [ 3, 20, '300M', '300m', 100 ],
'dw-esn-filter-subs' => [ 3, 20, '300M', '300m', 100 ],
'dw-esn-fired-event' => [ 3, 20, '300M', '300m', 100 ],
'dw-esn-process-sub' => [ 3, 50, '300M', '300m', 100 ],
# Old style ESN workers
'esn-cluster-subs' => [ 1, 20, '300M', '100m', 100 ],
'esn-filter-subs' => [ 1, 20, '300M', '300m', 100 ],
'esn-fired-event' => [ 1, 20, '300M', '100m', 100 ],
'esn-process-sub' => [ 1, 50, '300M', '50m', 100 ],
# Other workers
'send-email-ses' => [ 40, 100, '300M', '50m', 100 ],
'synsuck' => [ 10, 30, '300M', '100m', 100 ],
# Misc site utilities
'codebuild-notifier' => [ 1, 1, '300M', '50m', 100 ],
);
my $template;