mirror of
https://github.com/jcwimer/startup-infrastructure
synced 2026-04-14 14:56:47 +00:00
Switched to kubernetes
This commit is contained in:
74
roles/kubernetes/tasks/main.yml
Normal file
74
roles/kubernetes/tasks/main.yml
Normal file
@@ -0,0 +1,74 @@
|
||||
---
|
||||
- name: Create RKE directory
|
||||
file:
|
||||
path: "{{ rke_directory }}"
|
||||
state: directory
|
||||
mode: '0774'
|
||||
owner: "{{ standard_user }}"
|
||||
group: "{{ standard_user }}"
|
||||
delegate_to: localhost
|
||||
run_once: true
|
||||
become: true
|
||||
|
||||
- name: Create RKE Configs directory
|
||||
file:
|
||||
path: "{{ rke_directory }}/configs"
|
||||
state: directory
|
||||
delegate_to: localhost
|
||||
run_once: true
|
||||
become: true
|
||||
|
||||
- name: Install RKE
|
||||
get_url:
|
||||
dest: "{{ rke_directory }}/rke"
|
||||
url: https://github.com/rancher/rke/releases/download/v{{ rke_version }}/rke_linux-amd64
|
||||
delegate_to: localhost
|
||||
run_once: true
|
||||
become: true
|
||||
|
||||
- name: Make RKE executable
|
||||
file:
|
||||
dest: "{{ rke_directory }}/rke"
|
||||
mode: +x
|
||||
delegate_to: localhost
|
||||
run_once: true
|
||||
become: true
|
||||
|
||||
- name: Put RKE cluster config in place
|
||||
template:
|
||||
src: ../templates/rke-cluster-deployment.yaml.j2
|
||||
dest: "{{ rke_directory }}/{{ rke_cluster_name }}.yaml"
|
||||
delegate_to: localhost
|
||||
run_once: true
|
||||
become: true
|
||||
|
||||
- name: Put RKE configs in place
|
||||
template:
|
||||
src: ../templates/rke-configs/{{ item }}.j2
|
||||
dest: "{{ rke_directory }}/configs/{{ item }}"
|
||||
with_items:
|
||||
- kube-state-metrics-deployment.yaml
|
||||
- kube-state-metrics-service.yaml
|
||||
- kube-state-metrics-rbac.yaml
|
||||
delegate_to: localhost
|
||||
run_once: true
|
||||
become: true
|
||||
|
||||
- name: Run RKE
|
||||
shell: >
|
||||
bash -c "{{ rke_directory }}/rke up --config {{ rke_directory }}/{{ rke_cluster_name }}.yaml"
|
||||
delegate_to: localhost
|
||||
run_once: true
|
||||
become: true
|
||||
|
||||
- name: Set permissions on rke directory
|
||||
file:
|
||||
path: "{{ rke_directory }}"
|
||||
state: directory
|
||||
mode: '0774'
|
||||
owner: "{{ standard_user }}"
|
||||
group: "{{ standard_user }}"
|
||||
recurse: yes
|
||||
delegate_to: localhost
|
||||
run_once: true
|
||||
become: true
|
||||
12
roles/kubernetes/tasks/post-rke.yml
Normal file
12
roles/kubernetes/tasks/post-rke.yml
Normal file
@@ -0,0 +1,12 @@
|
||||
---
|
||||
- name: Copy RKE kube config back to nodes after RKE run
|
||||
copy:
|
||||
src: "{{ rke_directory }}/kube_config_{{ rke_cluster_name }}.yaml"
|
||||
dest: "{{ rke_node_directory }}/kube_config_{{ rke_cluster_name }}.yaml"
|
||||
become: true
|
||||
|
||||
- name: Copy RKE cluster state back to nodes after RKE run
|
||||
copy:
|
||||
src: "{{ rke_directory }}/{{ rke_cluster_name }}.rkestate"
|
||||
dest: "{{ rke_node_directory }}/{{ rke_cluster_name }}.rkestate"
|
||||
become: true
|
||||
70
roles/kubernetes/tasks/pre-rke.yml
Normal file
70
roles/kubernetes/tasks/pre-rke.yml
Normal file
@@ -0,0 +1,70 @@
|
||||
---
|
||||
- name: Update apt
|
||||
apt: update_cache=yes
|
||||
become: true
|
||||
|
||||
- name: Install programs to add debian repositories
|
||||
apt: name={{ item }} state=present force=yes
|
||||
with_items:
|
||||
- curl
|
||||
- apt-transport-https
|
||||
become: true
|
||||
|
||||
- name: Add kubernetes key
|
||||
apt_key:
|
||||
url: https://packages.cloud.google.com/apt/doc/apt-key.gpg
|
||||
state: present
|
||||
become: true
|
||||
|
||||
- name: Add kubernetes repo
|
||||
apt_repository:
|
||||
repo: deb https://apt.kubernetes.io/ kubernetes-xenial main
|
||||
state: present
|
||||
become: true
|
||||
|
||||
- name: Update apt
|
||||
apt: update_cache=yes
|
||||
become: true
|
||||
|
||||
- name: Install kubectl
|
||||
apt: name={{ item }} state=present force=yes
|
||||
with_items:
|
||||
- kubectl
|
||||
become: true
|
||||
|
||||
- name: Creates RKE directory on nodes
|
||||
file:
|
||||
path: "{{ rke_node_directory }}"
|
||||
state: directory
|
||||
mode: '0774'
|
||||
owner: "{{ standard_user }}"
|
||||
group: "{{ standard_user }}"
|
||||
become: true
|
||||
|
||||
- name: Check if RKE cluster state file exists
|
||||
stat:
|
||||
path: "{{ rke_node_directory }}/{{ rke_cluster_name }}.rkestate"
|
||||
register: cluster_state_result
|
||||
become: true
|
||||
|
||||
- name: Check if RKE kubeconfig file exists
|
||||
stat:
|
||||
path: "{{ rke_node_directory }}/kube_config_{{ rke_cluster_name }}.yaml"
|
||||
register: kube_config_result
|
||||
become: true
|
||||
|
||||
- name: Copy RKE cluster state back to local if it already exists
|
||||
fetch:
|
||||
src: "{{ rke_node_directory }}/{{ rke_cluster_name }}.rkestate"
|
||||
dest: "{{ rke_directory }}/{{ rke_cluster_name }}.rkestate"
|
||||
flat: yes
|
||||
when: cluster_state_result.stat.exists == True
|
||||
become: true
|
||||
|
||||
- name: Copy RKE kube config if it already exists
|
||||
fetch:
|
||||
src: "{{ rke_node_directory }}/kube_config_{{ rke_cluster_name }}.yaml"
|
||||
dest: "{{ rke_directory }}/kube_config_{{ rke_cluster_name }}.yaml"
|
||||
flat: yes
|
||||
when: kube_config_result.stat.exists == True
|
||||
become: true
|
||||
32
roles/kubernetes/templates/rke-cluster-deployment.yaml.j2
Normal file
32
roles/kubernetes/templates/rke-cluster-deployment.yaml.j2
Normal file
@@ -0,0 +1,32 @@
|
||||
---
|
||||
|
||||
ssh_key_path: {{ rke_ssh_key_location }}
|
||||
|
||||
cluster_name: rke_cluster_name
|
||||
ignore_docker_version: true
|
||||
system_images:
|
||||
kubernetes: rancher/hyperkube:v1.15.5-rancher1
|
||||
|
||||
nodes:
|
||||
{% for node in groups['kube-masters'] %}
|
||||
|
||||
- address: {{node}}
|
||||
name: {{node}}
|
||||
user: {{standard_user}}
|
||||
role:
|
||||
- controlplane
|
||||
- etcd
|
||||
{% endfor %}
|
||||
{% for node in groups['kube-workers'] %}
|
||||
|
||||
- address: {{node}}
|
||||
name: {{node}}
|
||||
user: {{standard_user}}
|
||||
role:
|
||||
- worker
|
||||
{% endfor %}
|
||||
|
||||
authentication:
|
||||
strategy: x509
|
||||
sans:
|
||||
- "kubernetes.{{ root_domain }}"
|
||||
@@ -0,0 +1,28 @@
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: alertmanager-config
|
||||
namespace: kube-system
|
||||
labels:
|
||||
kubernetes.io/cluster-service: "true"
|
||||
addonmanager.kubernetes.io/mode: EnsureExists
|
||||
data:
|
||||
alertmanager.yml: |
|
||||
global: null
|
||||
receivers:
|
||||
- name: default-receiver
|
||||
- name: email
|
||||
email_configs:
|
||||
- to: jacob.wimer@gmail.com
|
||||
from: jacob.wimer@gmail.com
|
||||
smarthost: smtp.gmail.com:587
|
||||
auth_username: "jacob.wimer@gmail.com"
|
||||
auth_identity: "jacob.wimer@gmail.com"
|
||||
auth_password: {{ alertmanager_email_password }}
|
||||
send_resolved: true
|
||||
|
||||
route:
|
||||
group_interval: 5m
|
||||
group_wait: 10s
|
||||
receiver: email
|
||||
repeat_interval: 3h
|
||||
@@ -0,0 +1,79 @@
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: alertmanager
|
||||
namespace: kube-system
|
||||
labels:
|
||||
k8s-app: alertmanager
|
||||
kubernetes.io/cluster-service: "true"
|
||||
addonmanager.kubernetes.io/mode: Reconcile
|
||||
version: v0.14.0
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
k8s-app: alertmanager
|
||||
version: v0.14.0
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
k8s-app: alertmanager
|
||||
version: v0.14.0
|
||||
annotations:
|
||||
scheduler.alpha.kubernetes.io/critical-pod: ''
|
||||
spec:
|
||||
priorityClassName: system-cluster-critical
|
||||
containers:
|
||||
- name: prometheus-alertmanager
|
||||
image: "prom/alertmanager:v0.14.0"
|
||||
imagePullPolicy: "IfNotPresent"
|
||||
args:
|
||||
- --config.file=/etc/config/alertmanager.yml
|
||||
- --storage.path=/data
|
||||
- --web.external-url=/
|
||||
ports:
|
||||
- containerPort: 9093
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /#/status
|
||||
port: 9093
|
||||
initialDelaySeconds: 30
|
||||
timeoutSeconds: 30
|
||||
volumeMounts:
|
||||
- name: config-volume
|
||||
mountPath: /etc/config
|
||||
- name: storage-volume
|
||||
mountPath: "/data"
|
||||
subPath: ""
|
||||
resources:
|
||||
limits:
|
||||
cpu: 10m
|
||||
memory: 50Mi
|
||||
requests:
|
||||
cpu: 10m
|
||||
memory: 50Mi
|
||||
- name: prometheus-alertmanager-configmap-reload
|
||||
image: "jimmidyson/configmap-reload:v0.1"
|
||||
imagePullPolicy: "IfNotPresent"
|
||||
args:
|
||||
- --volume-dir=/etc/config
|
||||
- --webhook-url=http://localhost:9093/-/reload
|
||||
volumeMounts:
|
||||
- name: config-volume
|
||||
mountPath: /etc/config
|
||||
readOnly: true
|
||||
resources:
|
||||
limits:
|
||||
cpu: 10m
|
||||
memory: 10Mi
|
||||
requests:
|
||||
cpu: 10m
|
||||
memory: 10Mi
|
||||
volumes:
|
||||
- name: config-volume
|
||||
configMap:
|
||||
name: alertmanager-config
|
||||
- name: storage-volume
|
||||
persistentVolumeClaim:
|
||||
claimName: alertmanager
|
||||
|
||||
@@ -0,0 +1,14 @@
|
||||
apiVersion: v1
|
||||
kind: PersistentVolumeClaim
|
||||
metadata:
|
||||
name: alertmanager
|
||||
namespace: kube-system
|
||||
labels:
|
||||
app: alertmanager
|
||||
spec:
|
||||
storageClassName: standard
|
||||
accessModes:
|
||||
- ReadWriteOnce
|
||||
resources:
|
||||
requests:
|
||||
storage: 2Gi
|
||||
@@ -0,0 +1,18 @@
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: alertmanager
|
||||
namespace: kube-system
|
||||
labels:
|
||||
kubernetes.io/cluster-service: "true"
|
||||
addonmanager.kubernetes.io/mode: Reconcile
|
||||
kubernetes.io/name: "Alertmanager"
|
||||
spec:
|
||||
ports:
|
||||
- name: http
|
||||
port: 80
|
||||
protocol: TCP
|
||||
targetPort: 9093
|
||||
selector:
|
||||
k8s-app: alertmanager
|
||||
type: "ClusterIP"
|
||||
@@ -0,0 +1,92 @@
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: kube-state-metrics
|
||||
namespace: kube-system
|
||||
labels:
|
||||
k8s-app: kube-state-metrics
|
||||
kubernetes.io/cluster-service: "true"
|
||||
addonmanager.kubernetes.io/mode: Reconcile
|
||||
version: v1.3.0
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
k8s-app: kube-state-metrics
|
||||
version: v1.3.0
|
||||
replicas: 1
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
k8s-app: kube-state-metrics
|
||||
version: v1.3.0
|
||||
annotations:
|
||||
scheduler.alpha.kubernetes.io/critical-pod: ''
|
||||
spec:
|
||||
priorityClassName: system-cluster-critical
|
||||
serviceAccountName: kube-state-metrics
|
||||
containers:
|
||||
- name: kube-state-metrics
|
||||
image: quay.io/coreos/kube-state-metrics:v1.3.0
|
||||
ports:
|
||||
- name: http-metrics
|
||||
containerPort: 8080
|
||||
- name: telemetry
|
||||
containerPort: 8081
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /healthz
|
||||
port: 8080
|
||||
initialDelaySeconds: 5
|
||||
timeoutSeconds: 5
|
||||
- name: addon-resizer
|
||||
image: k8s.gcr.io/addon-resizer:1.8.4
|
||||
resources:
|
||||
limits:
|
||||
cpu: 100m
|
||||
memory: 30Mi
|
||||
requests:
|
||||
cpu: 100m
|
||||
memory: 30Mi
|
||||
env:
|
||||
- name: MY_POD_NAME
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: metadata.name
|
||||
- name: MY_POD_NAMESPACE
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: metadata.namespace
|
||||
volumeMounts:
|
||||
- name: config-volume
|
||||
mountPath: /etc/config
|
||||
command:
|
||||
- /pod_nanny
|
||||
- --config-dir=/etc/config
|
||||
- --container=kube-state-metrics
|
||||
- --cpu=100m
|
||||
- --extra-cpu=1m
|
||||
- --memory=100Mi
|
||||
- --extra-memory=2Mi
|
||||
- --threshold=5
|
||||
- --deployment=kube-state-metrics
|
||||
volumes:
|
||||
- name: config-volume
|
||||
configMap:
|
||||
name: kube-state-metrics-config
|
||||
---
|
||||
# Config map for resource configuration.
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: kube-state-metrics-config
|
||||
namespace: kube-system
|
||||
labels:
|
||||
k8s-app: kube-state-metrics
|
||||
kubernetes.io/cluster-service: "true"
|
||||
addonmanager.kubernetes.io/mode: Reconcile
|
||||
data:
|
||||
NannyConfiguration: |-
|
||||
apiVersion: nannyconfig/v1alpha1
|
||||
kind: NannyConfiguration
|
||||
|
||||
|
||||
@@ -0,0 +1,104 @@
|
||||
apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
name: kube-state-metrics
|
||||
namespace: kube-system
|
||||
labels:
|
||||
kubernetes.io/cluster-service: "true"
|
||||
addonmanager.kubernetes.io/mode: Reconcile
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRole
|
||||
metadata:
|
||||
name: kube-state-metrics
|
||||
labels:
|
||||
kubernetes.io/cluster-service: "true"
|
||||
addonmanager.kubernetes.io/mode: Reconcile
|
||||
rules:
|
||||
- apiGroups: [""]
|
||||
resources:
|
||||
- configmaps
|
||||
- secrets
|
||||
- nodes
|
||||
- pods
|
||||
- services
|
||||
- resourcequotas
|
||||
- replicationcontrollers
|
||||
- limitranges
|
||||
- persistentvolumeclaims
|
||||
- persistentvolumes
|
||||
- namespaces
|
||||
- endpoints
|
||||
verbs: ["list", "watch"]
|
||||
- apiGroups: ["extensions"]
|
||||
resources:
|
||||
- daemonsets
|
||||
- deployments
|
||||
- replicasets
|
||||
verbs: ["list", "watch"]
|
||||
- apiGroups: ["apps"]
|
||||
resources:
|
||||
- statefulsets
|
||||
verbs: ["list", "watch"]
|
||||
- apiGroups: ["batch"]
|
||||
resources:
|
||||
- cronjobs
|
||||
- jobs
|
||||
verbs: ["list", "watch"]
|
||||
- apiGroups: ["autoscaling"]
|
||||
resources:
|
||||
- horizontalpodautoscalers
|
||||
verbs: ["list", "watch"]
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: Role
|
||||
metadata:
|
||||
name: kube-state-metrics-resizer
|
||||
namespace: kube-system
|
||||
labels:
|
||||
kubernetes.io/cluster-service: "true"
|
||||
addonmanager.kubernetes.io/mode: Reconcile
|
||||
rules:
|
||||
- apiGroups: [""]
|
||||
resources:
|
||||
- pods
|
||||
verbs: ["get"]
|
||||
- apiGroups: ["extensions"]
|
||||
resources:
|
||||
- deployments
|
||||
resourceNames: ["kube-state-metrics"]
|
||||
verbs: ["get", "update"]
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRoleBinding
|
||||
metadata:
|
||||
name: kube-state-metrics
|
||||
labels:
|
||||
kubernetes.io/cluster-service: "true"
|
||||
addonmanager.kubernetes.io/mode: Reconcile
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
kind: ClusterRole
|
||||
name: kube-state-metrics
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: kube-state-metrics
|
||||
namespace: kube-system
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: RoleBinding
|
||||
metadata:
|
||||
name: kube-state-metrics
|
||||
namespace: kube-system
|
||||
labels:
|
||||
kubernetes.io/cluster-service: "true"
|
||||
addonmanager.kubernetes.io/mode: Reconcile
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
kind: Role
|
||||
name: kube-state-metrics-resizer
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: kube-state-metrics
|
||||
namespace: kube-system
|
||||
|
||||
@@ -0,0 +1,23 @@
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: kube-state-metrics
|
||||
namespace: kube-system
|
||||
labels:
|
||||
kubernetes.io/cluster-service: "true"
|
||||
addonmanager.kubernetes.io/mode: Reconcile
|
||||
kubernetes.io/name: "kube-state-metrics"
|
||||
annotations:
|
||||
prometheus.io/scrape: 'true'
|
||||
spec:
|
||||
ports:
|
||||
- name: http-metrics
|
||||
port: 8080
|
||||
targetPort: http-metrics
|
||||
protocol: TCP
|
||||
- name: telemetry
|
||||
port: 8081
|
||||
targetPort: telemetry
|
||||
protocol: TCP
|
||||
selector:
|
||||
k8s-app: kube-state-metrics
|
||||
@@ -0,0 +1,32 @@
|
||||
apiVersion: extensions/v1beta1
|
||||
kind: Ingress
|
||||
metadata:
|
||||
namespace: kube-system
|
||||
name: alertmanager
|
||||
#annotations:
|
||||
# kubernetes.io/ingress.class: traefik
|
||||
spec:
|
||||
rules:
|
||||
- host: alertmanager.{{ domain }}
|
||||
http:
|
||||
paths:
|
||||
- backend:
|
||||
serviceName: alertmanager
|
||||
servicePort: 80
|
||||
---
|
||||
apiVersion: extensions/v1beta1
|
||||
kind: Ingress
|
||||
metadata:
|
||||
namespace: kube-system
|
||||
name: prometheus
|
||||
#annotations:
|
||||
# kubernetes.io/ingress.class: traefik
|
||||
spec:
|
||||
rules:
|
||||
- host: prometheus.{{ domain }}
|
||||
http:
|
||||
paths:
|
||||
- backend:
|
||||
serviceName: prometheus
|
||||
servicePort: 9090
|
||||
|
||||
@@ -0,0 +1,37 @@
|
||||
apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
name: nfs-client-provisioner
|
||||
---
|
||||
kind: Deployment
|
||||
apiVersion: extensions/v1beta1
|
||||
metadata:
|
||||
name: nfs-client-provisioner
|
||||
spec:
|
||||
replicas: 1
|
||||
strategy:
|
||||
type: Recreate
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: nfs-client-provisioner
|
||||
spec:
|
||||
serviceAccountName: nfs-client-provisioner
|
||||
containers:
|
||||
- name: nfs-client-provisioner
|
||||
image: quay.io/external_storage/nfs-client-provisioner:latest
|
||||
volumeMounts:
|
||||
- name: nfs-client-root
|
||||
mountPath: /persistentvolumes
|
||||
env:
|
||||
- name: PROVISIONER_NAME
|
||||
value: {{ domain }}/nfs
|
||||
- name: NFS_SERVER
|
||||
value: {{ nfs_location }}
|
||||
- name: NFS_PATH
|
||||
value: {{ rke_nfs_path }}
|
||||
volumes:
|
||||
- name: nfs-client-root
|
||||
nfs:
|
||||
server: {{ nfs_location }}
|
||||
path: {{ rke_nfs_path }}
|
||||
@@ -0,0 +1,58 @@
|
||||
kind: ServiceAccount
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: nfs-client-provisioner
|
||||
---
|
||||
kind: ClusterRole
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
metadata:
|
||||
name: nfs-client-provisioner-runner
|
||||
rules:
|
||||
- apiGroups: [""]
|
||||
resources: ["persistentvolumes"]
|
||||
verbs: ["get", "list", "watch", "create", "delete"]
|
||||
- apiGroups: [""]
|
||||
resources: ["persistentvolumeclaims"]
|
||||
verbs: ["get", "list", "watch", "update"]
|
||||
- apiGroups: ["storage.k8s.io"]
|
||||
resources: ["storageclasses"]
|
||||
verbs: ["get", "list", "watch"]
|
||||
- apiGroups: [""]
|
||||
resources: ["events"]
|
||||
verbs: ["create", "update", "patch"]
|
||||
---
|
||||
kind: ClusterRoleBinding
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
metadata:
|
||||
name: run-nfs-client-provisioner
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: nfs-client-provisioner
|
||||
namespace: kube-system
|
||||
roleRef:
|
||||
kind: ClusterRole
|
||||
name: nfs-client-provisioner-runner
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
---
|
||||
kind: Role
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
metadata:
|
||||
name: leader-locking-nfs-client-provisioner
|
||||
rules:
|
||||
- apiGroups: [""]
|
||||
resources: ["endpoints"]
|
||||
verbs: ["get", "list", "watch", "create", "update", "patch"]
|
||||
---
|
||||
kind: RoleBinding
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
metadata:
|
||||
name: leader-locking-nfs-client-provisioner
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: nfs-client-provisioner
|
||||
# replace with namespace where provisioner is deployed
|
||||
namespace: kube-system
|
||||
roleRef:
|
||||
kind: Role
|
||||
name: leader-locking-nfs-client-provisioner
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
@@ -0,0 +1,7 @@
|
||||
apiVersion: storage.k8s.io/v1
|
||||
kind: StorageClass
|
||||
metadata:
|
||||
name: standard
|
||||
provisioner: {{ domain }}/nfs # or choose another name, must match deployment's env PROVISIONER_NAME'
|
||||
parameters:
|
||||
archiveOnDelete: "false"
|
||||
42
roles/kubernetes/templates/rke-configs/node-exporter.yaml.j2
Normal file
42
roles/kubernetes/templates/rke-configs/node-exporter.yaml.j2
Normal file
@@ -0,0 +1,42 @@
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
namespace: kube-system
|
||||
annotations:
|
||||
prometheus.io/scrape: 'true'
|
||||
labels:
|
||||
app: node-exporter
|
||||
name: node-exporter
|
||||
name: node-exporter
|
||||
spec:
|
||||
clusterIP: None
|
||||
ports:
|
||||
- name: scrape
|
||||
port: 9100
|
||||
protocol: TCP
|
||||
selector:
|
||||
app: node-exporter
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: extensions/v1beta1
|
||||
kind: DaemonSet
|
||||
metadata:
|
||||
namespace: kube-system
|
||||
name: node-exporter
|
||||
spec:
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: node-exporter
|
||||
name: node-exporter
|
||||
spec:
|
||||
containers:
|
||||
- image: prom/node-exporter
|
||||
name: node-exporter
|
||||
ports:
|
||||
- containerPort: 9100
|
||||
hostPort: 9100
|
||||
name: scrape
|
||||
hostNetwork: true
|
||||
hostPID: true
|
||||
|
||||
@@ -0,0 +1,236 @@
|
||||
# Prometheus configuration format https://prometheus.io/docs/prometheus/latest/configuration/configuration/
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: prometheus-config
|
||||
namespace: kube-system
|
||||
labels:
|
||||
kubernetes.io/cluster-service: "true"
|
||||
addonmanager.kubernetes.io/mode: EnsureExists
|
||||
data:
|
||||
rules.yml: |
|
||||
# raw to endraw is so jija does not fail with prometheus config's double curly brackets in it's syntax
|
||||
# {% raw %}
|
||||
groups:
|
||||
# node-exporter
|
||||
- name: alert.rules_nodes
|
||||
rules:
|
||||
- alert: high_memory_usage_on_node
|
||||
expr: ((node_memory_MemTotal - node_memory_MemAvailable) / node_memory_MemTotal)
|
||||
* 100 > 80
|
||||
for: 5m
|
||||
annotations:
|
||||
description: '{{ $labels.host }} is using a LOT of MEMORY. MEMORY usage is over
|
||||
{{ humanize $value}}%.'
|
||||
summary: HIGH MEMORY USAGE WARNING TASK ON '{{ $labels.host }}'
|
||||
- alert: high_la_usage_on_node
|
||||
expr: node_load5 > 7
|
||||
for: 5m
|
||||
annotations:
|
||||
description: '{{ $labels.host }} has a high load average. Load Average 5m is
|
||||
{{ humanize $value}}.'
|
||||
summary: HIGH LOAD AVERAGE WARNING ON '{{ $labels.host }}'
|
||||
- alert: node_running_out_of_disk_space
|
||||
expr: (node_filesystem_size{mountpoint="/"} - node_filesystem_free{mountpoint="/"})
|
||||
* 100 / node_filesystem_size{mountpoint="/"} > 80
|
||||
for: 5m
|
||||
annotations:
|
||||
description: More than 80% of disk used. Disk usage {{ humanize $value }}%.
|
||||
summary: 'LOW DISK SPACE WARING: NODE ''{{ $labels.host }}'''
|
||||
- alert: monitoring_service_down
|
||||
expr: up == 0
|
||||
for: 90s
|
||||
annotations:
|
||||
description: "The monitoring service '{{ $labels.job }}' is down."
|
||||
summary: "MONITORING SERVICE DOWN WARNING: NODE '{{ $labels.host }}'"
|
||||
# {% endraw %}
|
||||
|
||||
# ceph
|
||||
- name: alert.rules_ceph
|
||||
rules:
|
||||
- alert: ceph_health_warning
|
||||
expr: ceph_health_status == 1
|
||||
for: 5m
|
||||
annotations:
|
||||
description: CEPH CLUSTER HEALTH WARNING
|
||||
summary: CEPH CLUSTER HEALTH WARNING
|
||||
|
||||
prometheus.yml: |
|
||||
alerting:
|
||||
alertmanagers:
|
||||
- static_configs:
|
||||
- targets: ["alertmanager"]
|
||||
rule_files:
|
||||
- "rules.yml"
|
||||
scrape_configs:
|
||||
#- job_name: 'ceph'
|
||||
# static_configs:
|
||||
# - targets:
|
||||
# - 'rook-ceph-mgr-external:9283'
|
||||
- job_name: 'kubernetes-node-exporter'
|
||||
dns_sd_configs:
|
||||
- names:
|
||||
- 'node-exporter'
|
||||
type: 'A'
|
||||
port: 9100
|
||||
|
||||
- job_name: prometheus
|
||||
static_configs:
|
||||
- targets:
|
||||
- localhost:9090
|
||||
|
||||
- job_name: kubernetes-apiservers
|
||||
kubernetes_sd_configs:
|
||||
- role: endpoints
|
||||
relabel_configs:
|
||||
- action: keep
|
||||
regex: default;kubernetes;https
|
||||
source_labels:
|
||||
- __meta_kubernetes_namespace
|
||||
- __meta_kubernetes_service_name
|
||||
- __meta_kubernetes_endpoint_port_name
|
||||
scheme: https
|
||||
tls_config:
|
||||
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
|
||||
insecure_skip_verify: true
|
||||
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||||
|
||||
- job_name: kubernetes-nodes-kubelet
|
||||
kubernetes_sd_configs:
|
||||
- role: node
|
||||
relabel_configs:
|
||||
- action: labelmap
|
||||
regex: __meta_kubernetes_node_label_(.+)
|
||||
scheme: https
|
||||
tls_config:
|
||||
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
|
||||
insecure_skip_verify: true
|
||||
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||||
|
||||
- job_name: kubernetes-nodes-cadvisor
|
||||
kubernetes_sd_configs:
|
||||
- role: node
|
||||
relabel_configs:
|
||||
- action: labelmap
|
||||
regex: __meta_kubernetes_node_label_(.+)
|
||||
- target_label: __metrics_path__
|
||||
replacement: /metrics/cadvisor
|
||||
scheme: https
|
||||
tls_config:
|
||||
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
|
||||
insecure_skip_verify: true
|
||||
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||||
|
||||
- job_name: kubernetes-service-endpoints
|
||||
kubernetes_sd_configs:
|
||||
- role: endpoints
|
||||
relabel_configs:
|
||||
- action: keep
|
||||
regex: true
|
||||
source_labels:
|
||||
- __meta_kubernetes_service_annotation_prometheus_io_scrape
|
||||
- action: replace
|
||||
regex: (https?)
|
||||
source_labels:
|
||||
- __meta_kubernetes_service_annotation_prometheus_io_scheme
|
||||
target_label: __scheme__
|
||||
- action: replace
|
||||
regex: (.+)
|
||||
source_labels:
|
||||
- __meta_kubernetes_service_annotation_prometheus_io_path
|
||||
target_label: __metrics_path__
|
||||
- action: replace
|
||||
regex: ([^:]+)(?::\d+)?;(\d+)
|
||||
replacement: $1:$2
|
||||
source_labels:
|
||||
- __address__
|
||||
- __meta_kubernetes_service_annotation_prometheus_io_port
|
||||
target_label: __address__
|
||||
- action: labelmap
|
||||
regex: __meta_kubernetes_service_label_(.+)
|
||||
- action: replace
|
||||
source_labels:
|
||||
- __meta_kubernetes_namespace
|
||||
target_label: kubernetes_namespace
|
||||
- action: replace
|
||||
source_labels:
|
||||
- __meta_kubernetes_service_name
|
||||
target_label: kubernetes_name
|
||||
|
||||
- job_name: kubernetes-services
|
||||
kubernetes_sd_configs:
|
||||
- role: service
|
||||
metrics_path: /probe
|
||||
params:
|
||||
module:
|
||||
- http_2xx
|
||||
relabel_configs:
|
||||
- action: keep
|
||||
regex: true
|
||||
source_labels:
|
||||
- __meta_kubernetes_service_annotation_prometheus_io_probe
|
||||
- source_labels:
|
||||
- __address__
|
||||
target_label: __param_target
|
||||
- replacement: blackbox
|
||||
target_label: __address__
|
||||
- source_labels:
|
||||
- __param_target
|
||||
target_label: instance
|
||||
- action: labelmap
|
||||
regex: __meta_kubernetes_service_label_(.+)
|
||||
- source_labels:
|
||||
- __meta_kubernetes_namespace
|
||||
target_label: kubernetes_namespace
|
||||
- source_labels:
|
||||
- __meta_kubernetes_service_name
|
||||
target_label: kubernetes_name
|
||||
|
||||
- job_name: kubernetes-pods
|
||||
kubernetes_sd_configs:
|
||||
- role: pod
|
||||
relabel_configs:
|
||||
- action: keep
|
||||
regex: true
|
||||
source_labels:
|
||||
- __meta_kubernetes_pod_annotation_prometheus_io_scrape
|
||||
- action: replace
|
||||
regex: (.+)
|
||||
source_labels:
|
||||
- __meta_kubernetes_pod_annotation_prometheus_io_path
|
||||
target_label: __metrics_path__
|
||||
- action: replace
|
||||
regex: ([^:]+)(?::\d+)?;(\d+)
|
||||
replacement: $1:$2
|
||||
source_labels:
|
||||
- __address__
|
||||
- __meta_kubernetes_pod_annotation_prometheus_io_port
|
||||
target_label: __address__
|
||||
- action: labelmap
|
||||
regex: __meta_kubernetes_pod_label_(.+)
|
||||
- action: replace
|
||||
source_labels:
|
||||
- __meta_kubernetes_namespace
|
||||
target_label: kubernetes_namespace
|
||||
- action: replace
|
||||
source_labels:
|
||||
- __meta_kubernetes_pod_name
|
||||
target_label: kubernetes_pod_name
|
||||
#alerting:
|
||||
# alertmanagers:
|
||||
# - kubernetes_sd_configs:
|
||||
# - role: pod
|
||||
# tls_config:
|
||||
# ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
|
||||
# bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||||
# relabel_configs:
|
||||
# - source_labels: [__meta_kubernetes_namespace]
|
||||
# regex: kube-system
|
||||
# action: keep
|
||||
# - source_labels: [__meta_kubernetes_pod_label_k8s_app]
|
||||
# regex: alertmanager
|
||||
# action: keep
|
||||
# - source_labels: [__meta_kubernetes_pod_container_port_number]
|
||||
# regex:
|
||||
# action: drop
|
||||
|
||||
@@ -0,0 +1,56 @@
|
||||
apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
name: prometheus
|
||||
namespace: kube-system
|
||||
labels:
|
||||
kubernetes.io/cluster-service: "true"
|
||||
addonmanager.kubernetes.io/mode: Reconcile
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1beta1
|
||||
kind: ClusterRole
|
||||
metadata:
|
||||
name: prometheus
|
||||
labels:
|
||||
kubernetes.io/cluster-service: "true"
|
||||
addonmanager.kubernetes.io/mode: Reconcile
|
||||
rules:
|
||||
- apiGroups:
|
||||
- ""
|
||||
resources:
|
||||
- nodes
|
||||
- nodes/metrics
|
||||
- services
|
||||
- endpoints
|
||||
- pods
|
||||
verbs:
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- ""
|
||||
resources:
|
||||
- configmaps
|
||||
verbs:
|
||||
- get
|
||||
- nonResourceURLs:
|
||||
- "/metrics"
|
||||
verbs:
|
||||
- get
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1beta1
|
||||
kind: ClusterRoleBinding
|
||||
metadata:
|
||||
name: prometheus
|
||||
labels:
|
||||
kubernetes.io/cluster-service: "true"
|
||||
addonmanager.kubernetes.io/mode: Reconcile
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
kind: ClusterRole
|
||||
name: prometheus
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: prometheus
|
||||
namespace: kube-system
|
||||
|
||||
@@ -0,0 +1,18 @@
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: prometheus
|
||||
namespace: kube-system
|
||||
labels:
|
||||
kubernetes.io/name: "Prometheus"
|
||||
kubernetes.io/cluster-service: "true"
|
||||
addonmanager.kubernetes.io/mode: Reconcile
|
||||
spec:
|
||||
ports:
|
||||
- name: http
|
||||
port: 9090
|
||||
protocol: TCP
|
||||
targetPort: 9090
|
||||
selector:
|
||||
k8s-app: prometheus
|
||||
|
||||
@@ -0,0 +1,110 @@
|
||||
apiVersion: apps/v1
|
||||
kind: StatefulSet
|
||||
metadata:
|
||||
name: prometheus
|
||||
namespace: kube-system
|
||||
labels:
|
||||
k8s-app: prometheus
|
||||
kubernetes.io/cluster-service: "true"
|
||||
addonmanager.kubernetes.io/mode: Reconcile
|
||||
version: v2.2.1
|
||||
spec:
|
||||
serviceName: "prometheus"
|
||||
replicas: 1
|
||||
podManagementPolicy: "Parallel"
|
||||
updateStrategy:
|
||||
type: "RollingUpdate"
|
||||
selector:
|
||||
matchLabels:
|
||||
k8s-app: prometheus
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
k8s-app: prometheus
|
||||
annotations:
|
||||
scheduler.alpha.kubernetes.io/critical-pod: ''
|
||||
spec:
|
||||
priorityClassName: system-cluster-critical
|
||||
serviceAccountName: prometheus
|
||||
initContainers:
|
||||
- name: "init-chown-data"
|
||||
image: "busybox:latest"
|
||||
imagePullPolicy: "IfNotPresent"
|
||||
command: ["chown", "-R", "65534:65534", "/data"]
|
||||
volumeMounts:
|
||||
- name: prometheus-data
|
||||
mountPath: /data
|
||||
subPath: ""
|
||||
containers:
|
||||
- name: prometheus-server-configmap-reload
|
||||
image: "jimmidyson/configmap-reload:v0.1"
|
||||
imagePullPolicy: "IfNotPresent"
|
||||
args:
|
||||
- --volume-dir=/etc/config
|
||||
- --webhook-url=http://localhost:9090/-/reload
|
||||
volumeMounts:
|
||||
- name: config-volume
|
||||
mountPath: /etc/config
|
||||
readOnly: true
|
||||
resources:
|
||||
limits:
|
||||
cpu: 10m
|
||||
memory: 10Mi
|
||||
requests:
|
||||
cpu: 10m
|
||||
memory: 10Mi
|
||||
|
||||
- name: prometheus-server
|
||||
image: "prom/prometheus:v2.2.1"
|
||||
imagePullPolicy: "IfNotPresent"
|
||||
args:
|
||||
- --config.file=/etc/config/prometheus.yml
|
||||
- --storage.tsdb.path=/data
|
||||
- --web.console.libraries=/etc/prometheus/console_libraries
|
||||
- --web.console.templates=/etc/prometheus/consoles
|
||||
- --web.enable-lifecycle
|
||||
ports:
|
||||
- containerPort: 9090
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /-/ready
|
||||
port: 9090
|
||||
initialDelaySeconds: 30
|
||||
timeoutSeconds: 30
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /-/healthy
|
||||
port: 9090
|
||||
initialDelaySeconds: 30
|
||||
timeoutSeconds: 30
|
||||
# based on 10 running nodes with 30 pods each
|
||||
resources:
|
||||
limits:
|
||||
cpu: 200m
|
||||
memory: 1000Mi
|
||||
requests:
|
||||
cpu: 200m
|
||||
memory: 1000Mi
|
||||
|
||||
volumeMounts:
|
||||
- name: config-volume
|
||||
mountPath: /etc/config
|
||||
- name: prometheus-data
|
||||
mountPath: /data
|
||||
subPath: ""
|
||||
terminationGracePeriodSeconds: 300
|
||||
volumes:
|
||||
- name: config-volume
|
||||
configMap:
|
||||
name: prometheus-config
|
||||
volumeClaimTemplates:
|
||||
- metadata:
|
||||
name: prometheus-data
|
||||
spec:
|
||||
storageClassName: standard
|
||||
accessModes:
|
||||
- ReadWriteOnce
|
||||
resources:
|
||||
requests:
|
||||
storage: "16Gi"
|
||||
|
||||
Reference in New Issue
Block a user