Added RKE configuration for deploying Kubernetes at home

2019-04-10 10:04:37 -04:00
parent 98810165e0
commit 9375d11649
21 changed files with 1129 additions and 3 deletions

View File

@@ -1,22 +1,39 @@
---
# Variables listed here are applicable to all host groups
# Software versions
docker_compose_version_to_install: 1.22.0
docker_ce_version_to_install: 18.03.1
nvm_version: v0.33.5
node_version: 8.4.0
# Storage Stuff
registry_location: "registry.wimer.home"
nfs_location: 10.0.0.150
nfs_share: /volumeUSB1/usbshare
# Machine configurations
home_pub_key: https://raw.githubusercontent.com/jcwimer/ubuntu-template/master/post/id_rsa.pub
standard_user: cody
git_user: "Jacob Cody Wimer"
git_email: "jacob.wimer@gmail.com"
# Proxmox Settings
proxmox_user: "root@pam"
proxmox_password: "{{ lookup('env', 'PROXMOX_PASSWORD') }}"
ubuntu_template_vm_name: "ubuntu-server-1604"
# dns
domain: wimer.home
dns_server: "10.0.0.204"
# haproxies
vip_interface: ens18
vip_address: 10.0.0.200
# rke
rke_directory: /root/rke
rke_version: 0.2.1
rke_ssh_key_location: /root/id_home
rke_nfs_path: "{{ nfs_share }}/raw-files/fileserver/shares/lab-data/kubernetes"
alertmanager_email_password: "{{ lookup('env', 'GMAIL_SERVICE_PASSWORD') }}"
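
Both secrets above are read from the environment at play time, so they have to be exported in the shell that invokes the playbooks; a minimal sketch (values are placeholders):

export PROXMOX_PASSWORD='...'        # consumed by the proxmox_password lookup
export GMAIL_SERVICE_PASSWORD='...'  # consumed by the alertmanager_email_password lookup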

View File

@@ -34,6 +34,12 @@
  tasks:
    - include: ../roles/developer-machine/tasks/main.yml
- name: Set up Kubernetes
  hosts: localhost
  user: root
  tasks:
    - include: ../roles/kubernetes/tasks/main.yml
- name: Initialize the swarm
  hosts: swarm-bootstrap
  user: root

View File

@@ -0,0 +1,65 @@
---
- name: Create RKE directory
  file:
    path: "{{ rke_directory }}"
    state: directory
  delegate_to: localhost
  run_once: true
- name: Create RKE configs directory
  file:
    path: "{{ rke_directory }}/configs"
    state: directory
  delegate_to: localhost
  run_once: true
- name: Install RKE
  get_url:
    dest: "{{ rke_directory }}/rke"
    url: https://github.com/rancher/rke/releases/download/v{{ rke_version }}/rke_linux-amd64
  delegate_to: localhost
  run_once: true
- name: Make RKE executable
  file:
    dest: "{{ rke_directory }}/rke"
    mode: +x
  delegate_to: localhost
  run_once: true
- name: Put RKE cluster config in place
  template:
    src: ../templates/rke-cluster.yaml.j2
    dest: "{{ rke_directory }}/rke-cluster.yaml"
  delegate_to: localhost
  run_once: true
- name: Put RKE configs in place
  template:
    src: ../templates/rke-configs/{{ item }}.j2
    dest: "{{ rke_directory }}/configs/{{ item }}"
  with_items:
    - nfs-client-deployment.yaml
    - nfs-client-rbac.yaml
    - nfs-client-storageclass.yaml
    - alertmanager-pvc.yaml
    - alertmanager-configmap.yaml
    - alertmanager-deployment.yaml
    - alertmanager-service.yaml
    - kube-state-metrics-deployment.yaml
    - kube-state-metrics-service.yaml
    - kube-state-metrics-rbac.yaml
    - node-exporter.yaml
    - prometheus-configmap.yaml
    - prometheus-rbac.yaml
    - prometheus-statefulset.yaml
    - prometheus-service.yaml
    - monitoring-ingress.yaml
  delegate_to: localhost
  run_once: true
- name: Run RKE
  shell: >
    bash -c "{{ rke_directory }}/rke up --config {{ rke_directory }}/rke-cluster.yaml"
  delegate_to: localhost
  run_once: true
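
After `rke up` succeeds, RKE writes a kubeconfig next to the cluster config (named kube_config_<config-file> by RKE's convention); a hedged sanity-check sketch assuming that default:

export KUBECONFIG=/root/rke/kube_config_rke-cluster.yaml   # rke_directory from group_vars
kubectl get nodes -o wide
kubectl -n kube-system get pods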

View File

@@ -0,0 +1,52 @@
---
ssh_key_path: {{ rke_ssh_key_location }}
cluster_name: rke-k8s
ignore_docker_version: true
kubernetes_version: v1.13.4-rancher1-2
system_images:
  kubernetes: rancher/hyperkube:v1.13.4-rancher1
nodes:
{% for node in groups['kube-masters'] %}
  - address: {{ hostvars[node]['ansible_host'] }}
    name: {{ node }}
    user: {{ standard_user }}
    role:
      - controlplane
      - etcd
{% endfor %}
{% for node in groups['kube-workers'] %}
  - address: {{ hostvars[node]['ansible_host'] }}
    name: {{ node }}
    user: {{ standard_user }}
    role:
      - worker
{% endfor %}
authentication:
  strategy: x509
  sans:
    - "{{ vip_address }}"
    - "kube.{{ domain }}"
addons_include:
  - ./configs/nfs-client-deployment.yaml
  - ./configs/nfs-client-rbac.yaml
  - ./configs/nfs-client-storageclass.yaml
  - ./configs/alertmanager-pvc.yaml
  - ./configs/alertmanager-configmap.yaml
  - ./configs/alertmanager-deployment.yaml
  - ./configs/alertmanager-service.yaml
  - ./configs/kube-state-metrics-deployment.yaml
  - ./configs/kube-state-metrics-service.yaml
  - ./configs/kube-state-metrics-rbac.yaml
  - ./configs/node-exporter.yaml
  - ./configs/prometheus-configmap.yaml
  - ./configs/prometheus-rbac.yaml
  - ./configs/prometheus-statefulset.yaml
  - ./configs/prometheus-service.yaml
  - ./configs/monitoring-ingress.yaml
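
The two loops above expect kube-masters and kube-workers groups in the Ansible inventory; a hypothetical sketch of matching hosts-file entries (host names and addresses are illustrative only):

cat > hosts.example <<'EOF'
[kube-masters]
kube-master-1 ansible_host=10.0.0.211

[kube-workers]
kube-worker-1 ansible_host=10.0.0.212
kube-worker-2 ansible_host=10.0.0.213
EOF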

View File

@@ -0,0 +1,28 @@
apiVersion: v1
kind: ConfigMap
metadata:
  name: alertmanager-config
  namespace: kube-system
  labels:
    kubernetes.io/cluster-service: "true"
    addonmanager.kubernetes.io/mode: EnsureExists
data:
  alertmanager.yml: |
    global: null
    receivers:
      - name: default-receiver
      - name: email
        email_configs:
          - to: jacob.wimer@gmail.com
            from: jacob.wimer@gmail.com
            smarthost: smtp.gmail.com:587
            auth_username: "jacob.wimer@gmail.com"
            auth_identity: "jacob.wimer@gmail.com"
            auth_password: {{ alertmanager_email_password }}
            send_resolved: true
    route:
      group_interval: 5m
      group_wait: 10s
      receiver: email
      repeat_interval: 3h
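
The rendered config can be linted before it ships; a sketch assuming the template has been rendered to ./alertmanager.yml and that amtool is present in the same image the deployment runs:

docker run --rm --entrypoint amtool \
  -v "$PWD/alertmanager.yml:/tmp/alertmanager.yml:ro" \
  prom/alertmanager:v0.14.0 check-config /tmp/alertmanager.yml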

View File

@@ -0,0 +1,79 @@
apiVersion: apps/v1
kind: Deployment
metadata:
  name: alertmanager
  namespace: kube-system
  labels:
    k8s-app: alertmanager
    kubernetes.io/cluster-service: "true"
    addonmanager.kubernetes.io/mode: Reconcile
    version: v0.14.0
spec:
  replicas: 1
  selector:
    matchLabels:
      k8s-app: alertmanager
      version: v0.14.0
  template:
    metadata:
      labels:
        k8s-app: alertmanager
        version: v0.14.0
      annotations:
        scheduler.alpha.kubernetes.io/critical-pod: ''
    spec:
      priorityClassName: system-cluster-critical
      containers:
        - name: prometheus-alertmanager
          image: "prom/alertmanager:v0.14.0"
          imagePullPolicy: "IfNotPresent"
          args:
            - --config.file=/etc/config/alertmanager.yml
            - --storage.path=/data
            - --web.external-url=/
          ports:
            - containerPort: 9093
          readinessProbe:
            httpGet:
              path: /#/status
              port: 9093
            initialDelaySeconds: 30
            timeoutSeconds: 30
          volumeMounts:
            - name: config-volume
              mountPath: /etc/config
            - name: storage-volume
              mountPath: "/data"
              subPath: ""
          resources:
            limits:
              cpu: 10m
              memory: 50Mi
            requests:
              cpu: 10m
              memory: 50Mi
        - name: prometheus-alertmanager-configmap-reload
          image: "jimmidyson/configmap-reload:v0.1"
          imagePullPolicy: "IfNotPresent"
          args:
            - --volume-dir=/etc/config
            - --webhook-url=http://localhost:9093/-/reload
          volumeMounts:
            - name: config-volume
              mountPath: /etc/config
              readOnly: true
          resources:
            limits:
              cpu: 10m
              memory: 10Mi
            requests:
              cpu: 10m
              memory: 10Mi
      volumes:
        - name: config-volume
          configMap:
            name: alertmanager-config
        - name: storage-volume
          persistentVolumeClaim:
            claimName: alertmanager

View File

@@ -0,0 +1,14 @@
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: alertmanager
  namespace: kube-system
  labels:
    app: alertmanager
spec:
  storageClassName: standard
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 2Gi

View File

@@ -0,0 +1,18 @@
apiVersion: v1
kind: Service
metadata:
  name: alertmanager
  namespace: kube-system
  labels:
    kubernetes.io/cluster-service: "true"
    addonmanager.kubernetes.io/mode: Reconcile
    kubernetes.io/name: "Alertmanager"
spec:
  ports:
    - name: http
      port: 80
      protocol: TCP
      targetPort: 9093
  selector:
    k8s-app: alertmanager
  type: "ClusterIP"

View File

@@ -0,0 +1,92 @@
apiVersion: apps/v1
kind: Deployment
metadata:
  name: kube-state-metrics
  namespace: kube-system
  labels:
    k8s-app: kube-state-metrics
    kubernetes.io/cluster-service: "true"
    addonmanager.kubernetes.io/mode: Reconcile
    version: v1.3.0
spec:
  selector:
    matchLabels:
      k8s-app: kube-state-metrics
      version: v1.3.0
  replicas: 1
  template:
    metadata:
      labels:
        k8s-app: kube-state-metrics
        version: v1.3.0
      annotations:
        scheduler.alpha.kubernetes.io/critical-pod: ''
    spec:
      priorityClassName: system-cluster-critical
      serviceAccountName: kube-state-metrics
      containers:
        - name: kube-state-metrics
          image: quay.io/coreos/kube-state-metrics:v1.3.0
          ports:
            - name: http-metrics
              containerPort: 8080
            - name: telemetry
              containerPort: 8081
          readinessProbe:
            httpGet:
              path: /healthz
              port: 8080
            initialDelaySeconds: 5
            timeoutSeconds: 5
        - name: addon-resizer
          image: k8s.gcr.io/addon-resizer:1.8.4
          resources:
            limits:
              cpu: 100m
              memory: 30Mi
            requests:
              cpu: 100m
              memory: 30Mi
          env:
            - name: MY_POD_NAME
              valueFrom:
                fieldRef:
                  fieldPath: metadata.name
            - name: MY_POD_NAMESPACE
              valueFrom:
                fieldRef:
                  fieldPath: metadata.namespace
          volumeMounts:
            - name: config-volume
              mountPath: /etc/config
          command:
            - /pod_nanny
            - --config-dir=/etc/config
            - --container=kube-state-metrics
            - --cpu=100m
            - --extra-cpu=1m
            - --memory=100Mi
            - --extra-memory=2Mi
            - --threshold=5
            - --deployment=kube-state-metrics
      volumes:
        - name: config-volume
          configMap:
            name: kube-state-metrics-config
---
# Config map for resource configuration.
apiVersion: v1
kind: ConfigMap
metadata:
  name: kube-state-metrics-config
  namespace: kube-system
  labels:
    k8s-app: kube-state-metrics
    kubernetes.io/cluster-service: "true"
    addonmanager.kubernetes.io/mode: Reconcile
data:
  NannyConfiguration: |-
    apiVersion: nannyconfig/v1alpha1
    kind: NannyConfiguration

View File

@@ -0,0 +1,104 @@
apiVersion: v1
kind: ServiceAccount
metadata:
  name: kube-state-metrics
  namespace: kube-system
  labels:
    kubernetes.io/cluster-service: "true"
    addonmanager.kubernetes.io/mode: Reconcile
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: kube-state-metrics
  labels:
    kubernetes.io/cluster-service: "true"
    addonmanager.kubernetes.io/mode: Reconcile
rules:
  - apiGroups: [""]
    resources:
      - configmaps
      - secrets
      - nodes
      - pods
      - services
      - resourcequotas
      - replicationcontrollers
      - limitranges
      - persistentvolumeclaims
      - persistentvolumes
      - namespaces
      - endpoints
    verbs: ["list", "watch"]
  - apiGroups: ["extensions"]
    resources:
      - daemonsets
      - deployments
      - replicasets
    verbs: ["list", "watch"]
  - apiGroups: ["apps"]
    resources:
      - statefulsets
    verbs: ["list", "watch"]
  - apiGroups: ["batch"]
    resources:
      - cronjobs
      - jobs
    verbs: ["list", "watch"]
  - apiGroups: ["autoscaling"]
    resources:
      - horizontalpodautoscalers
    verbs: ["list", "watch"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: kube-state-metrics-resizer
  namespace: kube-system
  labels:
    kubernetes.io/cluster-service: "true"
    addonmanager.kubernetes.io/mode: Reconcile
rules:
  - apiGroups: [""]
    resources:
      - pods
    verbs: ["get"]
  - apiGroups: ["extensions"]
    resources:
      - deployments
    resourceNames: ["kube-state-metrics"]
    verbs: ["get", "update"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: kube-state-metrics
  labels:
    kubernetes.io/cluster-service: "true"
    addonmanager.kubernetes.io/mode: Reconcile
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: kube-state-metrics
subjects:
  - kind: ServiceAccount
    name: kube-state-metrics
    namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: kube-state-metrics
  namespace: kube-system
  labels:
    kubernetes.io/cluster-service: "true"
    addonmanager.kubernetes.io/mode: Reconcile
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: kube-state-metrics-resizer
subjects:
  - kind: ServiceAccount
    name: kube-state-metrics
    namespace: kube-system

View File

@@ -0,0 +1,23 @@
apiVersion: v1
kind: Service
metadata:
  name: kube-state-metrics
  namespace: kube-system
  labels:
    kubernetes.io/cluster-service: "true"
    addonmanager.kubernetes.io/mode: Reconcile
    kubernetes.io/name: "kube-state-metrics"
  annotations:
    prometheus.io/scrape: 'true'
spec:
  ports:
    - name: http-metrics
      port: 8080
      targetPort: http-metrics
      protocol: TCP
    - name: telemetry
      port: 8081
      targetPort: telemetry
      protocol: TCP
  selector:
    k8s-app: kube-state-metrics

View File

@@ -0,0 +1,32 @@
apiVersion: extensions/v1beta1
kind: Ingress
metadata:
  namespace: kube-system
  name: alertmanager
  #annotations:
  #  kubernetes.io/ingress.class: traefik
spec:
  rules:
    - host: alertmanager.{{ domain }}
      http:
        paths:
          - backend:
              serviceName: alertmanager
              servicePort: 80
---
apiVersion: extensions/v1beta1
kind: Ingress
metadata:
  namespace: kube-system
  name: prometheus
  #annotations:
  #  kubernetes.io/ingress.class: traefik
spec:
  rules:
    - host: prometheus.{{ domain }}
      http:
        paths:
          - backend:
              serviceName: prometheus
              servicePort: 9090
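
Both hosts must resolve to whatever fronts the ingress controller (the HAProxy VIP, given the group_vars above); a quick hedged check against the home DNS server:

dig +short alertmanager.wimer.home @10.0.0.204
dig +short prometheus.wimer.home @10.0.0.204
curl -s -o /dev/null -w '%{http_code}\n' http://prometheus.wimer.home/-/healthy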

View File

@@ -0,0 +1,37 @@
apiVersion: v1
kind: ServiceAccount
metadata:
  name: nfs-client-provisioner
---
kind: Deployment
apiVersion: extensions/v1beta1
metadata:
  name: nfs-client-provisioner
spec:
  replicas: 1
  strategy:
    type: Recreate
  template:
    metadata:
      labels:
        app: nfs-client-provisioner
    spec:
      serviceAccountName: nfs-client-provisioner
      containers:
        - name: nfs-client-provisioner
          image: quay.io/external_storage/nfs-client-provisioner:latest
          volumeMounts:
            - name: nfs-client-root
              mountPath: /persistentvolumes
          env:
            - name: PROVISIONER_NAME
              value: {{ domain }}/nfs
            - name: NFS_SERVER
              value: {{ nfs_location }}
            - name: NFS_PATH
              value: {{ rke_nfs_path }}
      volumes:
        - name: nfs-client-root
          nfs:
            server: {{ nfs_location }}
            path: {{ rke_nfs_path }}

View File

@@ -0,0 +1,58 @@
kind: ServiceAccount
apiVersion: v1
metadata:
  name: nfs-client-provisioner
---
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: nfs-client-provisioner-runner
rules:
  - apiGroups: [""]
    resources: ["persistentvolumes"]
    verbs: ["get", "list", "watch", "create", "delete"]
  - apiGroups: [""]
    resources: ["persistentvolumeclaims"]
    verbs: ["get", "list", "watch", "update"]
  - apiGroups: ["storage.k8s.io"]
    resources: ["storageclasses"]
    verbs: ["get", "list", "watch"]
  - apiGroups: [""]
    resources: ["events"]
    verbs: ["create", "update", "patch"]
---
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: run-nfs-client-provisioner
subjects:
  - kind: ServiceAccount
    name: nfs-client-provisioner
    namespace: kube-system
roleRef:
  kind: ClusterRole
  name: nfs-client-provisioner-runner
  apiGroup: rbac.authorization.k8s.io
---
kind: Role
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: leader-locking-nfs-client-provisioner
rules:
  - apiGroups: [""]
    resources: ["endpoints"]
    verbs: ["get", "list", "watch", "create", "update", "patch"]
---
kind: RoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: leader-locking-nfs-client-provisioner
subjects:
  - kind: ServiceAccount
    name: nfs-client-provisioner
    # replace with namespace where provisioner is deployed
    namespace: kube-system
roleRef:
  kind: Role
  name: leader-locking-nfs-client-provisioner
  apiGroup: rbac.authorization.k8s.io

View File

@@ -0,0 +1,7 @@
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: standard
provisioner: {{ domain }}/nfs  # or choose another name; must match the deployment's PROVISIONER_NAME env var
parameters:
  archiveOnDelete: "false"
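
Any claim that names the standard class gets provisioned as a subdirectory of the NFS export; a minimal sketch with a hypothetical claim name:

kubectl apply -f - <<'EOF'
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: demo-claim   # hypothetical, for illustration only
spec:
  storageClassName: standard
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 1Gi
EOF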

View File

@@ -0,0 +1,42 @@
apiVersion: v1
kind: Service
metadata:
  namespace: kube-system
  annotations:
    prometheus.io/scrape: 'true'
  labels:
    app: node-exporter
    name: node-exporter
  name: node-exporter
spec:
  clusterIP: None
  ports:
    - name: scrape
      port: 9100
      protocol: TCP
  selector:
    app: node-exporter
  type: ClusterIP
---
apiVersion: extensions/v1beta1
kind: DaemonSet
metadata:
  namespace: kube-system
  name: node-exporter
spec:
  template:
    metadata:
      labels:
        app: node-exporter
        name: node-exporter
    spec:
      containers:
        - image: prom/node-exporter
          name: node-exporter
          ports:
            - containerPort: 9100
              hostPort: 9100
              name: scrape
      hostNetwork: true
      hostPID: true
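
Because the DaemonSet uses hostNetwork with hostPort 9100, every node exposes its own metrics endpoint; a quick check from any cluster node:

curl -s http://localhost:9100/metrics | head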

View File

@@ -0,0 +1,236 @@
# Prometheus configuration format: https://prometheus.io/docs/prometheus/latest/configuration/configuration/
apiVersion: v1
kind: ConfigMap
metadata:
  name: prometheus-config
  namespace: kube-system
  labels:
    kubernetes.io/cluster-service: "true"
    addonmanager.kubernetes.io/mode: EnsureExists
data:
  rules.yml: |
    # The raw/endraw block keeps Jinja from failing on the double curly
    # brackets in Prometheus's own template syntax.
    # {% raw %}
    groups:
      # node-exporter
      - name: alert.rules_nodes
        rules:
          - alert: high_memory_usage_on_node
            expr: ((node_memory_MemTotal - node_memory_MemAvailable) / node_memory_MemTotal) * 100 > 80
            for: 5m
            annotations:
              description: '{{ $labels.host }} is using a LOT of MEMORY. MEMORY usage is over {{ humanize $value }}%.'
              summary: HIGH MEMORY USAGE WARNING TASK ON '{{ $labels.host }}'
          - alert: high_la_usage_on_node
            expr: node_load5 > 7
            for: 5m
            annotations:
              description: '{{ $labels.host }} has a high load average. Load Average 5m is {{ humanize $value }}.'
              summary: HIGH LOAD AVERAGE WARNING ON '{{ $labels.host }}'
          - alert: node_running_out_of_disk_space
            expr: (node_filesystem_size{mountpoint="/"} - node_filesystem_free{mountpoint="/"}) * 100 / node_filesystem_size{mountpoint="/"} > 80
            for: 5m
            annotations:
              description: More than 80% of disk used. Disk usage {{ humanize $value }}%.
              summary: 'LOW DISK SPACE WARNING: NODE ''{{ $labels.host }}'''
          - alert: monitoring_service_down
            expr: up == 0
            for: 90s
            annotations:
              description: "The monitoring service '{{ $labels.job }}' is down."
              summary: "MONITORING SERVICE DOWN WARNING: NODE '{{ $labels.host }}'"
      # {% endraw %}
      # ceph
      - name: alert.rules_ceph
        rules:
          - alert: ceph_health_warning
            expr: ceph_health_status == 1
            for: 5m
            annotations:
              description: CEPH CLUSTER HEALTH WARNING
              summary: CEPH CLUSTER HEALTH WARNING
  prometheus.yml: |
    alerting:
      alertmanagers:
        - static_configs:
            - targets: ["alertmanager"]
    rule_files:
      - "rules.yml"
    scrape_configs:
      #- job_name: 'ceph'
      #  static_configs:
      #    - targets:
      #        - 'rook-ceph-mgr-external:9283'
      - job_name: 'kubernetes-node-exporter'
        dns_sd_configs:
          - names:
              - 'node-exporter'
            type: 'A'
            port: 9100
      - job_name: prometheus
        static_configs:
          - targets:
              - localhost:9090
      - job_name: kubernetes-apiservers
        kubernetes_sd_configs:
          - role: endpoints
        relabel_configs:
          - action: keep
            regex: default;kubernetes;https
            source_labels:
              - __meta_kubernetes_namespace
              - __meta_kubernetes_service_name
              - __meta_kubernetes_endpoint_port_name
        scheme: https
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
          insecure_skip_verify: true
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
      - job_name: kubernetes-nodes-kubelet
        kubernetes_sd_configs:
          - role: node
        relabel_configs:
          - action: labelmap
            regex: __meta_kubernetes_node_label_(.+)
        scheme: https
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
          insecure_skip_verify: true
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
      - job_name: kubernetes-nodes-cadvisor
        kubernetes_sd_configs:
          - role: node
        relabel_configs:
          - action: labelmap
            regex: __meta_kubernetes_node_label_(.+)
          - target_label: __metrics_path__
            replacement: /metrics/cadvisor
        scheme: https
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
          insecure_skip_verify: true
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
      - job_name: kubernetes-service-endpoints
        kubernetes_sd_configs:
          - role: endpoints
        relabel_configs:
          - action: keep
            regex: true
            source_labels:
              - __meta_kubernetes_service_annotation_prometheus_io_scrape
          - action: replace
            regex: (https?)
            source_labels:
              - __meta_kubernetes_service_annotation_prometheus_io_scheme
            target_label: __scheme__
          - action: replace
            regex: (.+)
            source_labels:
              - __meta_kubernetes_service_annotation_prometheus_io_path
            target_label: __metrics_path__
          - action: replace
            regex: ([^:]+)(?::\d+)?;(\d+)
            replacement: $1:$2
            source_labels:
              - __address__
              - __meta_kubernetes_service_annotation_prometheus_io_port
            target_label: __address__
          - action: labelmap
            regex: __meta_kubernetes_service_label_(.+)
          - action: replace
            source_labels:
              - __meta_kubernetes_namespace
            target_label: kubernetes_namespace
          - action: replace
            source_labels:
              - __meta_kubernetes_service_name
            target_label: kubernetes_name
      - job_name: kubernetes-services
        kubernetes_sd_configs:
          - role: service
        metrics_path: /probe
        params:
          module:
            - http_2xx
        relabel_configs:
          - action: keep
            regex: true
            source_labels:
              - __meta_kubernetes_service_annotation_prometheus_io_probe
          - source_labels:
              - __address__
            target_label: __param_target
          - replacement: blackbox
            target_label: __address__
          - source_labels:
              - __param_target
            target_label: instance
          - action: labelmap
            regex: __meta_kubernetes_service_label_(.+)
          - source_labels:
              - __meta_kubernetes_namespace
            target_label: kubernetes_namespace
          - source_labels:
              - __meta_kubernetes_service_name
            target_label: kubernetes_name
      - job_name: kubernetes-pods
        kubernetes_sd_configs:
          - role: pod
        relabel_configs:
          - action: keep
            regex: true
            source_labels:
              - __meta_kubernetes_pod_annotation_prometheus_io_scrape
          - action: replace
            regex: (.+)
            source_labels:
              - __meta_kubernetes_pod_annotation_prometheus_io_path
            target_label: __metrics_path__
          - action: replace
            regex: ([^:]+)(?::\d+)?;(\d+)
            replacement: $1:$2
            source_labels:
              - __address__
              - __meta_kubernetes_pod_annotation_prometheus_io_port
            target_label: __address__
          - action: labelmap
            regex: __meta_kubernetes_pod_label_(.+)
          - action: replace
            source_labels:
              - __meta_kubernetes_namespace
            target_label: kubernetes_namespace
          - action: replace
            source_labels:
              - __meta_kubernetes_pod_name
            target_label: kubernetes_pod_name
    #alerting:
    #  alertmanagers:
    #    - kubernetes_sd_configs:
    #        - role: pod
    #      tls_config:
    #        ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
    #      bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
    #      relabel_configs:
    #        - source_labels: [__meta_kubernetes_namespace]
    #          regex: kube-system
    #          action: keep
    #        - source_labels: [__meta_kubernetes_pod_label_k8s_app]
    #          regex: alertmanager
    #          action: keep
    #        - source_labels: [__meta_kubernetes_pod_container_port_number]
    #          regex:
    #          action: drop
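
The rendered files can be linted with the promtool bundled in the same image the StatefulSet runs; a sketch assuming both documents were rendered into the current directory:

docker run --rm --entrypoint promtool \
  -v "$PWD/prometheus.yml:/tmp/prometheus.yml:ro" \
  -v "$PWD/rules.yml:/tmp/rules.yml:ro" \
  prom/prometheus:v2.2.1 check config /tmp/prometheus.yml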

View File

@@ -0,0 +1,56 @@
apiVersion: v1
kind: ServiceAccount
metadata:
  name: prometheus
  namespace: kube-system
  labels:
    kubernetes.io/cluster-service: "true"
    addonmanager.kubernetes.io/mode: Reconcile
---
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRole
metadata:
  name: prometheus
  labels:
    kubernetes.io/cluster-service: "true"
    addonmanager.kubernetes.io/mode: Reconcile
rules:
  - apiGroups:
      - ""
    resources:
      - nodes
      - nodes/metrics
      - services
      - endpoints
      - pods
    verbs:
      - get
      - list
      - watch
  - apiGroups:
      - ""
    resources:
      - configmaps
    verbs:
      - get
  - nonResourceURLs:
      - "/metrics"
    verbs:
      - get
---
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRoleBinding
metadata:
  name: prometheus
  labels:
    kubernetes.io/cluster-service: "true"
    addonmanager.kubernetes.io/mode: Reconcile
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: prometheus
subjects:
  - kind: ServiceAccount
    name: prometheus
    namespace: kube-system

View File

@@ -0,0 +1,18 @@
kind: Service
apiVersion: v1
metadata:
  name: prometheus
  namespace: kube-system
  labels:
    kubernetes.io/name: "Prometheus"
    kubernetes.io/cluster-service: "true"
    addonmanager.kubernetes.io/mode: Reconcile
spec:
  ports:
    - name: http
      port: 9090
      protocol: TCP
      targetPort: 9090
  selector:
    k8s-app: prometheus

View File

@@ -0,0 +1,110 @@
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: prometheus
  namespace: kube-system
  labels:
    k8s-app: prometheus
    kubernetes.io/cluster-service: "true"
    addonmanager.kubernetes.io/mode: Reconcile
    version: v2.2.1
spec:
  serviceName: "prometheus"
  replicas: 1
  podManagementPolicy: "Parallel"
  updateStrategy:
    type: "RollingUpdate"
  selector:
    matchLabels:
      k8s-app: prometheus
  template:
    metadata:
      labels:
        k8s-app: prometheus
      annotations:
        scheduler.alpha.kubernetes.io/critical-pod: ''
    spec:
      priorityClassName: system-cluster-critical
      serviceAccountName: prometheus
      initContainers:
        - name: "init-chown-data"
          image: "busybox:latest"
          imagePullPolicy: "IfNotPresent"
          command: ["chown", "-R", "65534:65534", "/data"]
          volumeMounts:
            - name: prometheus-data
              mountPath: /data
              subPath: ""
      containers:
        - name: prometheus-server-configmap-reload
          image: "jimmidyson/configmap-reload:v0.1"
          imagePullPolicy: "IfNotPresent"
          args:
            - --volume-dir=/etc/config
            - --webhook-url=http://localhost:9090/-/reload
          volumeMounts:
            - name: config-volume
              mountPath: /etc/config
              readOnly: true
          resources:
            limits:
              cpu: 10m
              memory: 10Mi
            requests:
              cpu: 10m
              memory: 10Mi
        - name: prometheus-server
          image: "prom/prometheus:v2.2.1"
          imagePullPolicy: "IfNotPresent"
          args:
            - --config.file=/etc/config/prometheus.yml
            - --storage.tsdb.path=/data
            - --web.console.libraries=/etc/prometheus/console_libraries
            - --web.console.templates=/etc/prometheus/consoles
            - --web.enable-lifecycle
          ports:
            - containerPort: 9090
          readinessProbe:
            httpGet:
              path: /-/ready
              port: 9090
            initialDelaySeconds: 30
            timeoutSeconds: 30
          livenessProbe:
            httpGet:
              path: /-/healthy
              port: 9090
            initialDelaySeconds: 30
            timeoutSeconds: 30
          # based on 10 running nodes with 30 pods each
          resources:
            limits:
              cpu: 200m
              memory: 1000Mi
            requests:
              cpu: 200m
              memory: 1000Mi
          volumeMounts:
            - name: config-volume
              mountPath: /etc/config
            - name: prometheus-data
              mountPath: /data
              subPath: ""
      terminationGracePeriodSeconds: 300
      volumes:
        - name: config-volume
          configMap:
            name: prometheus-config
  volumeClaimTemplates:
    - metadata:
        name: prometheus-data
      spec:
        storageClassName: standard
        accessModes:
          - ReadWriteOnce
        resources:
          requests:
            storage: "16Gi"

View File

@@ -1,6 +1,38 @@
#!/bin/bash
# Keep adding dirname calls to go up more directories.
project_dir="$(dirname $( dirname $(readlink -f ${BASH_SOURCE[0]})))"
#ANSIBLE_HOST_KEY_CHECKING=False ansible-playbook -i ${project_dir}/hosts ${project_dir}/playbooks/lxc-test.yml
ANSIBLE_HOST_KEY_CHECKING=False ansible-playbook -i ${project_dir}/hosts ${project_dir}/playbooks/kvm.yml
ANSIBLE_HOST_KEY_CHECKING=False ansible-playbook -i ${project_dir}/hosts ${project_dir}/playbooks/site.yml
declare -ar REQUIRED_ENVIRONMENT_VARIABLES=(
  "PROXMOX_PASSWORD"
  "GMAIL_SERVICE_PASSWORD"
)

# Terminal colors for error output; fall back to empty strings when tput is unavailable.
red="$(tput setaf 1 2>/dev/null || true)"
reset="$(tput sgr0 2>/dev/null || true)"

main() {
  check-env
  run-ansible
}

check-env() {
  local -a undefined_variables=()
  for var in "${REQUIRED_ENVIRONMENT_VARIABLES[@]}"; do
    if [[ ! -v ${var} ]]; then
      undefined_variables+=("${var}")
    fi
  done
  if [[ "${#undefined_variables[@]}" -gt 0 ]]; then
    echo "${red}ERROR: The following environment variables must be defined:"
    printf '  %s\n' "${undefined_variables[@]}"
    echo "${reset}"
    exit 1
  fi
}

run-ansible() {
  #ANSIBLE_HOST_KEY_CHECKING=False ansible-playbook -i ${project_dir}/hosts ${project_dir}/playbooks/lxc-test.yml
  ANSIBLE_HOST_KEY_CHECKING=False ansible-playbook -i ${project_dir}/hosts ${project_dir}/playbooks/kvm.yml
  ANSIBLE_HOST_KEY_CHECKING=False ansible-playbook -i ${project_dir}/hosts ${project_dir}/playbooks/site.yml
}

[[ $0 == "${BASH_SOURCE}" ]] && main "$@"
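
A hypothetical invocation (the script's path in the repo isn't shown in this diff, so the name below is assumed):

PROXMOX_PASSWORD='...' GMAIL_SERVICE_PASSWORD='...' ./scripts/run.sh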