diff --git a/README.md b/README.md
index 90f77c3..7e834c8 100644
--- a/README.md
+++ b/README.md
@@ -1,30 +1,29 @@
 # startup-infrastructure
-This is an Ansible playbook that configures a Docker Swarm cluster and deploys a full set of self hosted tools for a new company. After you point a wildcard A record to the swarm, you will be able to access the following tools. The goal of this project is to create a one stop shop for self hosting your infrastructure.
+This is an Ansible playbook that configures a Kubernetes cluster and deploys a full set of self-hosted tools for a new company. After you point a wildcard A record to the cluster, you will be able to access the following tools. The goal of this project is to create a one-stop shop for self-hosting your infrastructure.
 
 ### Services
-1. Docker management GUI - Using [Portainer](https://portainer.io/)
-2. Internal swarm load balancer and letsencrypt endpoint - [Traefik](https://traefik.io/)
-
-### URLs in your environment. If you use ssl obviously these will be https Assuming your A record is *.test.com:
-1. http://portainer.test.com/ - Portainer
-2. http://swarm.test.com:8081/ - Traefik load balancer dashboard
+1. Kanban board - Using [Wekan](https://wekan.github.io/). This is connected to MongoDB and will be accessible at http://kanban.root_domain
+2. MySQL cluster - Using [Presslabs MySQL Operator](https://www.presslabs.com/docs/mysql-operator/getting-started/)
+3. MongoDB cluster
 
 # Deploy
-### Requirements
+### Requirements for the deployment machine (the machine you're running the deploy from)
 1. Python
 2. Pip
 3. Pipenv
-4. SSH access to all nodes you're deploying to.
-   * You will need to define and environment variable for your ssh key. `export PRIVATE_KEY="/location/of/key"`
-   * OR you will need a ssh agent running
+4. Docker
+5. SSH access to all nodes you're deploying to.
+
+### Requirements for the infrastructure (the machines you're deploying to)
+1. Ubuntu or Debian
+2. Internet access
 
 ### Steps
-1. Copy hosts.example to hosts
+1. Copy `hosts.example` to `hosts`
    * Put ip addresses under the sections
-   * Bootstrap will be the first node in the cluster. If you are only doing a one node cluster, this is where you put your ip
-   * Managers are nodes used for managing a swarm cluster. Managers are recommended in 3's or 5's (bootstrap is a manager). Please see this for swarm best practices: https://docs.docker.com/engine/swarm/admin_guide/
+   * Masters are nodes used for managing a Kubernetes cluster. It is recommended to have 3 or 5 masters.
    * Workers are nodes used for running containers. You can have as many as necessary.
 2. Copy group_vars/all.example to group_vars/all
    * This is where a lot of configuration comes in. Please see our documentation.
@@ -40,8 +39,10 @@
 You can easily run a lab environment with Vagrant.
 3. Run `vagrant up`
 4. Run `vagrant ssh client -c 'bash /vagrant/tests/files/run-test-deploy.sh'`
 5. Put the following in your `/etc/hosts` file:
-   * `192.168.254.2 swarm.test.com`
-   * `192.168.254.2 portainer.test.com`
+```
+192.168.254.2 kanban.test.com
+192.168.254.2 mysql-orchestrator.test.com
+```
 6. Now navigate to any of the services at http://servicename.test.com
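For a quick sanity check of the lab, something like this works from the host machine (a sketch: it assumes the Vagrant lab IP and hostnames above, and that the services answer plain HTTP through the cluster ingress):

```bash
#!/usr/bin/env bash
# Add the lab entries to /etc/hosts only if they are not already there.
for host in kanban.test.com mysql-orchestrator.test.com; do
  grep -q "$host" /etc/hosts || echo "192.168.254.2 $host" | sudo tee -a /etc/hosts
done

# Each service should report an HTTP status code once the deploy finishes.
curl -fsS -o /dev/null -w 'kanban: %{http_code}\n' http://kanban.test.com/
```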
diff --git a/Vagrantfile b/Vagrantfile
index 9bd535a..d9fffb9 100644
--- a/Vagrantfile
+++ b/Vagrantfile
@@ -1,29 +1,29 @@
 Vagrant.configure("2") do |config|
-  config.vm.define "bootstrap" do |bootstrap|
-    bootstrap.vm.box = "ubuntu/bionic64"
+  config.vm.define "master1" do |bootstrap|
+    bootstrap.vm.box = "debian/stretch64"
     bootstrap.vm.hostname = "bootstrap"
     bootstrap.vm.network "private_network", ip: "192.168.254.2"
     bootstrap.vm.provision :shell, path: "tests/files/provision-script.sh"
     bootstrap.vm.provider "virtualbox" do |v|
-      v.memory = 1024
-      v.cpus = 1
+      v.memory = 2048
+      v.cpus = 2
     end
   end
   config.vm.define "worker1" do |worker1|
-    worker1.vm.box = "ubuntu/bionic64"
+    worker1.vm.box = "debian/stretch64"
     worker1.vm.hostname = "worker1"
     worker1.vm.network "private_network", ip: "192.168.254.3"
     worker1.vm.provision :shell, path: "tests/files/provision-script.sh"
     worker1.vm.provider "virtualbox" do |v|
-      v.memory = 1024
-      v.cpus = 1
+      v.memory = 2048
+      v.cpus = 2
     end
   end
   config.vm.define "client" do |client|
-    client.vm.box = "ubuntu/bionic64"
+    client.vm.box = "debian/stretch64"
     client.vm.hostname = "client"
     client.vm.network "private_network", ip: "192.168.254.4"
     client.vm.provision :shell, path: "tests/files/install-pip.sh"
diff --git a/ansible.cfg b/ansible.cfg
deleted file mode 100644
index 33665ad..0000000
--- a/ansible.cfg
+++ /dev/null
@@ -1,4 +0,0 @@
-[defaults]
-host_key_checking = False
-#private_key_file = ~/.ssh/id_rsa
-#remote_user = root
\ No newline at end of file
diff --git a/group_vars/all.example b/group_vars/all.example
index 0848d08..ae32e20 100644
--- a/group_vars/all.example
+++ b/group_vars/all.example
@@ -2,39 +2,18 @@
 # Variables listed here are applicable to all host groups
 
 ### Software versions
-docker_compose_version_to_install: 1.18.0
 docker_ce_version_to_install: 18.03.1
 
+### rke variables
+rke_directory: /root/rke
+rke_ssh_key_location: /root/id_rsa
+rke_node_directory: /opt/rke
+rke_version: 0.3.1
+rke_cluster_name: rke-k8s
+
 ### User stuff
 standard_user: vagrant
 chosen_timezone: "America/New_York"
 
 # root domain for all services. You should have an A record for *.root_domain. For example, if your domain is test.com you should have an A record for *.test.com pointing to your node.
 # this will allow automatic dns for for things like dokuwiki.test.com and portainer.test.com
-root_domain: test.com
-
-# interface for the swarm network
-swarm_network_interface: eth1
-
-### Persistent storage if you are doing a single machine deploy, local is an option. If you are doing multi instance deploy, choose one of the following:
-# nfs
-# digitalocean
-# gcp
-# aws
-# openstack
-# ceph
-# then fill out the variables associated with your choice below
-storage_type: "local"
-
-# nfs variables: these need defined if using nfs storage
-#nfs_address: 0.0.0.0
-#nfs_root_path: /some/filepath/on/nfs
-
-# digitalocean variables: these need defined if using digitalocean storage
-
-# gcp variables: these need defined if using gcp storage
-
-# aws variables: these need defined if using aws storage
-
-# openstack variables: these need defined if using openstack storage
-
-# ceph variables: these need defined if using ceph storage
+root_domain: test.com
\ No newline at end of file
diff --git a/hosts.example b/hosts.example
index add8dcf..86f4968 100644
--- a/hosts.example
+++ b/hosts.example
@@ -1,10 +1,12 @@
-# Node where config files get copied to and docker swarm gets initiated
-# replace localhost with the ip of your bootstrap node
-[bootstrap]
-localhost
+# Define ssh variables
+# Be sure to define your ssh user and your private key path
+[all:vars]
+ansible_ssh_common_args='-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null'
+ansible_private_key_file=/vagrant/lab/test_rsa
+ansible_user=root
 
-# nodes that will be swarm managers (note these will not host services)
-[non-bootstrap-managers]
+# kubernetes masters
+[kube-masters]
 
-# nodes that will be swarm workers (note these will need to have more resources than managers)
-[workers]
\ No newline at end of file
+# kubernetes workers
+[kube-workers]
\ No newline at end of file
diff --git a/playbooks/kubernetes.yml b/playbooks/kubernetes.yml
new file mode 100644
index 0000000..62ed5a1
--- /dev/null
+++ b/playbooks/kubernetes.yml
@@ -0,0 +1,19 @@
+---
+- name: Pre rke
+  hosts: kube-masters:kube-workers
+  gather_facts: yes
+  serial: 100%
+  tasks:
+    - include: ../roles/kubernetes/tasks/pre-rke.yml
+
+- name: Set up Kubernetes
+  hosts: localhost
+  tasks:
+    - include: ../roles/kubernetes/tasks/main.yml
+
+- name: Post rke
+  hosts: kube-masters:kube-workers
+  gather_facts: yes
+  serial: 100%
+  tasks:
+    - include: ../roles/kubernetes/tasks/post-rke.yml
\ No newline at end of file
diff --git a/playbooks/site.yml b/playbooks/site.yml
index 907700e..bc52695 100644
--- a/playbooks/site.yml
+++ b/playbooks/site.yml
@@ -19,11 +19,10 @@
   tasks:
     - include: ../roles/common/tasks/main.yml
 
-- import_playbook: swarm.yml
+- import_playbook: kubernetes.yml
 
-- name: Deploy startup-infrastructure swarm stack
-  hosts: bootstrap
-  user: root
-  serial: 100%
+- name: Deploy startup-infrastructure to kubernetes
+  hosts: localhost
+  connection: local
   tasks:
     - include: ../roles/startup-infrastructure/tasks/main.yml
\ No newline at end of file
diff --git a/playbooks/swarm.yml b/playbooks/swarm.yml
deleted file mode 100644
index 3184402..0000000
--- a/playbooks/swarm.yml
+++ /dev/null
@@ -1,64 +0,0 @@
----
-- name: Initialize the swarm
-  hosts: bootstrap
-  user: root
-  gather_facts: true
-  serial: 100%
-  tasks:
-    - name: Print ansible interfaces
-      debug:
-        msg: "{{ ansible_interfaces }}"
-
-    # - name: Get ip of swarm_network_interface by parsing all interfaces
-    #   set_fact:
-    #     swarm_init_ip={{hostvars[inventory_hostname]['ansible_item']['ipv4']['address']}}
-    #   when: (item == swarm_network_interface)
-    #   with_items:
-    #     - "{{ ansible_interfaces }}"
-    - name: Set interface var name fact
-      set_fact:
-        swarm_interface_var_name: "ansible_{{ swarm_network_interface }}"
-
-    - name: Set swarm advertise ip
address - set_fact: - swarm_init_ip: "{{ hostvars[inventory_hostname][swarm_interface_var_name]['ipv4']['address'] }}" - - - name: Print swarm init ip address - debug: - msg: "{{ swarm_init_ip }}" - - - include_tasks: ../tasks/swarm-bootstrap.yml - vars: - join_addr: "{{ swarm_init_ip }}" - -- name: Add additional managers to the swarm - hosts: managers - user: root - gather_facts: false - serial: 100% - vars: - manager_join_key: - "{{ hostvars[groups['bootstrap'][0]]['manager_key']['stdout'] }}" - swarm_init_ip: - "{{ hostvars[groups['bootstrap'][0]]['swarm_init_ip'] }}" - tasks: - - include_tasks: ../tasks/swarm-join.yml - vars: - join_addr: "{{ swarm_init_ip }}" - join_key: "{{ manager_join_key }}" - -- name: Add workers to the swarm - hosts: workers - user: root - gather_facts: false - serial: 100% - vars: - worker_join_key: - "{{ hostvars[groups['bootstrap'][0]]['worker_key']['stdout'] }}" - swarm_init_ip: - "{{ hostvars[groups['bootstrap'][0]]['swarm_init_ip'] }}" - tasks: - - include_tasks: ../tasks/swarm-join.yml - vars: - join_addr: "{{ swarm_init_ip }}" - join_key: "{{ worker_join_key }}" \ No newline at end of file diff --git a/roles/common/tasks/main.yml b/roles/common/tasks/main.yml index 5b523f9..4b2b02e 100644 --- a/roles/common/tasks/main.yml +++ b/roles/common/tasks/main.yml @@ -1,25 +1,17 @@ --- # This playbook contains plays that will run on all nodes -- name: Add docker key - apt_key: - url: https://download.docker.com/linux/ubuntu/gpg - state: present -- name: Add docker repo - apt_repository: - repo: deb [arch=amd64] https://download.docker.com/linux/ubuntu {{ ansible_distribution_release }} stable - state: present - -- name: Update apt - apt: update_cache=yes #- name: Upgrade APT to the lastest packages # apt: upgrade=dist - +- name: Update apt + apt: update_cache=yes + - name: Install standard programs apt: name={{ item }} state=present force=yes with_items: + - apt-transport-https - htop - curl - openssh-server @@ -40,6 +32,19 @@ - tmux - sudo +- name: Add docker key + apt_key: + url: https://download.docker.com/linux/{{ ansible_distribution|lower }}/gpg + state: present + +- name: Add docker repo + apt_repository: + repo: deb [arch=amd64] https://download.docker.com/linux/{{ ansible_distribution|lower }} {{ ansible_distribution_release }} stable + state: present + +- name: Update apt + apt: update_cache=yes + - name: Install docker-ce shell: > apt-get install -y -qq docker-ce=$(apt-cache madison docker-ce | grep "{{ docker_ce_version_to_install }}" | awk {'print $3'}) @@ -50,14 +55,6 @@ groups: docker append: yes -- name: Docker compose version - get_url: - url: "https://github.com/docker/compose/releases/download/{{docker_compose_version_to_install}}/docker-compose-{{ ansible_system }}-{{ ansible_userspace_architecture }}" - dest: /usr/local/bin/docker-compose - validate_certs: false - mode: 755 - group: docker - - name: Set timezone to NewYork timezone: name: "{{ chosen_timezone }}" diff --git a/roles/dokuwiki/tasks/main.yml b/roles/dokuwiki/tasks/main.yml deleted file mode 100644 index 7f6a721..0000000 --- a/roles/dokuwiki/tasks/main.yml +++ /dev/null @@ -1,6 +0,0 @@ ---- -- name: Replace sudoers file - template: src=../roles/dokuwiki/templates/docker-compose.yml.j2 dest=/data/dokuwiki.yml - -- name: Run docker-compose - shell: cd /data && docker-compose -f dokuwiki.yml up -d \ No newline at end of file diff --git a/roles/dokuwiki/templates/docker-compose.yml.j2 b/roles/dokuwiki/templates/docker-compose.yml.j2 deleted file mode 100644 index a918ced..0000000 --- 
a/roles/dokuwiki/templates/docker-compose.yml.j2
+++ /dev/null
@@ -1,22 +0,0 @@
-version: '2'
-services:
-  dokuwiki:
-    image: 'bitnami/dokuwiki:latest'
-    labels:
-      kompose.service.type: nodeport
-    ports:
-      - '80:80'
-      - '443:443'
-    volumes:
-      - 'dokuwiki_data:/bitnami'
-volumes:
-  dokuwiki_data:
-    {% if storage_type == 'nfs' %}
-    driver: local
-    driver_opts:
-      type: nfs
-      o: "addr={{ nfs_address }},soft,nolock,rw"
-      device: ":{{ nfs_root_path }}/dokuwiki"
-    {% elif storage_type == 'local' %}
-    driver: local
-    {% endif %}
\ No newline at end of file
diff --git a/roles/gitea/tasks/main.yml b/roles/gitea/tasks/main.yml
deleted file mode 100644
index 5820968..0000000
--- a/roles/gitea/tasks/main.yml
+++ /dev/null
@@ -1,6 +0,0 @@
----
-- name: Replace sudoers file
-  template: src=../roles/gitea/templates/docker-compose.yml.j2 dest=/data/gitea.yml
-
-- name: Run docker-compose
-  shell: cd /data && docker-compose -f gitea.yml up -d
\ No newline at end of file
diff --git a/roles/gitea/templates/docker-compose.yml.j2 b/roles/gitea/templates/docker-compose.yml.j2
deleted file mode 100644
index 5a48afe..0000000
--- a/roles/gitea/templates/docker-compose.yml.j2
+++ /dev/null
@@ -1,45 +0,0 @@
-version: '2'
-services:
-  web:
-    image: gitea/gitea:1.3.2
-    volumes:
-      - gitea_data:/data
-    ports:
-      - "80:3000"
-      - "2222:22"
-    restart: always
-
-volumes:
-  gitea_data:
-    {% if storage_type == 'nfs' %}
-    driver: local
-    driver_opts:
-      type: nfs
-      o: "addr={{ nfs_address }},soft,nolock,rw"
-      device: ":{{ nfs_root_path }}/gitea"
-    {% elif storage_type == 'local' %}
-    driver: local
-    {% endif %}
-
-
-# setup options: https://docs.gitea.io/en-us/config-cheat-sheet/
-# networks:
-#   - db
-# depends_on:
-#   - db
-# db:
-#   image: mariadb:10
-#   restart: always
-#   networks:
-#     - db
-#   environment:
-#     - MYSQL_ROOT_PASSWORD=changeme
-#     - MYSQL_DATABASE=gitea
-#     - MYSQL_USER=gitea
-#     - MYSQL_PASSWORD=changeme
-#   volumes:
-#     - db/:/var/lib/mysql
-#networks:
-#  db:
-#volumes:
-#  db:
\ No newline at end of file
diff --git a/roles/kubernetes/tasks/main.yml b/roles/kubernetes/tasks/main.yml
new file mode 100644
index 0000000..30487ea
--- /dev/null
+++ b/roles/kubernetes/tasks/main.yml
@@ -0,0 +1,74 @@
+---
+- name: Create RKE directory
+  file:
+    path: "{{ rke_directory }}"
+    state: directory
+    mode: '0774'
+    owner: "{{ standard_user }}"
+    group: "{{ standard_user }}"
+  delegate_to: localhost
+  run_once: true
+  become: true
+
+- name: Create RKE Configs directory
+  file:
+    path: "{{ rke_directory }}/configs"
+    state: directory
+  delegate_to: localhost
+  run_once: true
+  become: true
+
+- name: Install RKE
+  get_url:
+    dest: "{{ rke_directory }}/rke"
+    url: https://github.com/rancher/rke/releases/download/v{{ rke_version }}/rke_linux-amd64
+  delegate_to: localhost
+  run_once: true
+  become: true
+
+- name: Make RKE executable
+  file:
+    dest: "{{ rke_directory }}/rke"
+    mode: "+x"
+  delegate_to: localhost
+  run_once: true
+  become: true
+
+- name: Put RKE cluster config in place
+  template:
+    src: ../templates/rke-cluster-deployment.yaml.j2
+    dest: "{{ rke_directory }}/{{ rke_cluster_name }}.yaml"
+  delegate_to: localhost
+  run_once: true
+  become: true
+
+- name: Put RKE configs in place
+  template:
+    src: ../templates/rke-configs/{{ item }}.j2
+    dest: "{{ rke_directory }}/configs/{{ item }}"
+  with_items:
+    - kube-state-metrics-deployment.yaml
+    - kube-state-metrics-service.yaml
+    - kube-state-metrics-rbac.yaml
+  delegate_to: localhost
+  run_once: true
+  become: true
+
+- name: Run RKE
+  shell: >
+    bash -c "{{ rke_directory }}/rke up --config {{ rke_directory }}/{{ rke_cluster_name }}.yaml"
+  delegate_to: localhost
+  run_once: true
+  become: true
+
+- name: Set permissions on rke directory
+  file:
+    path: "{{ rke_directory }}"
+    state: directory
+    mode: '0774'
+    owner: "{{ standard_user }}"
+    group: "{{ standard_user }}"
+    recurse: yes
+  delegate_to: localhost
+  run_once: true
+  become: true
\ No newline at end of file
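The `Run RKE` task above is the actual cluster bring-up: RKE reads the rendered cluster YAML, connects to each node over SSH, and writes a kubeconfig next to the config file. Roughly the equivalent by hand (a sketch, using the `rke_directory` and `rke_cluster_name` defaults from `group_vars`):

```bash
#!/usr/bin/env bash
set -euo pipefail

RKE_DIR=/root/rke   # rke_directory
CLUSTER=rke-k8s     # rke_cluster_name

# Bring the cluster up; re-running reconciles the existing cluster.
"$RKE_DIR/rke" up --config "$RKE_DIR/$CLUSTER.yaml"

# RKE names the kubeconfig kube_config_<config-file-name> next to the config.
export KUBECONFIG="$RKE_DIR/kube_config_$CLUSTER.yaml"
kubectl get nodes
```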
diff --git a/roles/kubernetes/tasks/post-rke.yml b/roles/kubernetes/tasks/post-rke.yml
new file mode 100644
index 0000000..b9dbe98
--- /dev/null
+++ b/roles/kubernetes/tasks/post-rke.yml
@@ -0,0 +1,12 @@
+---
+- name: Copy RKE kube config back to nodes after RKE run
+  copy:
+    src: "{{ rke_directory }}/kube_config_{{ rke_cluster_name }}.yaml"
+    dest: "{{ rke_node_directory }}/kube_config_{{ rke_cluster_name }}.yaml"
+  become: true
+
+- name: Copy RKE cluster state back to nodes after RKE run
+  copy:
+    src: "{{ rke_directory }}/{{ rke_cluster_name }}.rkestate"
+    dest: "{{ rke_node_directory }}/{{ rke_cluster_name }}.rkestate"
+  become: true
\ No newline at end of file
diff --git a/roles/kubernetes/tasks/pre-rke.yml b/roles/kubernetes/tasks/pre-rke.yml
new file mode 100644
index 0000000..17369a4
--- /dev/null
+++ b/roles/kubernetes/tasks/pre-rke.yml
@@ -0,0 +1,70 @@
+---
+- name: Update apt
+  apt: update_cache=yes
+  become: true
+
+- name: Install programs to add debian repositories
+  apt: name={{ item }} state=present force=yes
+  with_items:
+    - curl
+    - apt-transport-https
+  become: true
+
+- name: Add kubernetes key
+  apt_key:
+    url: https://packages.cloud.google.com/apt/doc/apt-key.gpg
+    state: present
+  become: true
+
+- name: Add kubernetes repo
+  apt_repository:
+    repo: deb https://apt.kubernetes.io/ kubernetes-xenial main
+    state: present
+  become: true
+
+- name: Update apt
+  apt: update_cache=yes
+  become: true
+
+- name: Install kubectl
+  apt: name={{ item }} state=present force=yes
+  with_items:
+    - kubectl
+  become: true
+
+- name: Create RKE directory on nodes
+  file:
+    path: "{{ rke_node_directory }}"
+    state: directory
+    mode: '0774'
+    owner: "{{ standard_user }}"
+    group: "{{ standard_user }}"
+  become: true
+
+- name: Check if RKE cluster state file exists
+  stat:
+    path: "{{ rke_node_directory }}/{{ rke_cluster_name }}.rkestate"
+  register: cluster_state_result
+  become: true
+
+- name: Check if RKE kubeconfig file exists
+  stat:
+    path: "{{ rke_node_directory }}/kube_config_{{ rke_cluster_name }}.yaml"
+  register: kube_config_result
+  become: true
+
+- name: Copy RKE cluster state back to local if it already exists
+  fetch:
+    src: "{{ rke_node_directory }}/{{ rke_cluster_name }}.rkestate"
+    dest: "{{ rke_directory }}/{{ rke_cluster_name }}.rkestate"
+    flat: yes
+  when: cluster_state_result.stat.exists
+  become: true
+
+- name: Copy RKE kube config if it already exists
+  fetch:
+    src: "{{ rke_node_directory }}/kube_config_{{ rke_cluster_name }}.yaml"
+    dest: "{{ rke_directory }}/kube_config_{{ rke_cluster_name }}.yaml"
+    flat: yes
+  when: kube_config_result.stat.exists
+  become: true
\ No newline at end of file
diff --git a/roles/kubernetes/templates/rke-cluster-deployment.yaml.j2 b/roles/kubernetes/templates/rke-cluster-deployment.yaml.j2
new file mode 100644
index 0000000..9dac4ef
--- /dev/null
+++ b/roles/kubernetes/templates/rke-cluster-deployment.yaml.j2
@@ -0,0 +1,32 @@
+---
+
+ssh_key_path: {{ rke_ssh_key_location }}
+
+cluster_name: {{ rke_cluster_name }}
+ignore_docker_version: true
+system_images:
+  kubernetes: rancher/hyperkube:v1.15.5-rancher1
+
+nodes:
+  {% for
node in groups['kube-masters'] %} + + - address: {{node}} + name: {{node}} + user: {{standard_user}} + role: + - controlplane + - etcd + {% endfor %} + {% for node in groups['kube-workers'] %} + + - address: {{node}} + name: {{node}} + user: {{standard_user}} + role: + - worker + {% endfor %} + +authentication: + strategy: x509 + sans: + - "kubernetes.{{ root_domain }}" \ No newline at end of file diff --git a/roles/kubernetes/templates/rke-configs/alertmanager-configmap.yaml.j2 b/roles/kubernetes/templates/rke-configs/alertmanager-configmap.yaml.j2 new file mode 100644 index 0000000..471cb4a --- /dev/null +++ b/roles/kubernetes/templates/rke-configs/alertmanager-configmap.yaml.j2 @@ -0,0 +1,28 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: alertmanager-config + namespace: kube-system + labels: + kubernetes.io/cluster-service: "true" + addonmanager.kubernetes.io/mode: EnsureExists +data: + alertmanager.yml: | + global: null + receivers: + - name: default-receiver + - name: email + email_configs: + - to: jacob.wimer@gmail.com + from: jacob.wimer@gmail.com + smarthost: smtp.gmail.com:587 + auth_username: "jacob.wimer@gmail.com" + auth_identity: "jacob.wimer@gmail.com" + auth_password: {{ alertmanager_email_password }} + send_resolved: true + + route: + group_interval: 5m + group_wait: 10s + receiver: email + repeat_interval: 3h diff --git a/roles/kubernetes/templates/rke-configs/alertmanager-deployment.yaml.j2 b/roles/kubernetes/templates/rke-configs/alertmanager-deployment.yaml.j2 new file mode 100644 index 0000000..744d9ad --- /dev/null +++ b/roles/kubernetes/templates/rke-configs/alertmanager-deployment.yaml.j2 @@ -0,0 +1,79 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: alertmanager + namespace: kube-system + labels: + k8s-app: alertmanager + kubernetes.io/cluster-service: "true" + addonmanager.kubernetes.io/mode: Reconcile + version: v0.14.0 +spec: + replicas: 1 + selector: + matchLabels: + k8s-app: alertmanager + version: v0.14.0 + template: + metadata: + labels: + k8s-app: alertmanager + version: v0.14.0 + annotations: + scheduler.alpha.kubernetes.io/critical-pod: '' + spec: + priorityClassName: system-cluster-critical + containers: + - name: prometheus-alertmanager + image: "prom/alertmanager:v0.14.0" + imagePullPolicy: "IfNotPresent" + args: + - --config.file=/etc/config/alertmanager.yml + - --storage.path=/data + - --web.external-url=/ + ports: + - containerPort: 9093 + readinessProbe: + httpGet: + path: /#/status + port: 9093 + initialDelaySeconds: 30 + timeoutSeconds: 30 + volumeMounts: + - name: config-volume + mountPath: /etc/config + - name: storage-volume + mountPath: "/data" + subPath: "" + resources: + limits: + cpu: 10m + memory: 50Mi + requests: + cpu: 10m + memory: 50Mi + - name: prometheus-alertmanager-configmap-reload + image: "jimmidyson/configmap-reload:v0.1" + imagePullPolicy: "IfNotPresent" + args: + - --volume-dir=/etc/config + - --webhook-url=http://localhost:9093/-/reload + volumeMounts: + - name: config-volume + mountPath: /etc/config + readOnly: true + resources: + limits: + cpu: 10m + memory: 10Mi + requests: + cpu: 10m + memory: 10Mi + volumes: + - name: config-volume + configMap: + name: alertmanager-config + - name: storage-volume + persistentVolumeClaim: + claimName: alertmanager + diff --git a/roles/kubernetes/templates/rke-configs/alertmanager-pvc.yaml.j2 b/roles/kubernetes/templates/rke-configs/alertmanager-pvc.yaml.j2 new file mode 100644 index 0000000..b82b12e --- /dev/null +++ 
b/roles/kubernetes/templates/rke-configs/alertmanager-pvc.yaml.j2 @@ -0,0 +1,14 @@ +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: alertmanager + namespace: kube-system + labels: + app: alertmanager +spec: + storageClassName: standard + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 2Gi diff --git a/roles/kubernetes/templates/rke-configs/alertmanager-service.yaml.j2 b/roles/kubernetes/templates/rke-configs/alertmanager-service.yaml.j2 new file mode 100644 index 0000000..62c7b59 --- /dev/null +++ b/roles/kubernetes/templates/rke-configs/alertmanager-service.yaml.j2 @@ -0,0 +1,18 @@ +apiVersion: v1 +kind: Service +metadata: + name: alertmanager + namespace: kube-system + labels: + kubernetes.io/cluster-service: "true" + addonmanager.kubernetes.io/mode: Reconcile + kubernetes.io/name: "Alertmanager" +spec: + ports: + - name: http + port: 80 + protocol: TCP + targetPort: 9093 + selector: + k8s-app: alertmanager + type: "ClusterIP" diff --git a/roles/kubernetes/templates/rke-configs/kube-state-metrics-deployment.yaml.j2 b/roles/kubernetes/templates/rke-configs/kube-state-metrics-deployment.yaml.j2 new file mode 100644 index 0000000..823696b --- /dev/null +++ b/roles/kubernetes/templates/rke-configs/kube-state-metrics-deployment.yaml.j2 @@ -0,0 +1,92 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: kube-state-metrics + namespace: kube-system + labels: + k8s-app: kube-state-metrics + kubernetes.io/cluster-service: "true" + addonmanager.kubernetes.io/mode: Reconcile + version: v1.3.0 +spec: + selector: + matchLabels: + k8s-app: kube-state-metrics + version: v1.3.0 + replicas: 1 + template: + metadata: + labels: + k8s-app: kube-state-metrics + version: v1.3.0 + annotations: + scheduler.alpha.kubernetes.io/critical-pod: '' + spec: + priorityClassName: system-cluster-critical + serviceAccountName: kube-state-metrics + containers: + - name: kube-state-metrics + image: quay.io/coreos/kube-state-metrics:v1.3.0 + ports: + - name: http-metrics + containerPort: 8080 + - name: telemetry + containerPort: 8081 + readinessProbe: + httpGet: + path: /healthz + port: 8080 + initialDelaySeconds: 5 + timeoutSeconds: 5 + - name: addon-resizer + image: k8s.gcr.io/addon-resizer:1.8.4 + resources: + limits: + cpu: 100m + memory: 30Mi + requests: + cpu: 100m + memory: 30Mi + env: + - name: MY_POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: MY_POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + volumeMounts: + - name: config-volume + mountPath: /etc/config + command: + - /pod_nanny + - --config-dir=/etc/config + - --container=kube-state-metrics + - --cpu=100m + - --extra-cpu=1m + - --memory=100Mi + - --extra-memory=2Mi + - --threshold=5 + - --deployment=kube-state-metrics + volumes: + - name: config-volume + configMap: + name: kube-state-metrics-config +--- +# Config map for resource configuration. 
+apiVersion: v1 +kind: ConfigMap +metadata: + name: kube-state-metrics-config + namespace: kube-system + labels: + k8s-app: kube-state-metrics + kubernetes.io/cluster-service: "true" + addonmanager.kubernetes.io/mode: Reconcile +data: + NannyConfiguration: |- + apiVersion: nannyconfig/v1alpha1 + kind: NannyConfiguration + + diff --git a/roles/kubernetes/templates/rke-configs/kube-state-metrics-rbac.yaml.j2 b/roles/kubernetes/templates/rke-configs/kube-state-metrics-rbac.yaml.j2 new file mode 100644 index 0000000..6eb2981 --- /dev/null +++ b/roles/kubernetes/templates/rke-configs/kube-state-metrics-rbac.yaml.j2 @@ -0,0 +1,104 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: kube-state-metrics + namespace: kube-system + labels: + kubernetes.io/cluster-service: "true" + addonmanager.kubernetes.io/mode: Reconcile +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: kube-state-metrics + labels: + kubernetes.io/cluster-service: "true" + addonmanager.kubernetes.io/mode: Reconcile +rules: +- apiGroups: [""] + resources: + - configmaps + - secrets + - nodes + - pods + - services + - resourcequotas + - replicationcontrollers + - limitranges + - persistentvolumeclaims + - persistentvolumes + - namespaces + - endpoints + verbs: ["list", "watch"] +- apiGroups: ["extensions"] + resources: + - daemonsets + - deployments + - replicasets + verbs: ["list", "watch"] +- apiGroups: ["apps"] + resources: + - statefulsets + verbs: ["list", "watch"] +- apiGroups: ["batch"] + resources: + - cronjobs + - jobs + verbs: ["list", "watch"] +- apiGroups: ["autoscaling"] + resources: + - horizontalpodautoscalers + verbs: ["list", "watch"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: kube-state-metrics-resizer + namespace: kube-system + labels: + kubernetes.io/cluster-service: "true" + addonmanager.kubernetes.io/mode: Reconcile +rules: +- apiGroups: [""] + resources: + - pods + verbs: ["get"] +- apiGroups: ["extensions"] + resources: + - deployments + resourceNames: ["kube-state-metrics"] + verbs: ["get", "update"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: kube-state-metrics + labels: + kubernetes.io/cluster-service: "true" + addonmanager.kubernetes.io/mode: Reconcile +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: kube-state-metrics +subjects: +- kind: ServiceAccount + name: kube-state-metrics + namespace: kube-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: kube-state-metrics + namespace: kube-system + labels: + kubernetes.io/cluster-service: "true" + addonmanager.kubernetes.io/mode: Reconcile +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: kube-state-metrics-resizer +subjects: +- kind: ServiceAccount + name: kube-state-metrics + namespace: kube-system + diff --git a/roles/kubernetes/templates/rke-configs/kube-state-metrics-service.yaml.j2 b/roles/kubernetes/templates/rke-configs/kube-state-metrics-service.yaml.j2 new file mode 100644 index 0000000..bad3ffd --- /dev/null +++ b/roles/kubernetes/templates/rke-configs/kube-state-metrics-service.yaml.j2 @@ -0,0 +1,23 @@ +apiVersion: v1 +kind: Service +metadata: + name: kube-state-metrics + namespace: kube-system + labels: + kubernetes.io/cluster-service: "true" + addonmanager.kubernetes.io/mode: Reconcile + kubernetes.io/name: "kube-state-metrics" + annotations: + prometheus.io/scrape: 'true' +spec: + ports: + - name: http-metrics + port: 8080 + targetPort: 
http-metrics
+    protocol: TCP
+  - name: telemetry
+    port: 8081
+    targetPort: telemetry
+    protocol: TCP
+  selector:
+    k8s-app: kube-state-metrics
diff --git a/roles/kubernetes/templates/rke-configs/monitoring-ingress.yaml.j2 b/roles/kubernetes/templates/rke-configs/monitoring-ingress.yaml.j2
new file mode 100644
index 0000000..feb13f3
--- /dev/null
+++ b/roles/kubernetes/templates/rke-configs/monitoring-ingress.yaml.j2
@@ -0,0 +1,32 @@
+apiVersion: extensions/v1beta1
+kind: Ingress
+metadata:
+  namespace: kube-system
+  name: alertmanager
+  #annotations:
+  #  kubernetes.io/ingress.class: traefik
+spec:
+  rules:
+  - host: alertmanager.{{ root_domain }}
+    http:
+      paths:
+      - backend:
+          serviceName: alertmanager
+          servicePort: 80
+---
+apiVersion: extensions/v1beta1
+kind: Ingress
+metadata:
+  namespace: kube-system
+  name: prometheus
+  #annotations:
+  #  kubernetes.io/ingress.class: traefik
+spec:
+  rules:
+  - host: prometheus.{{ root_domain }}
+    http:
+      paths:
+      - backend:
+          serviceName: prometheus
+          servicePort: 9090
+
diff --git a/roles/kubernetes/templates/rke-configs/nfs-client-deployment.yaml.j2 b/roles/kubernetes/templates/rke-configs/nfs-client-deployment.yaml.j2
new file mode 100644
index 0000000..3557fe2
--- /dev/null
+++ b/roles/kubernetes/templates/rke-configs/nfs-client-deployment.yaml.j2
@@ -0,0 +1,37 @@
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: nfs-client-provisioner
+---
+kind: Deployment
+apiVersion: extensions/v1beta1
+metadata:
+  name: nfs-client-provisioner
+spec:
+  replicas: 1
+  strategy:
+    type: Recreate
+  template:
+    metadata:
+      labels:
+        app: nfs-client-provisioner
+    spec:
+      serviceAccountName: nfs-client-provisioner
+      containers:
+      - name: nfs-client-provisioner
+        image: quay.io/external_storage/nfs-client-provisioner:latest
+        volumeMounts:
+        - name: nfs-client-root
+          mountPath: /persistentvolumes
+        env:
+        - name: PROVISIONER_NAME
+          value: {{ root_domain }}/nfs
+        - name: NFS_SERVER
+          value: {{ nfs_location }}
+        - name: NFS_PATH
+          value: {{ rke_nfs_path }}
+      volumes:
+      - name: nfs-client-root
+        nfs:
+          server: {{ nfs_location }}
+          path: {{ rke_nfs_path }}
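These NFS provisioner templates back the `standard` StorageClass that the alertmanager and prometheus manifests request; note they are not yet in the role's `Put RKE configs in place` list, so they would need to be applied for dynamic provisioning to work. A minimal sketch of exercising the class once it exists (the `scratch` claim name is just an example):

```bash
#!/usr/bin/env bash
# Create a claim against the "standard" StorageClass and watch it bind.
kubectl apply -f - <<'EOF'
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: scratch
spec:
  storageClassName: standard
  accessModes: ["ReadWriteOnce"]
  resources:
    requests:
      storage: 1Gi
EOF
kubectl get pvc scratch --watch
```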
diff --git a/roles/kubernetes/templates/rke-configs/nfs-client-rbac.yaml.j2 b/roles/kubernetes/templates/rke-configs/nfs-client-rbac.yaml.j2
new file mode 100644
index 0000000..8c8c640
--- /dev/null
+++ b/roles/kubernetes/templates/rke-configs/nfs-client-rbac.yaml.j2
@@ -0,0 +1,58 @@
+kind: ServiceAccount
+apiVersion: v1
+metadata:
+  name: nfs-client-provisioner
+---
+kind: ClusterRole
+apiVersion: rbac.authorization.k8s.io/v1
+metadata:
+  name: nfs-client-provisioner-runner
+rules:
+  - apiGroups: [""]
+    resources: ["persistentvolumes"]
+    verbs: ["get", "list", "watch", "create", "delete"]
+  - apiGroups: [""]
+    resources: ["persistentvolumeclaims"]
+    verbs: ["get", "list", "watch", "update"]
+  - apiGroups: ["storage.k8s.io"]
+    resources: ["storageclasses"]
+    verbs: ["get", "list", "watch"]
+  - apiGroups: [""]
+    resources: ["events"]
+    verbs: ["create", "update", "patch"]
+---
+kind: ClusterRoleBinding
+apiVersion: rbac.authorization.k8s.io/v1
+metadata:
+  name: run-nfs-client-provisioner
+subjects:
+  - kind: ServiceAccount
+    name: nfs-client-provisioner
+    namespace: kube-system
+roleRef:
+  kind: ClusterRole
+  name: nfs-client-provisioner-runner
+  apiGroup: rbac.authorization.k8s.io
+---
+kind: Role
+apiVersion: rbac.authorization.k8s.io/v1
+metadata:
+  name: leader-locking-nfs-client-provisioner
+rules:
+  - apiGroups: [""]
+    resources: ["endpoints"]
+    verbs: ["get", "list", "watch", "create", "update", "patch"]
+---
+kind: RoleBinding
+apiVersion: rbac.authorization.k8s.io/v1
+metadata:
+  name: leader-locking-nfs-client-provisioner
+subjects:
+  - kind: ServiceAccount
+    name: nfs-client-provisioner
+    # replace with namespace where provisioner is deployed
+    namespace: kube-system
+roleRef:
+  kind: Role
+  name: leader-locking-nfs-client-provisioner
+  apiGroup: rbac.authorization.k8s.io
diff --git a/roles/kubernetes/templates/rke-configs/nfs-client-storageclass.yaml.j2 b/roles/kubernetes/templates/rke-configs/nfs-client-storageclass.yaml.j2
new file mode 100644
index 0000000..c81654d
--- /dev/null
+++ b/roles/kubernetes/templates/rke-configs/nfs-client-storageclass.yaml.j2
@@ -0,0 +1,7 @@
+apiVersion: storage.k8s.io/v1
+kind: StorageClass
+metadata:
+  name: standard
+provisioner: {{ root_domain }}/nfs # must match the deployment's PROVISIONER_NAME env var
+parameters:
+  archiveOnDelete: "false"
diff --git a/roles/kubernetes/templates/rke-configs/node-exporter.yaml.j2 b/roles/kubernetes/templates/rke-configs/node-exporter.yaml.j2
new file mode 100644
index 0000000..3420285
--- /dev/null
+++ b/roles/kubernetes/templates/rke-configs/node-exporter.yaml.j2
@@ -0,0 +1,42 @@
+apiVersion: v1
+kind: Service
+metadata:
+  namespace: kube-system
+  annotations:
+    prometheus.io/scrape: 'true'
+  labels:
+    app: node-exporter
+    name: node-exporter
+  name: node-exporter
+spec:
+  clusterIP: None
+  ports:
+  - name: scrape
+    port: 9100
+    protocol: TCP
+  selector:
+    app: node-exporter
+  type: ClusterIP
+---
+apiVersion: extensions/v1beta1
+kind: DaemonSet
+metadata:
+  namespace: kube-system
+  name: node-exporter
+spec:
+  template:
+    metadata:
+      labels:
+        app: node-exporter
+        name: node-exporter
+    spec:
+      containers:
+      - image: prom/node-exporter
+        name: node-exporter
+        ports:
+        - containerPort: 9100
+          hostPort: 9100
+          name: scrape
+      hostNetwork: true
+      hostPID: true
+
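The `prometheus.io/scrape: 'true'` annotation is the hook the Prometheus scrape config below keys on when relabeling endpoints. To spot-check that node-exporter itself is answering (a sketch, assuming the lab worker IP and the kubeconfig path used by the tests):

```bash
#!/usr/bin/env bash
# node-exporter binds 9100 on the host network, so hit a node directly.
curl -fsS http://192.168.254.3:9100/metrics | grep -m3 '^node_load'

# Confirm the headless service resolved the DaemonSet pods as endpoints.
export KUBECONFIG=/opt/rke/kube_config_rke-k8s.yaml
kubectl -n kube-system get endpoints node-exporter
```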
diff --git a/roles/kubernetes/templates/rke-configs/prometheus-configmap.yaml.j2 b/roles/kubernetes/templates/rke-configs/prometheus-configmap.yaml.j2
new file mode 100644
index 0000000..1f7da47
--- /dev/null
+++ b/roles/kubernetes/templates/rke-configs/prometheus-configmap.yaml.j2
@@ -0,0 +1,236 @@
+# Prometheus configuration format https://prometheus.io/docs/prometheus/latest/configuration/configuration/
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: prometheus-config
+  namespace: kube-system
+  labels:
+    kubernetes.io/cluster-service: "true"
+    addonmanager.kubernetes.io/mode: EnsureExists
+data:
+  rules.yml: |
+    # raw to endraw is so Jinja2 does not fail on the double curly brackets in Prometheus's own syntax
+    # {% raw %}
+    groups:
+    # node-exporter
+    - name: alert.rules_nodes
+      rules:
+      - alert: high_memory_usage_on_node
+        expr: ((node_memory_MemTotal - node_memory_MemAvailable) / node_memory_MemTotal)
+          * 100 > 80
+        for: 5m
+        annotations:
+          description: '{{ $labels.host }} is using a LOT of MEMORY. MEMORY usage is over
+            {{ humanize $value}}%.'
+          summary: HIGH MEMORY USAGE WARNING TASK ON '{{ $labels.host }}'
+      - alert: high_la_usage_on_node
+        expr: node_load5 > 7
+        for: 5m
+        annotations:
+          description: '{{ $labels.host }} has a high load average. Load Average 5m is
+            {{ humanize $value}}.'
+          summary: HIGH LOAD AVERAGE WARNING ON '{{ $labels.host }}'
+      - alert: node_running_out_of_disk_space
+        expr: (node_filesystem_size{mountpoint="/"} - node_filesystem_free{mountpoint="/"})
+          * 100 / node_filesystem_size{mountpoint="/"} > 80
+        for: 5m
+        annotations:
+          description: More than 80% of disk used. Disk usage {{ humanize $value }}%.
+          summary: 'LOW DISK SPACE WARNING: NODE ''{{ $labels.host }}'''
+      - alert: monitoring_service_down
+        expr: up == 0
+        for: 90s
+        annotations:
+          description: "The monitoring service '{{ $labels.job }}' is down."
+          summary: "MONITORING SERVICE DOWN WARNING: NODE '{{ $labels.host }}'"
+    # {% endraw %}
+
+    # ceph
+    - name: alert.rules_ceph
+      rules:
+      - alert: ceph_health_warning
+        expr: ceph_health_status == 1
+        for: 5m
+        annotations:
+          description: CEPH CLUSTER HEALTH WARNING
+          summary: CEPH CLUSTER HEALTH WARNING
+
+  prometheus.yml: |
+    alerting:
+      alertmanagers:
+      - static_configs:
+        - targets: ["alertmanager"]
+    rule_files:
+    - "rules.yml"
+    scrape_configs:
+    #- job_name: 'ceph'
+    #  static_configs:
+    #  - targets:
+    #    - 'rook-ceph-mgr-external:9283'
+    - job_name: 'kubernetes-node-exporter'
+      dns_sd_configs:
+      - names:
+        - 'node-exporter'
+        type: 'A'
+        port: 9100
+
+    - job_name: prometheus
+      static_configs:
+      - targets:
+        - localhost:9090
+
+    - job_name: kubernetes-apiservers
+      kubernetes_sd_configs:
+      - role: endpoints
+      relabel_configs:
+      - action: keep
+        regex: default;kubernetes;https
+        source_labels:
+        - __meta_kubernetes_namespace
+        - __meta_kubernetes_service_name
+        - __meta_kubernetes_endpoint_port_name
+      scheme: https
+      tls_config:
+        ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
+        insecure_skip_verify: true
+      bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
+
+    - job_name: kubernetes-nodes-kubelet
+      kubernetes_sd_configs:
+      - role: node
+      relabel_configs:
+      - action: labelmap
+        regex: __meta_kubernetes_node_label_(.+)
+      scheme: https
+      tls_config:
+        ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
+        insecure_skip_verify: true
+      bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
+
+    - job_name: kubernetes-nodes-cadvisor
+      kubernetes_sd_configs:
+      - role: node
+      relabel_configs:
+      - action: labelmap
+        regex: __meta_kubernetes_node_label_(.+)
+      - target_label: __metrics_path__
+        replacement: /metrics/cadvisor
+      scheme: https
+      tls_config:
+        ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
+        insecure_skip_verify: true
+      bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
+
+    - job_name: kubernetes-service-endpoints
+      kubernetes_sd_configs:
+      - role: endpoints
+      relabel_configs:
+      - action: keep
+        regex: true
+        source_labels:
+        - __meta_kubernetes_service_annotation_prometheus_io_scrape
+      - action: replace
+        regex: (https?)
+ source_labels: + - __meta_kubernetes_service_annotation_prometheus_io_scheme + target_label: __scheme__ + - action: replace + regex: (.+) + source_labels: + - __meta_kubernetes_service_annotation_prometheus_io_path + target_label: __metrics_path__ + - action: replace + regex: ([^:]+)(?::\d+)?;(\d+) + replacement: $1:$2 + source_labels: + - __address__ + - __meta_kubernetes_service_annotation_prometheus_io_port + target_label: __address__ + - action: labelmap + regex: __meta_kubernetes_service_label_(.+) + - action: replace + source_labels: + - __meta_kubernetes_namespace + target_label: kubernetes_namespace + - action: replace + source_labels: + - __meta_kubernetes_service_name + target_label: kubernetes_name + + - job_name: kubernetes-services + kubernetes_sd_configs: + - role: service + metrics_path: /probe + params: + module: + - http_2xx + relabel_configs: + - action: keep + regex: true + source_labels: + - __meta_kubernetes_service_annotation_prometheus_io_probe + - source_labels: + - __address__ + target_label: __param_target + - replacement: blackbox + target_label: __address__ + - source_labels: + - __param_target + target_label: instance + - action: labelmap + regex: __meta_kubernetes_service_label_(.+) + - source_labels: + - __meta_kubernetes_namespace + target_label: kubernetes_namespace + - source_labels: + - __meta_kubernetes_service_name + target_label: kubernetes_name + + - job_name: kubernetes-pods + kubernetes_sd_configs: + - role: pod + relabel_configs: + - action: keep + regex: true + source_labels: + - __meta_kubernetes_pod_annotation_prometheus_io_scrape + - action: replace + regex: (.+) + source_labels: + - __meta_kubernetes_pod_annotation_prometheus_io_path + target_label: __metrics_path__ + - action: replace + regex: ([^:]+)(?::\d+)?;(\d+) + replacement: $1:$2 + source_labels: + - __address__ + - __meta_kubernetes_pod_annotation_prometheus_io_port + target_label: __address__ + - action: labelmap + regex: __meta_kubernetes_pod_label_(.+) + - action: replace + source_labels: + - __meta_kubernetes_namespace + target_label: kubernetes_namespace + - action: replace + source_labels: + - __meta_kubernetes_pod_name + target_label: kubernetes_pod_name + #alerting: + # alertmanagers: + # - kubernetes_sd_configs: + # - role: pod + # tls_config: + # ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + # bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + # relabel_configs: + # - source_labels: [__meta_kubernetes_namespace] + # regex: kube-system + # action: keep + # - source_labels: [__meta_kubernetes_pod_label_k8s_app] + # regex: alertmanager + # action: keep + # - source_labels: [__meta_kubernetes_pod_container_port_number] + # regex: + # action: drop + diff --git a/roles/kubernetes/templates/rke-configs/prometheus-rbac.yaml.j2 b/roles/kubernetes/templates/rke-configs/prometheus-rbac.yaml.j2 new file mode 100644 index 0000000..1961730 --- /dev/null +++ b/roles/kubernetes/templates/rke-configs/prometheus-rbac.yaml.j2 @@ -0,0 +1,56 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: prometheus + namespace: kube-system + labels: + kubernetes.io/cluster-service: "true" + addonmanager.kubernetes.io/mode: Reconcile +--- +apiVersion: rbac.authorization.k8s.io/v1beta1 +kind: ClusterRole +metadata: + name: prometheus + labels: + kubernetes.io/cluster-service: "true" + addonmanager.kubernetes.io/mode: Reconcile +rules: + - apiGroups: + - "" + resources: + - nodes + - nodes/metrics + - services + - endpoints + - pods + verbs: + - get + - list + 
- watch + - apiGroups: + - "" + resources: + - configmaps + verbs: + - get + - nonResourceURLs: + - "/metrics" + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1beta1 +kind: ClusterRoleBinding +metadata: + name: prometheus + labels: + kubernetes.io/cluster-service: "true" + addonmanager.kubernetes.io/mode: Reconcile +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: prometheus +subjects: +- kind: ServiceAccount + name: prometheus + namespace: kube-system + diff --git a/roles/kubernetes/templates/rke-configs/prometheus-service.yaml.j2 b/roles/kubernetes/templates/rke-configs/prometheus-service.yaml.j2 new file mode 100644 index 0000000..928bb4a --- /dev/null +++ b/roles/kubernetes/templates/rke-configs/prometheus-service.yaml.j2 @@ -0,0 +1,18 @@ +kind: Service +apiVersion: v1 +metadata: + name: prometheus + namespace: kube-system + labels: + kubernetes.io/name: "Prometheus" + kubernetes.io/cluster-service: "true" + addonmanager.kubernetes.io/mode: Reconcile +spec: + ports: + - name: http + port: 9090 + protocol: TCP + targetPort: 9090 + selector: + k8s-app: prometheus + diff --git a/roles/kubernetes/templates/rke-configs/prometheus-statefulset.yaml.j2 b/roles/kubernetes/templates/rke-configs/prometheus-statefulset.yaml.j2 new file mode 100644 index 0000000..437b5bd --- /dev/null +++ b/roles/kubernetes/templates/rke-configs/prometheus-statefulset.yaml.j2 @@ -0,0 +1,110 @@ +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: prometheus + namespace: kube-system + labels: + k8s-app: prometheus + kubernetes.io/cluster-service: "true" + addonmanager.kubernetes.io/mode: Reconcile + version: v2.2.1 +spec: + serviceName: "prometheus" + replicas: 1 + podManagementPolicy: "Parallel" + updateStrategy: + type: "RollingUpdate" + selector: + matchLabels: + k8s-app: prometheus + template: + metadata: + labels: + k8s-app: prometheus + annotations: + scheduler.alpha.kubernetes.io/critical-pod: '' + spec: + priorityClassName: system-cluster-critical + serviceAccountName: prometheus + initContainers: + - name: "init-chown-data" + image: "busybox:latest" + imagePullPolicy: "IfNotPresent" + command: ["chown", "-R", "65534:65534", "/data"] + volumeMounts: + - name: prometheus-data + mountPath: /data + subPath: "" + containers: + - name: prometheus-server-configmap-reload + image: "jimmidyson/configmap-reload:v0.1" + imagePullPolicy: "IfNotPresent" + args: + - --volume-dir=/etc/config + - --webhook-url=http://localhost:9090/-/reload + volumeMounts: + - name: config-volume + mountPath: /etc/config + readOnly: true + resources: + limits: + cpu: 10m + memory: 10Mi + requests: + cpu: 10m + memory: 10Mi + + - name: prometheus-server + image: "prom/prometheus:v2.2.1" + imagePullPolicy: "IfNotPresent" + args: + - --config.file=/etc/config/prometheus.yml + - --storage.tsdb.path=/data + - --web.console.libraries=/etc/prometheus/console_libraries + - --web.console.templates=/etc/prometheus/consoles + - --web.enable-lifecycle + ports: + - containerPort: 9090 + readinessProbe: + httpGet: + path: /-/ready + port: 9090 + initialDelaySeconds: 30 + timeoutSeconds: 30 + livenessProbe: + httpGet: + path: /-/healthy + port: 9090 + initialDelaySeconds: 30 + timeoutSeconds: 30 + # based on 10 running nodes with 30 pods each + resources: + limits: + cpu: 200m + memory: 1000Mi + requests: + cpu: 200m + memory: 1000Mi + + volumeMounts: + - name: config-volume + mountPath: /etc/config + - name: prometheus-data + mountPath: /data + subPath: "" + terminationGracePeriodSeconds: 300 + volumes: + - 
name: config-volume
+        configMap:
+          name: prometheus-config
+  volumeClaimTemplates:
+  - metadata:
+      name: prometheus-data
+    spec:
+      storageClassName: standard
+      accessModes:
+      - ReadWriteOnce
+      resources:
+        requests:
+          storage: "16Gi"
+
diff --git a/roles/startup-infrastructure/tasks/configure-portainer.yml b/roles/startup-infrastructure/tasks/configure-portainer.yml
deleted file mode 100644
index fba266b..0000000
--- a/roles/startup-infrastructure/tasks/configure-portainer.yml
+++ /dev/null
@@ -1,14 +0,0 @@
----
-#https://app.swaggerhub.com/apis/deviantony/Portainer/1.19.2/#/users/
-- name: Check for portainer admin account
-  shell: >
-    curl --silent -I -X GET "http://portainer.{{ root_domain }}/api/users/admin/check" -H "accept: application/json"
-  register: admin_account_check
-  delegate_to: localhost
-
-- name: Init admin account if it hasn't already
-  shell: >
-    curl -X POST "http://portainer.{{ root_domain }}/api/users/admin/init" -H "accept: application/json" -H \
-    "Content-Type: application/json" -d "{ \"Username\": \"admin\", \"Password\": \"{{ portainer_admin_password }}\"}"
-  when: not admin_account_check.stdout | search("204")
-  delegate_to: localhost
\ No newline at end of file
diff --git a/roles/startup-infrastructure/tasks/main.yml b/roles/startup-infrastructure/tasks/main.yml
index 9a060e6..722f658 100644
--- a/roles/startup-infrastructure/tasks/main.yml
+++ b/roles/startup-infrastructure/tasks/main.yml
@@ -1,27 +1,62 @@
 ---
-- name: Create appnet
-  shell: >
-    docker network ls | grep "appnet" ||
-    {
-      docker network create --driver overlay appnet
-    }
+- name: Create Startup Infrastructure Directory
+  file:
+    path: /opt/startup-infrastructure
+    state: directory
   become: true
-
-- name: Place the compose file
-  template:
-    src: ../roles/startup-infrastructure/templates/docker-compose.yml.j2
-    dest: /data/startup-infrastructure.yml
-    mode: 0600
+- name: Download Helm
+  get_url:
+    url: https://get.helm.sh/helm-v2.14.3-linux-amd64.tar.gz
+    dest: /tmp/helm.tar.gz
   become: true
 
-- name: Run stack deploy
-  shell: >
-    docker stack deploy -c /data/startup-infrastructure.yml startup-infrastructure
+- name: Unarchive Helm
+  unarchive:
+    src: /tmp/helm.tar.gz
+    dest: /tmp
+    copy: no
   become: true
 
-- name: Give containers time to spin up
-  wait_for:
-    timeout: 120
+- name: Copy Helm to /usr/local/bin
+  command: bash -c "mv /tmp/linux-amd64/helm /usr/local/bin/helm && rm -rf /tmp/linux-amd64"
+  become: true
 
-- include_tasks: configure-portainer.yml
\ No newline at end of file
+- name: Make Helm executable
+  file:
+    dest: /usr/local/bin/helm
+    mode: a+x
+  become: true
+
+- name: Download kubectl
+  get_url:
+    url: https://storage.googleapis.com/kubernetes-release/release/v1.16.0/bin/linux/amd64/kubectl
+    dest: /usr/local/bin/
+  become: true
+
+- name: Make kubectl executable
+  file:
+    dest: /usr/local/bin/kubectl
+    mode: a+x
+  become: true
+
+- name: Create Tiller serviceaccount
+  command: kubectl -n kube-system create serviceaccount tiller
+  become: true
+  environment:
+    KUBECONFIG: "{{ rke_directory }}/kube_config_{{ rke_cluster_name }}.yaml"
+
+- name: Create Tiller rbac
+  become: true
+  command: >
+    kubectl create clusterrolebinding tiller
+    --clusterrole=cluster-admin
+    --serviceaccount=kube-system:tiller
+  environment:
+    KUBECONFIG: "{{ rke_directory }}/kube_config_{{ rke_cluster_name }}.yaml"
+
+- name: Initialize Tiller
+  command: helm init --service-account tiller
+  become: true
+  environment:
+    KUBECONFIG: "{{ rke_directory }}/kube_config_{{ rke_cluster_name }}.yaml"
\ No newline at end of file
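`helm init` returns before Tiller is actually serving, so a post-deploy check is worth doing. A sketch, assuming the same kubeconfig the tasks above point at (`/root/rke/kube_config_rke-k8s.yaml` with the default group_vars):

```bash
#!/usr/bin/env bash
export KUBECONFIG=/root/rke/kube_config_rke-k8s.yaml

# helm init creates a tiller-deploy deployment in kube-system; wait for it.
kubectl -n kube-system rollout status deployment/tiller-deploy

# Client and server versions should both print once Tiller is reachable.
helm version
```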
diff --git a/roles/startup-infrastructure/templates/docker-compose.yml.j2 b/roles/startup-infrastructure/templates/docker-compose.yml.j2
deleted file mode 100644
index f7719e7..0000000
--- a/roles/startup-infrastructure/templates/docker-compose.yml.j2
+++ /dev/null
@@ -1,83 +0,0 @@
-#jinja2: lstrip_blocks: True
-# ^that fixes tab in compose files when jinja2 compiles them
-{% set docker_volumes = ['portainer_data','wekan-db','wekan-db-dump'] %}
-version: '3.1'
-networks:
-  appnet:
-    external: true
-  portainer:
-    driver: overlay
-
-services:
-  traefik:
-    image: traefik:1.6.4
-    ports:
-      - "80:80"
-      - "443:443"
-      - "8081:8080"
-    networks:
-      - appnet
-    volumes:
-      - /var/run/docker.sock:/var/run/docker.sock:ro
-    command: --docker --docker.swarmMode --docker.domain={{ root_domain }} --docker.watch --api --ping
-    # --acme --acme.email='test@test.com' --acme.storage='acme.json' --acme.entrypoint='https'
-    deploy:
-      mode: replicated
-      replicas: 1
-      placement:
-        constraints:
-          - node.role == manager
-
-  portainer-agent:
-    image: portainer/agent
-    environment:
-      # REQUIRED: Should be equal to the service name prefixed by "tasks." when
-      # deployed inside an overlay network
-      AGENT_CLUSTER_ADDR: tasks.portainer-agent
-      # AGENT_PORT: 9001
-      # LOG_LEVEL: debug
-    volumes:
-      - /var/run/docker.sock:/var/run/docker.sock
-      - /var/lib/docker/volumes:/var/lib/docker/volumes
-    networks:
-      - portainer
-    deploy:
-      mode: global
-      placement:
-        constraints: [node.platform.os == linux]
-
-  portainer:
-    image: portainer/portainer
-    command: -H tcp://tasks.portainer-agent:9001 --tlsskipverify
-    volumes:
-      - portainer_data:/data
-    networks:
-      - portainer
-      - appnet
-    deploy:
-      labels:
-        - "traefik.frontend.entryPoints=http"
-        - "traefik.protocol=http"
-        - "traefik.backend=portainer"
-        - "traefik.port=9000"
-        - "traefik.docker.network=appnet"
-        - "traefik.frontend.rule=Host:portainer.{{ root_domain }}"
-      mode: replicated
-      replicas: 1
-      placement:
-        constraints:
-          - node.role == manager
-
-volumes:
-{% for volume in docker_volumes %}
-  {{ volume }}:
-  {% if storage_type == 'nfs' %}
-    driver: local
-    driver_opts:
-      type: nfs
-      o: "addr={{ nfs_address }},soft,nolock,rw"
-      device: ":{{ nfs_root_path }}/{{ volume }}"
-  {% elif storage_type == 'local' %}
-    driver: local
-  {% endif %}
-{% endfor %}
\ No newline at end of file
diff --git a/roles/startup-infrastructure/templates/old-code.yml b/roles/startup-infrastructure/templates/old-code.yml
deleted file mode 100644
index bd01ba0..0000000
--- a/roles/startup-infrastructure/templates/old-code.yml
+++ /dev/null
@@ -1,116 +0,0 @@
-  wekandb:
-    # All Wekan data is stored in MongoDB.
For backup and restore, see: - # https://github.com/wekan/wekan/wiki/Export-Docker-Mongo-Data - image: mongo:3.2.21 - command: mongod --smallfiles --oplogSize 128 - networks: - - wekan - volumes: - - wekan-db:/data/db - - wekan-db-dump:/dump - deploy: - mode: replicated - replicas: 1 - {% if (groups['workers'] | length) > 0 %} - placement: - constraints: - - node.role == worker - {% endif %} - - wekan: - image: quay.io/wekan/wekan - networks: - - wekan - - appnet - environment: - - ROOT_URL=http://{{ root_domain }} - - MONGO_URL=mongodb://wekandb:27017/wekan - #- MAIL_URL=smtp://user:pass@mailserver.example.com:25/ - #- MAIL_FROM='Example Wekan Support ' - - WITH_API=true - deploy: - labels: - - "traefik.frontend.entryPoints=http" - - "traefik.protocol=http" - - "traefik.backend=wekan" - - "traefik.port=8080" - - "traefik.docker.network=appnet" - - "traefik.frontend.rule=Host:wekan.{{ root_domain }}" - mode: replicated - replicas: 1 - {% if (groups['workers'] | length) > 0 %} - placement: - constraints: - - node.role == worker - {% endif %} - - bitwarden: - image: mprasil/bitwarden - networks: - - appnet - volumes: - - bitwarden_data:/data - deploy: - labels: - - "traefik.frontend.entryPoints=http" - - "traefik.protocol=http" - - "traefik.backend=bitwarden" - - "traefik.port=80" - - "traefik.docker.network=appnet" - - "traefik.frontend.rule=Host:bitwarden.{{ root_domain }}" - mode: replicated - replicas: 1 - {% if (groups['workers'] | length) > 0 %} - placement: - constraints: - - node.role == worker - {% endif %} - - gitea: - image: gitea/gitea:latest - environment: - - USER_UID=1000 - - USER_GID=1000 - networks: - - appnet - volumes: - - gitea_data:/data - ports: - - "2222:22" - deploy: - labels: - - "traefik.frontend.entryPoints=http" - - "traefik.protocol=http" - - "traefik.backend=git" - - "traefik.port=3000" - - "traefik.docker.network=appnet" - - "traefik.frontend.rule=Host:git.{{ root_domain }}" - mode: replicated - replicas: 1 - {% if (groups['workers'] | length) > 0 %} - placement: - constraints: - - node.role == worker - {% endif %} - - dokuwiki: - image: mprasil/dokuwiki - networks: - - appnet - volumes: - - dokuwiki_data:/dokuwiki - deploy: - labels: - - "traefik.frontend.entryPoints=http" - - "traefik.protocol=http" - - "traefik.backend=dokuwiki" - - "traefik.port=80" - - "traefik.docker.network=appnet" - - "traefik.frontend.rule=Host:dokuwiki.{{ root_domain }}" - mode: replicated - replicas: 1 - {% if (groups['workers'] | length) > 0 %} - placement: - constraints: - - node.role == worker - {% endif %} \ No newline at end of file diff --git a/tests/files/group_vars_all b/tests/files/group_vars_all index 97ff688..4798d26 100644 --- a/tests/files/group_vars_all +++ b/tests/files/group_vars_all @@ -2,40 +2,18 @@ # Variables listed here are applicable to all host groups ### Software versions -docker_compose_version_to_install: 1.18.0 docker_ce_version_to_install: 18.03.1 +### rke variables +rke_directory: /opt/rke +rke_ssh_key_location: /vagrant/tests/files/test_rsa +rke_node_directory: /opt/rke +rke_version: 0.3.1 +rke_cluster_name: rke-k8s + ### User stuff standard_user: vagrant chosen_timezone: "America/New_York" # root domain for all services. You should have an A record for *.root_domain. For example, if your domain is test.com you should have an A record for *.test.com pointing to your node. 
# this will allow automatic dns for for things like dokuwiki.test.com and portainer.test.com -root_domain: test.com -portainer_admin_password: "admin-password" - -# interface for the swarm network -swarm_network_interface: enp0s8 - -### Persistent storage if you are doing a single machine deploy, local is an option. If you are doing multi instance deploy, choose one of the following: -# nfs -# digitalocean -# gcp -# aws -# openstack -# ceph -# then fill out the variables associated with your choice below -storage_type: "local" - -# nfs variables: these need defined if using nfs storage -#nfs_address: 0.0.0.0 -#nfs_root_path: /some/filepath/on/nfs - -# digitalocean variables: these need defined if using digitalocean storage - -# gcp variables: these need defined if using gcp storage - -# aws variables: these need defined if using aws storage - -# openstack variables: these need defined if using openstack storage - -# ceph variables: these need defined if using ceph storage \ No newline at end of file +root_domain: test.com \ No newline at end of file diff --git a/tests/files/provision-script.sh b/tests/files/provision-script.sh index 54de01e..0d8f586 100644 --- a/tests/files/provision-script.sh +++ b/tests/files/provision-script.sh @@ -1,5 +1,6 @@ #!/bin/bash +mkdir -p /root/.ssh # Putting test_rsa.pub into root and vagrant authorized keys echo "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDYa9zstumlg7XkKoNrJMlIN/zteqMA9J4GjuZA7r0xfMPrz4CglxzYKd/BhBpwp/HhU+vSR6vBa15kRODHdPZ+T1oXzMXAmMT3R2ZJRqF280Hsx9sK0X+FZWM84e4a1zQUrxuWyWJ4kKIiaX6DBAmhy8zHNvQ0c4Nk1exfwRicojaze71qrexSas4FHWaI4usC/g3mMKfiML/QX0UWW/G+D8qrg3cK3zClG916XlY/p1h9SWantqz75ea33TtmDNW6iCraKSjVeDGfzhshJsmQ7+/Rr/L4/s7hdpwTqdjSlJTIi61eBxcpDfMWBmsHOMZgnsTZ3wrdYXo70k44moA7 vagrant@test" >> /home/vagrant/.ssh/authorized_keys echo "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDYa9zstumlg7XkKoNrJMlIN/zteqMA9J4GjuZA7r0xfMPrz4CglxzYKd/BhBpwp/HhU+vSR6vBa15kRODHdPZ+T1oXzMXAmMT3R2ZJRqF280Hsx9sK0X+FZWM84e4a1zQUrxuWyWJ4kKIiaX6DBAmhy8zHNvQ0c4Nk1exfwRicojaze71qrexSas4FHWaI4usC/g3mMKfiML/QX0UWW/G+D8qrg3cK3zClG916XlY/p1h9SWantqz75ea33TtmDNW6iCraKSjVeDGfzhshJsmQ7+/Rr/L4/s7hdpwTqdjSlJTIi61eBxcpDfMWBmsHOMZgnsTZ3wrdYXo70k44moA7 vagrant@test" >> /root/.ssh/authorized_keys diff --git a/tests/vagrant-tests.sh b/tests/vagrant-tests.sh index d61596e..17f3db9 100644 --- a/tests/vagrant-tests.sh +++ b/tests/vagrant-tests.sh @@ -23,33 +23,12 @@ function run-tests { testbash "Running deploy script should not fail." 
\ "vagrant ssh client -c 'bash /vagrant/tests/files/run-test-deploy.sh'" - local -r node_ls_output=$(vagrant ssh bootstrap \ - -c "docker node ls --format '{{.Hostname}} {{.Status}} {{.Availability}} {{.ManagerStatus}}'" - ) - echo docker node ls output is: - echo $node_ls_output - local -r number_of_docker_leaders=$(echo "${node_ls_output}" \ - | grep -v 'Connection' \ - | awk '{ print $4 }' \ - | grep '^Leader$' \ - | wc -l) - local -r number_of_docker_nodes=$(echo "${node_ls_output}" \ - | grep -v 'Connection' \ - | awk '{ print $1 }' \ - | wc -l) + testbash "Running kubectl should not fail" \ + "vagrant ssh client -c 'export KUBECONFIG=/opt/rke/kube_config_rke-k8s.yaml; kubectl get nodes'" - testbash "There are 2 docker swarm nodes" \ - "test ${number_of_docker_nodes} -eq 2" - - testbash "The swarm has a leader" \ - "test ${number_of_docker_leaders} -eq 1" - - testbash "Traefik got deployed" \ - "vagrant ssh client -c 'curl --silent http://swarm.test.com:8081/ping | grep OK > /dev/null'" - - testbash "Portainer was deployed and admin account was initialized" \ - "vagrant ssh client -c 'curl --silent -I \ - -X GET \"http://portainer.test.com/api/users/admin/check\" -H \"accept: application/json\"' | grep 204" + # testbash "Portainer was deployed and admin account was initialized" \ + # "vagrant ssh client -c 'curl --silent -I \ + # -X GET \"http://portainer.test.com/api/users/admin/check\" -H \"accept: application/json\"' | grep 204" } function destroy-infrastructure {