Initial commit

2026-05-09 07:21:59 +00:00 · 2018-10-29 07:10:06 -04:00
commit cabc6749a5
7 changed files with 184 additions and 0 deletions
--- a/21
+++ b/21
@@ -0,0 +1,21 @@
+The MIT License (MIT)
+
+Copyright (c) 2015 Jacob Cody Wimer
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
--- a/README.md
+++ b/README.md
@@ -0,0 +1,19 @@
+# autoscale-docker-swarm
+This project is intended to bring automatic service scaling to Docker Swarm. The script uses Prometheus paired with cAdvisor metrics to determine CPU usage. It then uses a manager node to determine whether a service wants to be autoscaled and to scale the service.
+
+## Usage
+1. You can deploy Prometheus, cAdvisor, and docker-swarm-autoscale by running `docker stack deploy -c swarm-autoscale-stack.yml autoscale` (`autoscale` here is the stack name).
+   * You can also utilize an already deployed Prometheus and cAdvisor by specifying the PROMETHEUS_URL in the docker-swarm-autoscale environment. `swarm-autoscale-stack.yml` shows an example of this.
+   * docker-swarm-autoscale needs a placement constraint to deploy to a manager. `swarm-autoscale-stack.yml` shows an example of this.
+2. For services you want to autoscale, you will need a deploy label:
+   ```yaml
+   deploy:
+     labels:
+       - "cpu.autoscale=true"
+   ```
+
+## Configuration
+| Setting | Value | Description |
+| --- | --- | --- |
+| `cpu.autoscale` | `true` | Required. This enables autoscaling for a service. Anything other than `true` will not enable it |
+| `cpu.autoscale.minimum` | Integer | Optional. This is the minimum number of replicas wanted for a service. The autoscaler will not downscale below this number |
+| `cpu.autoscale.maximum` | Integer | Optional. This is the maximum number of replicas wanted for a service. The autoscaler will not scale up past this number | 
--- a/docker-swarm-autoscale/Dockerfile
+++ b/docker-swarm-autoscale/Dockerfile
@@ -0,0 +1,26 @@
+# Image for the autoscaler: Ubuntu xenial plus the tools auto-scale.sh calls
+# (curl, jq and the docker CLI).
+FROM ubuntu:xenial
+
+# Single RUN: install jq, curl and the apt HTTPS prerequisites, register
+# Docker's apt signing key and the xenial "stable" repository, install docker-ce
+# pinned to 18.03.0, then clean the apt caches and temp directories.
+RUN apt-get update -qq \
+  && apt-get install -y -qq \
+    jq \
+    apt-transport-https \
+    ca-certificates \
+    curl \
+    software-properties-common \
+  && curl -fsSL https://download.docker.com/linux/ubuntu/gpg | apt-key add - \
+  && add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu xenial stable" \
+  && apt-get update -qq \
+  && apt-get install -y -qq \
+    docker-ce=18.03.0~ce-0~ubuntu \
+  && apt-get -qq clean \
+  && apt-get autoremove -y \
+  && rm -rf \
+    /var/lib/apt/lists/* \
+    /tmp/* \
+    /var/tmp/*
+
+# Ship the autoscaling script and make it executable for all users.
+COPY auto-scale.sh /auto-scale.sh
+RUN chmod a+x /auto-scale.sh
+
+# Runs `/bin/bash /auto-scale.sh` by default: CMD supplies the argument to the
+# bash ENTRYPOINT.
+ENTRYPOINT ["/bin/bash"]
+CMD ["/auto-scale.sh"]
--- a/docker-swarm-autoscale/auto-scale.sh
+++ b/docker-swarm-autoscale/auto-scale.sh
@@ -0,0 +1,33 @@
+CPU_PERCENTAGE_UPPER_LIMIT=85
+CPU_PERCENTAGE_LOWER_LIMIT=25
+while ls > /dev/null; do
+  #scale up
+  for service in $(curl --silent "${PROMETHEUS_URL}/api/v1/query?query=sum(rate(container_cpu_usage_seconds_total%7Bcontainer_label_com_docker_swarm_task_name%3D~%27.%2B%27%7D%5B5m%5D))BY(container_label_com_docker_swarm_service_name%2Cinstance)*100>${CPU_PERCENTAGE_UPPER_LIMIT}&g0.tab=1" | jq ".data.result[].metric | .container_label_com_docker_swarm_service_name" | sort | uniq); do
+    service_name=$(echo $service | sed 's/\"//g')
+    auto_scale_label=$(docker service inspect $service_name | jq '.[].Spec.Labels["cpu.autoscale"]')
+    replica_maximum=$(docker service inspect $service_name | jq '.[].Spec.Labels["cpu.autoscale.maximum"]' | sed 's/\"//g')
+    if [[ "${auto_scale_label}" == "\"true\"" ]]; then
+      current_replicas=$(docker service inspect $service_name | jq ".[].Spec.Mode.Replicated | .Replicas")
+      new_replicas=$(expr $current_replicas + 1)
+      if [[ $replica_maximum -ge $new_replicas ]]; then
+        echo scale up $service_name to $new_replicas
+        docker service scale $service_name=$new_replicas
+      fi
+    fi
+  done
+
+  #scale down
+  for service in $(curl --silent "${PROMETHEUS_URL}/api/v1/query?query=sum(rate(container_cpu_usage_seconds_total%7Bcontainer_label_com_docker_swarm_task_name%3D~%27.%2B%27%7D%5B5m%5D))BY(container_label_com_docker_swarm_service_name%2Cinstance)*100<${CPU_PERCENTAGE_LOWER_LIMIT}&g0.tab=1" | jq ".data.result[].metric | .container_label_com_docker_swarm_service_name" | sort | uniq); do
+    service_name=$(echo $service | sed 's/\"//g')
+    auto_scale_label=$(docker service inspect $service_name | jq '.[].Spec.Labels["cpu.autoscale"]')
+    replica_minimum=$(docker service inspect $service_name | jq '.[].Spec.Labels["cpu.autoscale.minimum"]' | sed 's/\"//g')
+    if [[ "${auto_scale_label}" == "\"true\"" ]]; then
+      current_replicas=$(docker service inspect $service_name | jq ".[].Spec.Mode.Replicated | .Replicas")
+      new_replicas=$(expr $current_replicas - 1)
+      if [[ $replica_minimum -le $new_replicas ]]; then
+        echo scale down $service_name to $new_replicas
+        docker service scale $service_name=$new_replicas
+      fi
+    fi
+  done
+done
--- a/prometheus-swarm-autoscale/Dockerfile
+++ b/prometheus-swarm-autoscale/Dockerfile
@@ -0,0 +1,2 @@
+# Stock Prometheus image with this directory's prometheus.yml copied to
+# /etc/prometheus/prometheus.yml.
+FROM prom/prometheus
+COPY prometheus.yml /etc/prometheus/prometheus.yml
--- a/prometheus-swarm-autoscale/prometheus.yml
+++ b/prometheus-swarm-autoscale/prometheus.yml
@@ -0,0 +1,18 @@
+# Prometheus configuration used by the autoscaler stack.
+global:
+  # Scrape targets and evaluate rules every 30 seconds.
+  scrape_interval:     30s
+  evaluation_interval: 30s
+
+scrape_configs:
+  # Prometheus' own endpoint: targets are discovered from the DNS A records of
+  # `tasks.prometheus` and scraped on port 9090.
+  - job_name: 'prometheus'
+    dns_sd_configs:
+    - names:
+      - 'tasks.prometheus'
+      type: 'A'
+      port: 9090
+
+  # cAdvisor, the source of the container CPU metrics the autoscaler queries:
+  # targets are discovered from the DNS A records of `tasks.cadvisor` and
+  # scraped on port 8080.
+  # NOTE(review): assumes `tasks.<service>` resolves to one record per running
+  # task on the stack network - confirm for your deployment.
+  - job_name: 'cadvisor'
+    dns_sd_configs:
+    - names:
+      - 'tasks.cadvisor'
+      type: 'A'
+      port: 8080
--- a/swarm-autoscale-stack.yml
+++ b/swarm-autoscale-stack.yml
@@ -0,0 +1,65 @@
+# Example stack: the autoscaler, cAdvisor and Prometheus on one shared network.
+version: "3"
+
+networks:
+  # Network that every service in this file attaches to.
+  autoscale:
+
+services:
+  # The autoscaler itself (image built from docker-swarm-autoscale/).
+  docker-swarm-autoscale:
+    image: jcwimer/docker-swarm-autoscale
+    volumes:
+      # Host Docker socket, used by the `docker service` commands in auto-scale.sh.
+      - /var/run/docker.sock:/var/run/docker.sock:ro
+    environment:
+      # Prometheus base URL the script queries; here the `prometheus` service below.
+      - PROMETHEUS_URL=http://prometheus:9090
+    networks:
+      - autoscale
+    deploy:
+      mode: replicated
+      replicas: 1
+      placement:
+        constraints:
+          # The README requires the autoscaler to be placed on a manager node.
+          - node.role == manager
+      resources:
+        limits:
+          cpus: '0.10'
+          memory: 128M
+        reservations:
+          cpus: '0.10'
+          memory: 64M
+  # cAdvisor, scraped by Prometheus (job `cadvisor`, port 8080) for container metrics.
+  # The image tag can be overridden with CADVISOR_VERSION; it defaults to v0.25.0.
+  cadvisor:
+    image: google/cadvisor:${CADVISOR_VERSION:-v0.25.0}
+    networks:
+      - autoscale
+    volumes:
+      # Host paths mounted into the container: Docker socket, root filesystem,
+      # /var/run, /sys and Docker's data directory.
+      - /var/run/docker.sock:/var/run/docker.sock:ro
+      - /:/rootfs
+      - /var/run:/var/run
+      - /sys:/sys
+      - /var/lib/docker/:/var/lib/docker
+    deploy:
+      # Global mode: one task per node rather than a fixed replica count.
+      mode: global
+      resources:
+        limits:
+          cpus: '0.10'
+          memory: 128M
+        reservations:
+          cpus: '0.10'
+          memory: 64M
+
+  # Prometheus server (image built from prometheus-swarm-autoscale/), reached by
+  # the autoscaler at http://prometheus:9090.
+  prometheus:
+    image: jcwimer/prometheus-swarm-autoscale
+    networks:
+      - autoscale
+    # Keep one day of TSDB data and load the config file copied into the image.
+    command: --storage.tsdb.retention 1d --config.file=/etc/prometheus/prometheus.yml
+    deploy:
+      mode: replicated
+      replicas: 1
+      placement:
+        constraints:
+          # Placed on worker nodes only.
+          - node.role == worker
+      resources:
+        limits:
+          cpus: '0.50'
+          memory: 1024M
+        reservations:
+          cpus: '0.50'
+          memory: 128M