commit cabc6749a548d2d62d04a1710ae8c6305b6170a9 Author: Jacob Cody Wimer Date: Mon Oct 29 07:10:06 2018 -0400 Initial commit diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..58e80cd --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2015 Jacob Cody Wimer + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..44378a6 --- /dev/null +++ b/README.md @@ -0,0 +1,19 @@ +# autoscale-docker-swarm +This project is intended to bring automatic service scaling to Docker Swarm. The script uses Prometheus paired with cAdvisor metrics to determine CPU usage. It then uses a manager node both to determine whether a service has opted in to autoscaling and to scale the service. + +## Usage +1. You can deploy prometheus, cadvisor, and docker-swarm-autoscale by running `docker stack deploy -c swarm-autoscale-stack.yml autoscale` (the last argument is the stack name, which `docker stack deploy` requires). 
+..* You can also utilize an already deployed prometheus and cadvisor by specifying the PROMETHEUS_URL in the docker-swarm-autoscale environment. `swarm-autoscale-stack.yml` shows an example of this. +..* docker-swarm-autoscale needs a placement constraint to deploy to a manager. `swarm-autoscale-stack.yml` shows an example of this. +2. For services you want to autoscale you will need a deploy label ``` +deploy: + labels: + - "cpu.autoscale=true" +``` + +## Configuration +| Setting | Value | Description | +| --- | --- | --- | +| `cpu.autoscale` | `true` | Required. This enables autoscaling for a service. Anything other than `true` will not enable it | +| `cpu.autoscale.minimum` | Integer | Optional. This is the minimum number of replicas wanted for a service. The autoscaler will not downscale below this number | +| `cpu.autoscale.maximum` | Integer | Optional. This is the maximum number of replicas wanted for a service. The autoscaler will not scale up past this number | diff --git a/docker-swarm-autoscale/Dockerfile b/docker-swarm-autoscale/Dockerfile new file mode 100644 index 0000000..e38e9ca --- /dev/null +++ b/docker-swarm-autoscale/Dockerfile @@ -0,0 +1,26 @@ +FROM ubuntu:xenial + +RUN apt-get update -qq \ + && apt-get install -y -qq \ + jq \ + apt-transport-https \ + ca-certificates \ + curl \ + software-properties-common \ + && curl -fsSL https://download.docker.com/linux/ubuntu/gpg | apt-key add - \ + && add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu xenial stable" \ + && apt-get update -qq \ + && apt-get install -y -qq \ + docker-ce=18.03.0~ce-0~ubuntu \ + && apt-get -qq clean \ + && apt-get autoremove -y \ + && rm -rf \ + /var/lib/apt/lists/* \ + /tmp/* \ + /var/tmp/* + +COPY auto-scale.sh /auto-scale.sh +RUN chmod a+x /auto-scale.sh + +ENTRYPOINT ["/bin/bash"] +CMD ["/auto-scale.sh"] diff --git a/docker-swarm-autoscale/auto-scale.sh b/docker-swarm-autoscale/auto-scale.sh new file mode 100644 index 0000000..7d37381 --- /dev/null 
+++ b/docker-swarm-autoscale/auto-scale.sh @@ -0,0 +1,33 @@ +CPU_PERCENTAGE_UPPER_LIMIT=85 +CPU_PERCENTAGE_LOWER_LIMIT=25 +while ls > /dev/null; do + #scale up + for service in $(curl --silent "${PROMETHEUS_URL}/api/v1/query?query=sum(rate(container_cpu_usage_seconds_total%7Bcontainer_label_com_docker_swarm_task_name%3D~%27.%2B%27%7D%5B5m%5D))BY(container_label_com_docker_swarm_service_name%2Cinstance)*100>${CPU_PERCENTAGE_UPPER_LIMIT}&g0.tab=1" | jq ".data.result[].metric | .container_label_com_docker_swarm_service_name" | sort | uniq); do + service_name=$(echo $service | sed 's/\"//g') + auto_scale_label=$(docker service inspect $service_name | jq '.[].Spec.Labels["cpu.autoscale"]') + replica_maximum=$(docker service inspect $service_name | jq '.[].Spec.Labels["cpu.autoscale.maximum"]' | sed 's/\"//g') + if [[ "${auto_scale_label}" == "\"true\"" ]]; then + current_replicas=$(docker service inspect $service_name | jq ".[].Spec.Mode.Replicated | .Replicas") + new_replicas=$(expr $current_replicas + 1) + if [[ $replica_maximum -ge $new_replicas ]]; then + echo scale up $service_name to $new_replicas + docker service scale $service_name=$new_replicas + fi + fi + done + + #scale down + for service in $(curl --silent "${PROMETHEUS_URL}/api/v1/query?query=sum(rate(container_cpu_usage_seconds_total%7Bcontainer_label_com_docker_swarm_task_name%3D~%27.%2B%27%7D%5B5m%5D))BY(container_label_com_docker_swarm_service_name%2Cinstance)*100<${CPU_PERCENTAGE_LOWER_LIMIT}&g0.tab=1" | jq ".data.result[].metric | .container_label_com_docker_swarm_service_name" | sort | uniq); do + service_name=$(echo $service | sed 's/\"//g') + auto_scale_label=$(docker service inspect $service_name | jq '.[].Spec.Labels["cpu.autoscale"]') + replica_minimum=$(docker service inspect $service_name | jq '.[].Spec.Labels["cpu.autoscale.minimum"]' | sed 's/\"//g') + if [[ "${auto_scale_label}" == "\"true\"" ]]; then + current_replicas=$(docker service inspect $service_name | jq ".[].Spec.Mode.Replicated | 
.Replicas") + new_replicas=$(expr $current_replicas - 1) + if [[ $replica_minimum -le $new_replicas ]]; then + echo scale down $service_name to $new_replicas + docker service scale $service_name=$new_replicas + fi + fi + done +done diff --git a/prometheus-swarm-autoscale/Dockerfile b/prometheus-swarm-autoscale/Dockerfile new file mode 100644 index 0000000..6c0cfee --- /dev/null +++ b/prometheus-swarm-autoscale/Dockerfile @@ -0,0 +1,2 @@ +FROM prom/prometheus +COPY prometheus.yml /etc/prometheus/prometheus.yml diff --git a/prometheus-swarm-autoscale/prometheus.yml b/prometheus-swarm-autoscale/prometheus.yml new file mode 100644 index 0000000..3c69aa6 --- /dev/null +++ b/prometheus-swarm-autoscale/prometheus.yml @@ -0,0 +1,18 @@ +global: + scrape_interval: 30s + evaluation_interval: 30s + +scrape_configs: + - job_name: 'prometheus' + dns_sd_configs: + - names: + - 'tasks.prometheus' + type: 'A' + port: 9090 + + - job_name: 'cadvisor' + dns_sd_configs: + - names: + - 'tasks.cadvisor' + type: 'A' + port: 8080 diff --git a/swarm-autoscale-stack.yml b/swarm-autoscale-stack.yml new file mode 100755 index 0000000..321521a --- /dev/null +++ b/swarm-autoscale-stack.yml @@ -0,0 +1,65 @@ +version: "3" + +networks: + autoscale: + +services: + docker-swarm-autoscale: + image: jcwimer/docker-swarm-autoscale + volumes: + - /var/run/docker.sock:/var/run/docker.sock:ro + environment: + - PROMETHEUS_URL=http://prometheus:9090 + networks: + - autoscale + deploy: + mode: replicated + replicas: 1 + placement: + constraints: + - node.role == manager + resources: + limits: + cpus: '0.10' + memory: 128M + reservations: + cpus: '0.10' + memory: 64M + cadvisor: + image: google/cadvisor:${CADVISOR_VERSION:-v0.25.0} + networks: + - autoscale + volumes: + - /var/run/docker.sock:/var/run/docker.sock:ro + - /:/rootfs + - /var/run:/var/run + - /sys:/sys + - /var/lib/docker/:/var/lib/docker + deploy: + mode: global + resources: + limits: + cpus: '0.10' + memory: 128M + reservations: + cpus: '0.10' 
+ memory: 64M + + prometheus: + image: jcwimer/prometheus-swarm-autoscale + networks: + - autoscale + command: --storage.tsdb.retention 1d --config.file=/etc/prometheus/prometheus.yml + deploy: + mode: replicated + replicas: 1 + placement: + constraints: + - node.role == worker + resources: + limits: + cpus: '0.50' + memory: 1024M + reservations: + cpus: '0.50' + memory: 128M