Rename the project to docker-swarm-autoscaler.

Summary of what this patch does:
  * README.md: new project title, stack file name and service label names.
  * auto-scale.sh: service labels move from cpu.autoscale* to
    swarm.autoscaler*; the Prometheus API path and the CPU query are hoisted
    into PROMETHEUS_API / PROMETHEUS_QUERY and the "&g0.tab=1" parameter is
    dropped. Both curl calls build the URL as
    ${PROMETHEUS_URL}/${PROMETHEUS_API}${PROMETHEUS_QUERY}<op><limit>; the
    "/" separator is required because PROMETHEUS_API has no leading slash.
  * Directories, Dockerfiles, prometheus.yml and the stack file are renamed
    from *-autoscale to *-autoscaler, as are the image names in the stack.

NOTE(review): line breaks and leading whitespace were reconstructed from a
collapsed copy of this patch; hunk line counts match the @@ headers, but
verify indentation and blank context lines against the target tree before
applying.

diff --git a/README.md b/README.md
index 95b43ec..c6d8ed0 100644
--- a/README.md
+++ b/README.md
@@ -1,18 +1,18 @@
-# autoscale-docker-swarm
+# docker-swarm-autoscaler
 
 This project is intended to bring auto service staling to Docker Swarm. This script uses prometheus paired with cadvisor metrics to determine cpu usage. It then uses a manager node to determine if a service wants to be autoscaled and uses a manager node to scale the service.
 
 Currently the project only uses cpu to autoscale. If cpu usage reaches 85% the service will scale up, if it reaches 25% it will scale down.
 
 ## Usage
-1. You can deploy prometheus, cadvisor, and docker-swarm-autoscale by running `docker stack deploy -c swarm-autoscale-stack.yml`.
-  * You can also utilize an already deploy prometheus and cadvisor by specifying the `PROMETHEUS_URL` in docker-swarm-autoscale environment. `swarm-autoscale-stack.yml` shows an example of this.
-  * docker-swarm-autoscale needs a placement contstraint to deploy to a manager. `swarm-autoscale-stack.yml` shows an example of this.
-2. For services you want to autoscale you will need a deploy label `cpu.autoscale=true`.
+1. You can deploy prometheus, cadvisor, and docker-swarm-autoscaler by running `docker stack deploy -c swarm-autoscaler-stack.yml autoscaler`.
+  * You can also utilize an already deployed prometheus and cadvisor by specifying the `PROMETHEUS_URL` in docker-swarm-autoscaler environment. `swarm-autoscaler-stack.yml` shows an example of this.
+  * docker-swarm-autoscaler needs a placement constraint to deploy to a manager. `swarm-autoscaler-stack.yml` shows an example of this.
+2. For services you want to autoscale you will need a deploy label `swarm.autoscaler=true`.
 ```
 deploy:
   labels:
-    - "cpu.autoscale=true"
+    - "swarm.autoscaler=true"
 ```
 
 This is best paired with resource constraints limits. This is also under the deploy key.
@@ -31,6 +31,6 @@ deploy:
 ## Configuration
 | Setting | Value | Description |
 | --- | --- | --- |
-| `cpu.autoscale` | `true` | Required. This enables autoscaling for a service. Anything other than `true` will not enable it |
-| `cpu.autoscale.minimum` | Integer | Optional. This is the minimum number of replicas wanted for a service. The autoscaler will not downscale below this number |
-| `cpu.autoscale.maximum` | Integer | Optional. This is the maximum number of replicas wanted for a service. The autoscaler will not scale up past this number |
+| `swarm.autoscaler` | `true` | Required. This enables autoscaling for a service. Anything other than `true` will not enable it |
+| `swarm.autoscaler.minimum` | Integer | Optional. This is the minimum number of replicas wanted for a service. The autoscaler will not downscale below this number |
+| `swarm.autoscaler.maximum` | Integer | Optional. This is the maximum number of replicas wanted for a service. The autoscaler will not scale up past this number |
diff --git a/docker-swarm-autoscale/Dockerfile b/docker-swarm-autoscaler/Dockerfile
similarity index 100%
rename from docker-swarm-autoscale/Dockerfile
rename to docker-swarm-autoscaler/Dockerfile
diff --git a/docker-swarm-autoscale/auto-scale.sh b/docker-swarm-autoscaler/auto-scale.sh
similarity index 56%
rename from docker-swarm-autoscale/auto-scale.sh
rename to docker-swarm-autoscaler/auto-scale.sh
index 7d37381..c45aee3 100644
--- a/docker-swarm-autoscale/auto-scale.sh
+++ b/docker-swarm-autoscaler/auto-scale.sh
@@ -1,11 +1,14 @@
 CPU_PERCENTAGE_UPPER_LIMIT=85
 CPU_PERCENTAGE_LOWER_LIMIT=25
+PROMETHEUS_API="api/v1/query?query="
+PROMETHEUS_QUERY="sum(rate(container_cpu_usage_seconds_total%7Bcontainer_label_com_docker_swarm_task_name%3D~%27.%2B%27%7D%5B5m%5D))BY(container_label_com_docker_swarm_service_name%2Cinstance)*100"
+
 while ls > /dev/null; do
   #scale up
-  for service in $(curl --silent "${PROMETHEUS_URL}/api/v1/query?query=sum(rate(container_cpu_usage_seconds_total%7Bcontainer_label_com_docker_swarm_task_name%3D~%27.%2B%27%7D%5B5m%5D))BY(container_label_com_docker_swarm_service_name%2Cinstance)*100>${CPU_PERCENTAGE_UPPER_LIMIT}&g0.tab=1" | jq ".data.result[].metric | .container_label_com_docker_swarm_service_name" | sort | uniq); do
+  for service in $(curl --silent "${PROMETHEUS_URL}/${PROMETHEUS_API}${PROMETHEUS_QUERY}>${CPU_PERCENTAGE_UPPER_LIMIT}" | jq ".data.result[].metric | .container_label_com_docker_swarm_service_name" | sort | uniq); do
     service_name=$(echo $service | sed 's/\"//g')
-    auto_scale_label=$(docker service inspect $service_name | jq '.[].Spec.Labels["cpu.autoscale"]')
-    replica_maximum=$(docker service inspect $service_name | jq '.[].Spec.Labels["cpu.autoscale.maximum"]' | sed 's/\"//g')
+    auto_scale_label=$(docker service inspect $service_name | jq '.[].Spec.Labels["swarm.autoscaler"]')
+    replica_maximum=$(docker service inspect $service_name | jq '.[].Spec.Labels["swarm.autoscaler.maximum"]' | sed 's/\"//g')
     if [[ "${auto_scale_label}" == "\"true\"" ]]; then
       current_replicas=$(docker service inspect $service_name | jq ".[].Spec.Mode.Replicated | .Replicas")
       new_replicas=$(expr $current_replicas + 1)
@@ -17,10 +20,10 @@ while ls > /dev/null; do
   done
 
   #scale down
-  for service in $(curl --silent "${PROMETHEUS_URL}/api/v1/query?query=sum(rate(container_cpu_usage_seconds_total%7Bcontainer_label_com_docker_swarm_task_name%3D~%27.%2B%27%7D%5B5m%5D))BY(container_label_com_docker_swarm_service_name%2Cinstance)*100<${CPU_PERCENTAGE_LOWER_LIMIT}&g0.tab=1" | jq ".data.result[].metric | .container_label_com_docker_swarm_service_name" | sort | uniq); do
+  for service in $(curl --silent "${PROMETHEUS_URL}/${PROMETHEUS_API}${PROMETHEUS_QUERY}<${CPU_PERCENTAGE_LOWER_LIMIT}" | jq ".data.result[].metric | .container_label_com_docker_swarm_service_name" | sort | uniq); do
     service_name=$(echo $service | sed 's/\"//g')
-    auto_scale_label=$(docker service inspect $service_name | jq '.[].Spec.Labels["cpu.autoscale"]')
-    replica_minimum=$(docker service inspect $service_name | jq '.[].Spec.Labels["cpu.autoscale.minimum"]' | sed 's/\"//g')
+    auto_scale_label=$(docker service inspect $service_name | jq '.[].Spec.Labels["swarm.autoscaler"]')
+    replica_minimum=$(docker service inspect $service_name | jq '.[].Spec.Labels["swarm.autoscaler.minimum"]' | sed 's/\"//g')
     if [[ "${auto_scale_label}" == "\"true\"" ]]; then
       current_replicas=$(docker service inspect $service_name | jq ".[].Spec.Mode.Replicated | .Replicas")
       new_replicas=$(expr $current_replicas - 1)
diff --git a/prometheus-swarm-autoscale/Dockerfile b/prometheus-swarm-autoscaler/Dockerfile
similarity index 100%
rename from prometheus-swarm-autoscale/Dockerfile
rename to prometheus-swarm-autoscaler/Dockerfile
diff --git a/prometheus-swarm-autoscale/prometheus.yml b/prometheus-swarm-autoscaler/prometheus.yml
similarity index 100%
rename from prometheus-swarm-autoscale/prometheus.yml
rename to prometheus-swarm-autoscaler/prometheus.yml
diff --git a/swarm-autoscale-stack.yml b/swarm-autoscaler-stack.yml
similarity index 91%
rename from swarm-autoscale-stack.yml
rename to swarm-autoscaler-stack.yml
index 321521a..dca9036 100755
--- a/swarm-autoscale-stack.yml
+++ b/swarm-autoscaler-stack.yml
@@ -4,8 +4,8 @@ networks:
   autoscale:
 
 services:
-  docker-swarm-autoscale:
-    image: jcwimer/docker-swarm-autoscale
+  docker-swarm-autoscaler:
+    image: jcwimer/docker-swarm-autoscaler
     volumes:
       - /var/run/docker.sock:/var/run/docker.sock:ro
     environment:
@@ -46,7 +46,7 @@ services:
           memory: 64M
 
   prometheus:
-    image: jcwimer/prometheus-swarm-autoscale
+    image: jcwimer/prometheus-swarm-autoscaler
     networks:
       - autoscale
     command: --storage.tsdb.retention 1d --config.file=/etc/prometheus/prometheus.yml