commit 3fcc94fc6df4ff845cdf7d41a2d8322689b363a0 Author: Anton Zadvorny Date: Thu Jan 9 07:29:13 2020 +0300 Initial commit diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..91bd067 --- /dev/null +++ b/.env.example @@ -0,0 +1,9 @@ +TARGETS=https://example.com,https://test.com/status +GRAFANA_USER=admin +GRAFANA_PASSWORD=admin +ALERT_EMAIL_TO=example@example.com +ALERT_EMAIL_FROM=alert@example.com +ALERT_SMTP_HOST=smtp.example.com:25 +ALERT_SMTP_USER=smtp_user +ALERT_SMTP_PASSWORD=smtp_password +ALERT_SLACK_WEBHOOK=https://slack.com/webhook diff --git a/config/alertrules.yml b/config/alertrules.yml new file mode 100644 index 0000000..06a5c61 --- /dev/null +++ b/config/alertrules.yml @@ -0,0 +1,10 @@ +groups: + - name: blackbox + rules: + - alert: EndpointDown + expr: probe_success == 0 + for: 10s + labels: + severity: "critical" + annotations: + summary: "Endpoint {{ $labels.instance }} down" diff --git a/config/blackbox.yml b/config/blackbox.yml new file mode 100644 index 0000000..e2670f7 --- /dev/null +++ b/config/blackbox.yml @@ -0,0 +1,6 @@ +modules: + http_2xx: + prober: http + timeout: 5s + http: + preferred_ip_protocol: ip4 diff --git a/config/grafana/provisioning/dashboards/blackbox.json b/config/grafana/provisioning/dashboards/blackbox.json new file mode 100644 index 0000000..bc30ca1 --- /dev/null +++ b/config/grafana/provisioning/dashboards/blackbox.json @@ -0,0 +1,888 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": 5345, + "graphTooltip": 0, + "id": null, + "iteration": 1523027885726, + "links": [], + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 15, + "panels": [], + "repeat": "targets", + "title": "$targets UP/DOWN Status", + "type": "row" + }, + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "#d44a3a", + "rgba(237, 129, 40, 0.89)", + "#299c46" + ], + "datasource": "Prometheus", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 2, + "w": 24, + "x": 0, + "y": 1 + }, + "id": 2, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 3, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "repeat": null, + "repeatDirection": "h", + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "probe_success{target=~\"$targets\"}", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "1,1", + "title": "$targets", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + }, + { + "op": "=", + "text": "UP", + "value": "1" + }, + { + "op": "=", + "text": "DOWN", + "value": "0" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "#d44a3a", + "rgba(237, 129, 40, 0.89)", + "#299c46" + ], + "datasource": "Prometheus", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 2, + "w": 6, + "x": 0, + "y": 3 + }, + "id": 18, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 3, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "repeatDirection": "h", + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "probe_http_ssl{target=~\"$targets\"}", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "0,1", + "title": "SSL", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + }, + { + "op": "=", + "text": "YES", + "value": "1" + }, + { + "op": "=", + "text": "NO", + "value": "0" + } + ], + "valueName": "current" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 1, + "gridPos": { + "h": 6, + "w": 9, + "x": 6, + "y": 3 + }, + "id": 17, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "probe_duration_seconds{target=~\"$targets\"}", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "seconds", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Probe Duration", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 1, + "gridPos": { + "h": 6, + "w": 9, + "x": 15, + "y": 3 + }, + "id": 21, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "probe_dns_lookup_time_seconds{target=~\"$targets\"}", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "seconds", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "DNS Lookup", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "#d44a3a", + "rgba(237, 129, 40, 0.89)", + "#299c46" + ], + "datasource": "Prometheus", + "decimals": 2, + "format": "dtdurations", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 2, + "w": 6, + "x": 0, + "y": 5 + }, + "id": 19, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 3, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "repeatDirection": "h", + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "probe_ssl_earliest_cert_expiry{target=~\"$targets\"}-time()", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "0,1209600", + "title": "SSL Cert Expiry", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + }, + { + "op": "=", + "text": "YES", + "value": "1" + }, + { + "op": "=", + "text": "NO", + "value": "0" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "Prometheus", + "decimals": 0, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 2, + "w": 6, + "x": 0, + "y": 7 + }, + "id": 20, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 3, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "repeatDirection": "h", + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "probe_http_status_code{target=~\"$targets\"}", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "200,299,300", + "title": "HTTP Status Code", + "transparent": false, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + }, + { + "op": "=", + "text": "YES", + "value": "1" + }, + { + "op": "=", + "text": "NO", + "value": "0" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "Prometheus", + "format": "s", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 2, + "w": 12, + "x": 0, + "y": 9 + }, + "id": 23, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "avg(probe_duration_seconds{target=~\"$targets\"})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "", + "title": "Average Probe Duration", + "type": "singlestat", + "valueFontSize": "50%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "Prometheus", + "format": "s", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 2, + "w": 12, + "x": 12, + "y": 9 + }, + "id": 24, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "avg(probe_dns_lookup_time_seconds{target=~\"$targets\"})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "", + "title": "Average DNS Lookup", + "type": "singlestat", + "valueFontSize": "50%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + } + ], + "refresh": "1m", + "schemaVersion": 16, + "style": "dark", + "tags": [ + "blackbox", + "prometheus" + ], + "templating": { + "list": [ + { + "auto": true, + "auto_count": 10, + "auto_min": "10s", + "current": { + "text": "auto", + "value": "$__auto_interval_interval" + }, + "hide": 0, + "label": "Interval", + "name": "interval", + "options": [ + { + "selected": true, + "text": "auto", + "value": "$__auto_interval_interval" + }, + { + "selected": false, + "text": "5s", + "value": "5s" + }, + { + "selected": false, + "text": "10s", + "value": "10s" + }, + { + "selected": false, + "text": "30s", + "value": "30s" + }, + { + "selected": false, + "text": "1m", + "value": "1m" + }, + { + "selected": false, + "text": "10m", + "value": "10m" + }, + { + "selected": false, + "text": "30m", + "value": "30m" + }, + { + "selected": false, + "text": "1h", + "value": "1h" + }, + { + "selected": false, + "text": "6h", + "value": "6h" + }, + { + "selected": false, + "text": "12h", + "value": "12h" + }, + { + "selected": false, + "text": "1d", + "value": "1d" + }, + { + "selected": false, + "text": "7d", + "value": "7d" + }, + { + "selected": false, + "text": "14d", + "value": "14d" + }, + { + "selected": false, + "text": "30d", + "value": "30d" + } + ], + "query": "5s,10s,30s,1m,10m,30m,1h,6h,12h,1d,7d,14d,30d", + "refresh": 2, + "type": "interval" + }, + { + "allValue": null, + "current": {}, + "datasource": "Prometheus", + "hide": 0, + "includeAll": true, + "label": null, + "multi": true, + "name": "targets", + "options": [], + "query": "label_values(probe_success, target)", + "refresh": 1, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Blackbox Exporter", + "uid": "xtkCtBkiz", + "version": 4 +} diff --git a/config/grafana/provisioning/dashboards/dashboard.yml b/config/grafana/provisioning/dashboards/dashboard.yml new file mode 100644 index 0000000..ff7db10 --- /dev/null +++ b/config/grafana/provisioning/dashboards/dashboard.yml @@ -0,0 +1,12 @@ +apiVersion: 1 + +providers: + - name: "Prometheus" + orgId: 1 + folder: "" + type: file + disableDeletion: false + editable: true + allowUiUpdates: true + options: + path: /etc/grafana/provisioning/dashboards diff --git a/config/grafana/provisioning/datasources/datasource.yml b/config/grafana/provisioning/datasources/datasource.yml new file mode 100644 index 0000000..bb37f13 --- /dev/null +++ b/config/grafana/provisioning/datasources/datasource.yml @@ -0,0 +1,11 @@ +apiVersion: 1 + +datasources: + - name: Prometheus + type: prometheus + access: proxy + orgId: 1 + url: http://prometheus:9090 + basicAuth: false + isDefault: true + editable: true \ No newline at end of file diff --git a/docker-compose.config.yml b/docker-compose.config.yml new file mode 100644 index 0000000..bc8b016 --- /dev/null +++ b/docker-compose.config.yml @@ -0,0 +1,18 @@ +version: "3" + +services: + gomplate: + image: hairyhenderson/gomplate + container_name: gomplate + command: "--input-dir /templates --output-dir /config" + volumes: + - ./templates:/templates + - ./config:/config + environment: + - TARGETS=${TARGETS:?err} + - ALERT_EMAIL_TO=${ALERT_EMAIL_TO:?err} + - ALERT_EMAIL_FROM=${ALERT_EMAIL_FROM:?err} + - ALERT_SMTP_HOST=${ALERT_SMTP_HOST:?err} + - ALERT_SMTP_USER=${ALERT_SMTP_USER:?err} + - ALERT_SMTP_PASSWORD=${ALERT_SMTP_PASSWORD:?err} + - ALERT_SLACK_WEBHOOK=${ALERT_SLACK_WEBHOOK:?err} diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..75b08cf --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,66 @@ +version: "3" + +services: + prometheus: + image: prom/prometheus:v2.15.2 + container_name: prometheus + restart: unless-stopped + command: + - "--config.file=/etc/prometheus/prometheus.yml" + - "--web.console.libraries=/etc/prometheus/console_libraries" + - "--web.console.templates=/etc/prometheus/consoles" + - "--web.enable-lifecycle" + - "--storage.tsdb.path=/prometheus" + - "--storage.tsdb.retention=200h" + networks: + - monitoring + volumes: + - prometheus_data:/prometheus + - ./config/prometheus.yml:/etc/prometheus/prometheus.yml + - ./config/alertrules.yml:/etc/prometheus/alertrules.yml + + blackbox: + image: prom/blackbox-exporter:v0.16.0 + container_name: blackbox + restart: unless-stopped + command: + - "--config.file=/etc/blackbox/blackbox.yml" + networks: + - monitoring + volumes: + - ./config/blackbox.yml:/etc/blackbox/blackbox.yml + + alertmanager: + image: prom/alertmanager:v0.20.0 + container_name: alertmanager + restart: unless-stopped + command: + - "--config.file=/etc/alertmanager/alertmanager.yml" + - "--storage.path=/alertmanager" + networks: + - monitoring + volumes: + - ./config/alertmanager.yml:/etc/alertmanager/alertmanager.yml + + grafana: + image: grafana/grafana:6.5.2 + container_name: grafana + restart: unless-stopped + networks: + - monitoring + volumes: + - grafana_data:/var/lib/grafana + - ./config/grafana/provisioning:/etc/grafana/provisioning + environment: + - GF_SECURITY_ADMIN_USER=${GRAFANA_USER:?err} + - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_PASSWORD:?err} + - GF_USERS_ALLOW_SIGN_UP=false + ports: + - 3000:3000 + +networks: + monitoring: {} + +volumes: + prometheus_data: {} + grafana_data: {} diff --git a/templates/alertmanager.yml b/templates/alertmanager.yml new file mode 100644 index 0000000..842b96f --- /dev/null +++ b/templates/alertmanager.yml @@ -0,0 +1,17 @@ +route: + receiver: "default" + +receivers: + - name: "default" + email_configs: + - to: "{{ .Env.ALERT_EMAIL_TO }}" + from: "{{ .Env.ALERT_EMAIL_FROM }}" + smarthost: "{{ .Env.ALERT_SMTP_HOST }}" + auth_username: "{{ .Env.ALERT_SMTP_USER }}" + auth_password: "{{ .Env.ALERT_SMTP_PASSWORD }}" + send_resolved: true + + slack_configs: + - api_url: "{{ .Env.ALERT_SLACK_WEBHOOK }}" + send_resolved: true + diff --git a/templates/prometheus.yml b/templates/prometheus.yml new file mode 100644 index 0000000..2b34e58 --- /dev/null +++ b/templates/prometheus.yml @@ -0,0 +1,41 @@ +global: + scrape_interval: 15s + evaluation_interval: 15s + external_labels: + monitor: "docker-host" + +rule_files: + - alertrules.yml + +scrape_configs: + - job_name: "prometheus" + static_configs: + - targets: ["localhost:9090"] + + - job_name: "grafana" + static_configs: + - targets: ["grafana:3000"] + + - job_name: 'blackbox' + metrics_path: /probe + params: + module: [http_2xx] + static_configs: + - targets: + {{ range (split .Env.TARGETS ",") }} - "{{ . }}" + {{ end }} + relabel_configs: + - source_labels: [__address__] + target_label: __param_target + - source_labels: [__param_target] + target_label: instance + - source_labels: [__param_target] + target_label: target + - target_label: __address__ + replacement: blackbox:9115 + +alerting: + alertmanagers: + - scheme: http + static_configs: + - targets: ["alertmanager:9093"]