Initial commit

This commit is contained in:
Anton Zadvorny 2020-01-09 07:29:13 +03:00
commit 3fcc94fc6d
10 changed files with 1078 additions and 0 deletions

9
.env.example Normal file
View File

@ -0,0 +1,9 @@
# Example environment file for the monitoring stack.

# Comma-separated list of URLs to probe; the Prometheus config template
# splits this value on "," to build the blackbox target list.
TARGETS=https://example.com,https://test.com/status
# Grafana admin credentials, passed to the grafana service as
# GF_SECURITY_ADMIN_USER / GF_SECURITY_ADMIN_PASSWORD.
GRAFANA_USER=admin
GRAFANA_PASSWORD=admin
# E-mail alert settings, substituted into the Alertmanager config template
# (recipient, sender, SMTP smarthost as host:port, SMTP credentials).
ALERT_EMAIL_TO=example@example.com
ALERT_EMAIL_FROM=alert@example.com
ALERT_SMTP_HOST=smtp.example.com:25
ALERT_SMTP_USER=smtp_user
ALERT_SMTP_PASSWORD=smtp_password
# Slack webhook URL, substituted into the Alertmanager slack_configs api_url.
ALERT_SLACK_WEBHOOK=https://slack.com/webhook

10
config/alertrules.yml Normal file
View File

@ -0,0 +1,10 @@
---
# Prometheus alerting rules for the blackbox probe results.
groups:
  - name: 'blackbox'
    rules:
      # Raised when a probed endpoint reports probe_success == 0 and the
      # condition has held for the `for` duration.
      - alert: 'EndpointDown'
        expr: 'probe_success == 0'
        for: '10s'
        labels:
          severity: 'critical'
        annotations:
          summary: 'Endpoint {{ $labels.instance }} down'

6
config/blackbox.yml Normal file
View File

@ -0,0 +1,6 @@
---
# Blackbox exporter probe modules.
modules:
  # Selected by the Prometheus blackbox job through its `module` parameter.
  http_2xx:
    prober: 'http'
    timeout: '5s'
    http:
      preferred_ip_protocol: 'ip4'

View File

@ -0,0 +1,888 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": "-- Grafana --",
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"editable": true,
"gnetId": 5345,
"graphTooltip": 0,
"id": null,
"iteration": 1523027885726,
"links": [],
"panels": [
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 0
},
"id": 15,
"panels": [],
"repeat": "targets",
"title": "$targets UP/DOWN Status",
"type": "row"
},
{
"cacheTimeout": null,
"colorBackground": true,
"colorValue": false,
"colors": [
"#d44a3a",
"rgba(237, 129, 40, 0.89)",
"#299c46"
],
"datasource": "Prometheus",
"format": "none",
"gauge": {
"maxValue": 100,
"minValue": 0,
"show": false,
"thresholdLabels": false,
"thresholdMarkers": true
},
"gridPos": {
"h": 2,
"w": 24,
"x": 0,
"y": 1
},
"id": 2,
"interval": null,
"links": [],
"mappingType": 1,
"mappingTypes": [
{
"name": "value to text",
"value": 1
},
{
"name": "range to text",
"value": 2
}
],
"maxDataPoints": 100,
"minSpan": 3,
"nullPointMode": "connected",
"nullText": null,
"postfix": "",
"postfixFontSize": "50%",
"prefix": "",
"prefixFontSize": "50%",
"rangeMaps": [
{
"from": "null",
"text": "N/A",
"to": "null"
}
],
"repeat": null,
"repeatDirection": "h",
"sparkline": {
"fillColor": "rgba(31, 118, 189, 0.18)",
"full": false,
"lineColor": "rgb(31, 120, 193)",
"show": false
},
"tableColumn": "",
"targets": [
{
"expr": "probe_success{target=~\"$targets\"}",
"format": "time_series",
"interval": "$interval",
"intervalFactor": 1,
"refId": "A"
}
],
"thresholds": "1,1",
"title": "$targets",
"type": "singlestat",
"valueFontSize": "80%",
"valueMaps": [
{
"op": "=",
"text": "N/A",
"value": "null"
},
{
"op": "=",
"text": "UP",
"value": "1"
},
{
"op": "=",
"text": "DOWN",
"value": "0"
}
],
"valueName": "current"
},
{
"cacheTimeout": null,
"colorBackground": true,
"colorValue": false,
"colors": [
"#d44a3a",
"rgba(237, 129, 40, 0.89)",
"#299c46"
],
"datasource": "Prometheus",
"format": "none",
"gauge": {
"maxValue": 100,
"minValue": 0,
"show": false,
"thresholdLabels": false,
"thresholdMarkers": true
},
"gridPos": {
"h": 2,
"w": 6,
"x": 0,
"y": 3
},
"id": 18,
"interval": null,
"links": [],
"mappingType": 1,
"mappingTypes": [
{
"name": "value to text",
"value": 1
},
{
"name": "range to text",
"value": 2
}
],
"maxDataPoints": 100,
"minSpan": 3,
"nullPointMode": "connected",
"nullText": null,
"postfix": "",
"postfixFontSize": "50%",
"prefix": "",
"prefixFontSize": "50%",
"rangeMaps": [
{
"from": "null",
"text": "N/A",
"to": "null"
}
],
"repeatDirection": "h",
"sparkline": {
"fillColor": "rgba(31, 118, 189, 0.18)",
"full": false,
"lineColor": "rgb(31, 120, 193)",
"show": false
},
"tableColumn": "",
"targets": [
{
"expr": "probe_http_ssl{target=~\"$targets\"}",
"format": "time_series",
"interval": "$interval",
"intervalFactor": 1,
"refId": "A"
}
],
"thresholds": "0,1",
"title": "SSL",
"type": "singlestat",
"valueFontSize": "80%",
"valueMaps": [
{
"op": "=",
"text": "N/A",
"value": "null"
},
{
"op": "=",
"text": "YES",
"value": "1"
},
{
"op": "=",
"text": "NO",
"value": "0"
}
],
"valueName": "current"
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "Prometheus",
"fill": 1,
"gridPos": {
"h": 6,
"w": 9,
"x": 6,
"y": 3
},
"id": 17,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "probe_duration_seconds{target=~\"$targets\"}",
"format": "time_series",
"interval": "$interval",
"intervalFactor": 1,
"legendFormat": "seconds",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeShift": null,
"title": "Probe Duration",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "s",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "Prometheus",
"fill": 1,
"gridPos": {
"h": 6,
"w": 9,
"x": 15,
"y": 3
},
"id": 21,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "probe_dns_lookup_time_seconds{target=~\"$targets\"}",
"format": "time_series",
"interval": "$interval",
"intervalFactor": 1,
"legendFormat": "seconds",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeShift": null,
"title": "DNS Lookup",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "s",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
},
{
"cacheTimeout": null,
"colorBackground": true,
"colorValue": false,
"colors": [
"#d44a3a",
"rgba(237, 129, 40, 0.89)",
"#299c46"
],
"datasource": "Prometheus",
"decimals": 2,
"format": "dtdurations",
"gauge": {
"maxValue": 100,
"minValue": 0,
"show": false,
"thresholdLabels": false,
"thresholdMarkers": true
},
"gridPos": {
"h": 2,
"w": 6,
"x": 0,
"y": 5
},
"id": 19,
"interval": null,
"links": [],
"mappingType": 1,
"mappingTypes": [
{
"name": "value to text",
"value": 1
},
{
"name": "range to text",
"value": 2
}
],
"maxDataPoints": 100,
"minSpan": 3,
"nullPointMode": "connected",
"nullText": null,
"postfix": "",
"postfixFontSize": "50%",
"prefix": "",
"prefixFontSize": "50%",
"rangeMaps": [
{
"from": "null",
"text": "N/A",
"to": "null"
}
],
"repeatDirection": "h",
"sparkline": {
"fillColor": "rgba(31, 118, 189, 0.18)",
"full": false,
"lineColor": "rgb(31, 120, 193)",
"show": false
},
"tableColumn": "",
"targets": [
{
"expr": "probe_ssl_earliest_cert_expiry{target=~\"$targets\"}-time()",
"format": "time_series",
"interval": "$interval",
"intervalFactor": 1,
"refId": "A"
}
],
"thresholds": "0,1209600",
"title": "SSL Cert Expiry",
"type": "singlestat",
"valueFontSize": "80%",
"valueMaps": [
{
"op": "=",
"text": "N/A",
"value": "null"
},
{
"op": "=",
"text": "YES",
"value": "1"
},
{
"op": "=",
"text": "NO",
"value": "0"
}
],
"valueName": "current"
},
{
"cacheTimeout": null,
"colorBackground": false,
"colorValue": false,
"colors": [
"#299c46",
"rgba(237, 129, 40, 0.89)",
"#d44a3a"
],
"datasource": "Prometheus",
"decimals": 0,
"format": "none",
"gauge": {
"maxValue": 100,
"minValue": 0,
"show": false,
"thresholdLabels": false,
"thresholdMarkers": true
},
"gridPos": {
"h": 2,
"w": 6,
"x": 0,
"y": 7
},
"id": 20,
"interval": null,
"links": [],
"mappingType": 1,
"mappingTypes": [
{
"name": "value to text",
"value": 1
},
{
"name": "range to text",
"value": 2
}
],
"maxDataPoints": 100,
"minSpan": 3,
"nullPointMode": "connected",
"nullText": null,
"postfix": "",
"postfixFontSize": "50%",
"prefix": "",
"prefixFontSize": "50%",
"rangeMaps": [
{
"from": "null",
"text": "N/A",
"to": "null"
}
],
"repeatDirection": "h",
"sparkline": {
"fillColor": "rgba(31, 118, 189, 0.18)",
"full": false,
"lineColor": "rgb(31, 120, 193)",
"show": false
},
"tableColumn": "",
"targets": [
{
"expr": "probe_http_status_code{target=~\"$targets\"}",
"format": "time_series",
"interval": "$interval",
"intervalFactor": 1,
"refId": "A"
}
],
"thresholds": "200,299,300",
"title": "HTTP Status Code",
"transparent": false,
"type": "singlestat",
"valueFontSize": "80%",
"valueMaps": [
{
"op": "=",
"text": "N/A",
"value": "null"
},
{
"op": "=",
"text": "YES",
"value": "1"
},
{
"op": "=",
"text": "NO",
"value": "0"
}
],
"valueName": "current"
},
{
"cacheTimeout": null,
"colorBackground": false,
"colorValue": false,
"colors": [
"#299c46",
"rgba(237, 129, 40, 0.89)",
"#d44a3a"
],
"datasource": "Prometheus",
"format": "s",
"gauge": {
"maxValue": 100,
"minValue": 0,
"show": false,
"thresholdLabels": false,
"thresholdMarkers": true
},
"gridPos": {
"h": 2,
"w": 12,
"x": 0,
"y": 9
},
"id": 23,
"interval": null,
"links": [],
"mappingType": 1,
"mappingTypes": [
{
"name": "value to text",
"value": 1
},
{
"name": "range to text",
"value": 2
}
],
"maxDataPoints": 100,
"nullPointMode": "connected",
"nullText": null,
"postfix": "",
"postfixFontSize": "50%",
"prefix": "",
"prefixFontSize": "50%",
"rangeMaps": [
{
"from": "null",
"text": "N/A",
"to": "null"
}
],
"sparkline": {
"fillColor": "rgba(31, 118, 189, 0.18)",
"full": false,
"lineColor": "rgb(31, 120, 193)",
"show": false
},
"tableColumn": "",
"targets": [
{
"expr": "avg(probe_duration_seconds{target=~\"$targets\"})",
"format": "time_series",
"interval": "$interval",
"intervalFactor": 1,
"refId": "A"
}
],
"thresholds": "",
"title": "Average Probe Duration",
"type": "singlestat",
"valueFontSize": "50%",
"valueMaps": [
{
"op": "=",
"text": "N/A",
"value": "null"
}
],
"valueName": "current"
},
{
"cacheTimeout": null,
"colorBackground": false,
"colorValue": false,
"colors": [
"#299c46",
"rgba(237, 129, 40, 0.89)",
"#d44a3a"
],
"datasource": "Prometheus",
"format": "s",
"gauge": {
"maxValue": 100,
"minValue": 0,
"show": false,
"thresholdLabels": false,
"thresholdMarkers": true
},
"gridPos": {
"h": 2,
"w": 12,
"x": 12,
"y": 9
},
"id": 24,
"interval": null,
"links": [],
"mappingType": 1,
"mappingTypes": [
{
"name": "value to text",
"value": 1
},
{
"name": "range to text",
"value": 2
}
],
"maxDataPoints": 100,
"nullPointMode": "connected",
"nullText": null,
"postfix": "",
"postfixFontSize": "50%",
"prefix": "",
"prefixFontSize": "50%",
"rangeMaps": [
{
"from": "null",
"text": "N/A",
"to": "null"
}
],
"sparkline": {
"fillColor": "rgba(31, 118, 189, 0.18)",
"full": false,
"lineColor": "rgb(31, 120, 193)",
"show": false
},
"tableColumn": "",
"targets": [
{
"expr": "avg(probe_dns_lookup_time_seconds{target=~\"$targets\"})",
"format": "time_series",
"interval": "$interval",
"intervalFactor": 1,
"refId": "A"
}
],
"thresholds": "",
"title": "Average DNS Lookup",
"type": "singlestat",
"valueFontSize": "50%",
"valueMaps": [
{
"op": "=",
"text": "N/A",
"value": "null"
}
],
"valueName": "current"
}
],
"refresh": "1m",
"schemaVersion": 16,
"style": "dark",
"tags": [
"blackbox",
"prometheus"
],
"templating": {
"list": [
{
"auto": true,
"auto_count": 10,
"auto_min": "10s",
"current": {
"text": "auto",
"value": "$__auto_interval_interval"
},
"hide": 0,
"label": "Interval",
"name": "interval",
"options": [
{
"selected": true,
"text": "auto",
"value": "$__auto_interval_interval"
},
{
"selected": false,
"text": "5s",
"value": "5s"
},
{
"selected": false,
"text": "10s",
"value": "10s"
},
{
"selected": false,
"text": "30s",
"value": "30s"
},
{
"selected": false,
"text": "1m",
"value": "1m"
},
{
"selected": false,
"text": "10m",
"value": "10m"
},
{
"selected": false,
"text": "30m",
"value": "30m"
},
{
"selected": false,
"text": "1h",
"value": "1h"
},
{
"selected": false,
"text": "6h",
"value": "6h"
},
{
"selected": false,
"text": "12h",
"value": "12h"
},
{
"selected": false,
"text": "1d",
"value": "1d"
},
{
"selected": false,
"text": "7d",
"value": "7d"
},
{
"selected": false,
"text": "14d",
"value": "14d"
},
{
"selected": false,
"text": "30d",
"value": "30d"
}
],
"query": "5s,10s,30s,1m,10m,30m,1h,6h,12h,1d,7d,14d,30d",
"refresh": 2,
"type": "interval"
},
{
"allValue": null,
"current": {},
"datasource": "Prometheus",
"hide": 0,
"includeAll": true,
"label": null,
"multi": true,
"name": "targets",
"options": [],
"query": "label_values(probe_success, target)",
"refresh": 1,
"regex": "",
"sort": 0,
"tagValuesQuery": "",
"tags": [],
"tagsQuery": "",
"type": "query",
"useTags": false
}
]
},
"time": {
"from": "now-1h",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "",
"title": "Blackbox Exporter",
"uid": "xtkCtBkiz",
"version": 4
}

View File

@ -0,0 +1,12 @@
---
# Grafana dashboard provisioning: a file-based provider that loads
# dashboards from the directory given in `options.path`.
apiVersion: 1
providers:
  - name: 'Prometheus'
    type: 'file'
    orgId: 1
    folder: ''
    options:
      path: '/etc/grafana/provisioning/dashboards'
    editable: true
    allowUiUpdates: true
    disableDeletion: false

View File

@ -0,0 +1,11 @@
---
# Grafana datasource provisioning: registers the Prometheus service as the
# default datasource, queried through the Grafana backend (proxy access).
apiVersion: 1
datasources:
  - name: 'Prometheus'
    type: 'prometheus'
    url: 'http://prometheus:9090'
    access: 'proxy'
    orgId: 1
    isDefault: true
    basicAuth: false
    editable: true

18
docker-compose.config.yml Normal file
View File

@ -0,0 +1,18 @@
---
# One-shot helper stack: renders every file in ./templates into ./config
# using gomplate, with the values taken from the environment below.
version: '3'

services:
  gomplate:
    # NOTE(review): image is not pinned to a tag, unlike the images in the
    # main compose file - consider pinning once a version is agreed on.
    image: 'hairyhenderson/gomplate'
    container_name: 'gomplate'
    command:
      - '--input-dir'
      - '/templates'
      - '--output-dir'
      - '/config'
    volumes:
      - './templates:/templates'
      - './config:/config'
    # Every variable is mandatory: the `:?err` form makes compose abort
    # when the variable is unset or empty.
    environment:
      TARGETS: '${TARGETS:?err}'
      ALERT_EMAIL_TO: '${ALERT_EMAIL_TO:?err}'
      ALERT_EMAIL_FROM: '${ALERT_EMAIL_FROM:?err}'
      ALERT_SMTP_HOST: '${ALERT_SMTP_HOST:?err}'
      ALERT_SMTP_USER: '${ALERT_SMTP_USER:?err}'
      ALERT_SMTP_PASSWORD: '${ALERT_SMTP_PASSWORD:?err}'
      ALERT_SLACK_WEBHOOK: '${ALERT_SLACK_WEBHOOK:?err}'

66
docker-compose.yml Normal file
View File

@ -0,0 +1,66 @@
---
# Monitoring stack: Prometheus, blackbox exporter, Alertmanager and Grafana.
# All services share the `monitoring` network; only Grafana publishes a port.
version: "3"

services:
  # Scrapes the exporters and evaluates the alert rules.
  prometheus:
    image: prom/prometheus:v2.15.2
    container_name: prometheus
    restart: unless-stopped
    command:
      - "--config.file=/etc/prometheus/prometheus.yml"
      - "--web.console.libraries=/etc/prometheus/console_libraries"
      - "--web.console.templates=/etc/prometheus/consoles"
      - "--web.enable-lifecycle"
      - "--storage.tsdb.path=/prometheus"
      # `--storage.tsdb.retention` is deprecated; `.retention.time` is its
      # replacement and keeps the same 200h retention window.
      - "--storage.tsdb.retention.time=200h"
    networks:
      - monitoring
    volumes:
      - prometheus_data:/prometheus
      - ./config/prometheus.yml:/etc/prometheus/prometheus.yml
      - ./config/alertrules.yml:/etc/prometheus/alertrules.yml

  # Performs the HTTP probes on behalf of Prometheus.
  blackbox:
    image: prom/blackbox-exporter:v0.16.0
    container_name: blackbox
    restart: unless-stopped
    command:
      - "--config.file=/etc/blackbox/blackbox.yml"
    networks:
      - monitoring
    volumes:
      - ./config/blackbox.yml:/etc/blackbox/blackbox.yml

  # Routes fired alerts to the configured receivers.
  alertmanager:
    image: prom/alertmanager:v0.20.0
    container_name: alertmanager
    restart: unless-stopped
    command:
      - "--config.file=/etc/alertmanager/alertmanager.yml"
      - "--storage.path=/alertmanager"
    networks:
      - monitoring
    volumes:
      - ./config/alertmanager.yml:/etc/alertmanager/alertmanager.yml

  # Dashboards; admin credentials are mandatory (`:?err` aborts when the
  # variable is unset or empty).
  grafana:
    image: grafana/grafana:6.5.2
    container_name: grafana
    restart: unless-stopped
    networks:
      - monitoring
    volumes:
      - grafana_data:/var/lib/grafana
      - ./config/grafana/provisioning:/etc/grafana/provisioning
    environment:
      - GF_SECURITY_ADMIN_USER=${GRAFANA_USER:?err}
      - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_PASSWORD:?err}
      - GF_USERS_ALLOW_SIGN_UP=false
    ports:
      # Quoted so the mapping is always read as a string by YAML parsers.
      - "3000:3000"

networks:
  monitoring: {}

volumes:
  prometheus_data: {}
  grafana_data: {}

View File

@ -0,0 +1,17 @@
---
# Alertmanager configuration template. The placeholders below are filled
# from environment variables when the template is rendered.

# Every alert is routed to the single receiver defined below.
route:
  receiver: 'default'

receivers:
  - name: 'default'
    # E-mail notification, including a message when the alert resolves.
    email_configs:
      - send_resolved: true
        to: "{{ .Env.ALERT_EMAIL_TO }}"
        from: "{{ .Env.ALERT_EMAIL_FROM }}"
        smarthost: "{{ .Env.ALERT_SMTP_HOST }}"
        auth_username: "{{ .Env.ALERT_SMTP_USER }}"
        auth_password: "{{ .Env.ALERT_SMTP_PASSWORD }}"
    # Slack notification through an incoming webhook.
    slack_configs:
      - send_resolved: true
        api_url: "{{ .Env.ALERT_SLACK_WEBHOOK }}"

41
templates/prometheus.yml Normal file
View File

@ -0,0 +1,41 @@
---
# Prometheus configuration template. The blackbox target list is generated
# from the comma-separated TARGETS environment variable at render time.
global:
  scrape_interval: 15s
  evaluation_interval: 15s
  external_labels:
    monitor: "docker-host"

rule_files:
  - alertrules.yml

scrape_configs:
  - job_name: "prometheus"
    static_configs:
      - targets: ["localhost:9090"]

  - job_name: "grafana"
    static_configs:
      - targets: ["grafana:3000"]

  # One list entry is emitted per TARGETS item. Items are whitespace-trimmed
  # and empty items (e.g. from a trailing comma) are skipped; the trim
  # markers in the template actions keep the rendered list valid YAML.
  - job_name: "blackbox"
    metrics_path: /probe
    params:
      module: [http_2xx]
    static_configs:
      - targets:
          {{- range (split .Env.TARGETS ",") }}
          {{- with (. | strings.TrimSpace) }}
          - "{{ . }}"
          {{- end }}
          {{- end }}
    relabel_configs:
      # Pass the listed URL to the exporter as the `target` query parameter.
      - source_labels: [__address__]
        target_label: __param_target
      # Keep the probed URL visible as both `instance` and `target` labels.
      - source_labels: [__param_target]
        target_label: instance
      - source_labels: [__param_target]
        target_label: target
      # Scrape the blackbox exporter itself instead of the probed URL.
      - target_label: __address__
        replacement: blackbox:9115

alerting:
  alertmanagers:
    - scheme: http
      static_configs:
        - targets: ["alertmanager:9093"]