From 945cae3908d0a985c896ed5b49cbcddc9848168b Mon Sep 17 00:00:00 2001 From: Yuri Lima Date: Thu, 22 May 2025 10:28:08 +0000 Subject: [PATCH] =?UTF-8?q?=F0=9F=91=8C=20IMPROVE:=20Update=20Hetzner=20co?= =?UTF-8?q?nfiguration=20files=20and=20scripts=20to=20include=20new=20targ?= =?UTF-8?q?ets,=20enhance=20health=20check=20metrics,=20and=20organize=20f?= =?UTF-8?q?ile=20paths=20for=20better=20maintainability?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- config/hetzner_dns_mapping.json | 12 +- config/hetzner_excluded_servers.json | 10 +- config/hetzner_targets.json | 428 ++++++++++++++++++++- config/phoenix_system_hetzner_targets.json | 1 + config/phoenix_worker_hetzner_targets.json | 1 + config/prometheus.yaml | 30 +- hetzner_discovery.py | 63 ++- 7 files changed, 508 insertions(+), 37 deletions(-) create mode 100644 config/phoenix_system_hetzner_targets.json create mode 100644 config/phoenix_worker_hetzner_targets.json diff --git a/config/hetzner_dns_mapping.json b/config/hetzner_dns_mapping.json index f37d872..a22dccc 100644 --- a/config/hetzner_dns_mapping.json +++ b/config/hetzner_dns_mapping.json @@ -27,14 +27,6 @@ "dns_name": "trachtenmode-schmid", "ip_address": "142.132.165.231" }, - { - "dns_name": "teamcity", - "ip_address": "23.88.107.109" - }, - { - "dns_name": "gitea", - "ip_address": "49.13.146.138" - }, { "dns_name": "phx-beta-rc", "ip_address": "159.69.200.205" @@ -166,5 +158,9 @@ { "dns_name": "test-ansible-ip-permission-2", "ip_address": "91.99.29.55" + }, + { + "dns_name": "bpp", + "ip_address": "91.99.107.178" } ] \ No newline at end of file diff --git a/config/hetzner_excluded_servers.json b/config/hetzner_excluded_servers.json index 56df0c3..4cc77fd 100644 --- a/config/hetzner_excluded_servers.json +++ b/config/hetzner_excluded_servers.json @@ -1,7 +1,13 @@ [ { - "server_id": 52530041, - "name": "docuvita", + "server_id": 41663080, + "name": "teamcity", + "datacenter": "nbg1", + "reason": "Darklisted server" + }, + { + "server_id": 41739626, + "name": "gitea", "datacenter": "nbg1", "reason": "Darklisted server" } diff --git a/config/hetzner_targets.json b/config/hetzner_targets.json index ab20b12..03ab19b 100644 --- a/config/hetzner_targets.json +++ b/config/hetzner_targets.json @@ -9,6 +9,16 @@ "datacenter": "nbg1" } }, + { + "targets": [ + "157.90.161.42:9800" + ], + "labels": { + "instance": "PHX-DEV-001.Alpha", + "datacenter": "nbg1", + "job": "health-exporter" + } + }, { "targets": [ "167.235.254.4:9100", @@ -19,6 +29,16 @@ "datacenter": "fsn1" } }, + { + "targets": [ + "167.235.254.4:9800" + ], + "labels": { + "instance": "ANSIBLE-MASTER", + "datacenter": "fsn1", + "job": "health-exporter" + } + }, { "targets": [ "116.203.75.215:9100", @@ -29,6 +49,16 @@ "datacenter": "nbg1" } }, + { + "targets": [ + "116.203.75.215:9800" + ], + "labels": { + "instance": "cts", + "datacenter": "nbg1", + "job": "health-exporter" + } + }, { "targets": [ "162.55.52.253:9100", @@ -39,6 +69,16 @@ "datacenter": "nbg1" } }, + { + "targets": [ + "162.55.52.253:9800" + ], + "labels": { + "instance": "Phx-Yuri", + "datacenter": "nbg1", + "job": "health-exporter" + } + }, { "targets": [ "162.55.54.75:9100", @@ -49,6 +89,16 @@ "datacenter": "nbg1" } }, + { + "targets": [ + "162.55.54.75:9800" + ], + "labels": { + "instance": "benzinger-demo", + "datacenter": "nbg1", + "job": "health-exporter" + } + }, { "targets": [ "5.75.183.139:9100", @@ -59,6 +109,16 @@ "datacenter": "nbg1" } }, + { + "targets": [ + "5.75.183.139:9800" + ], + "labels": { + "instance": "phx-internal", + "datacenter": "nbg1", + "job": "health-exporter" + } + }, { "targets": [ "142.132.165.231:9100", @@ -71,22 +131,12 @@ }, { "targets": [ - "23.88.107.109:9100", - "23.88.107.109:9113" + "142.132.165.231:9800" ], "labels": { - "instance": "teamcity", - "datacenter": "nbg1" - } - }, - { - "targets": [ - "49.13.146.138:9100", - "49.13.146.138:9113" - ], - "labels": { - "instance": "gitea", - "datacenter": "nbg1" + "instance": "trachtenmode-schmid", + "datacenter": "nbg1", + "job": "health-exporter" } }, { @@ -99,6 +149,16 @@ "datacenter": "nbg1" } }, + { + "targets": [ + "159.69.200.205:9800" + ], + "labels": { + "instance": "phx-beta-rc", + "datacenter": "nbg1", + "job": "health-exporter" + } + }, { "targets": [ "116.203.53.137:9100", @@ -109,6 +169,16 @@ "datacenter": "nbg1" } }, + { + "targets": [ + "116.203.53.137:9800" + ], + "labels": { + "instance": "lhl", + "datacenter": "nbg1", + "job": "health-exporter" + } + }, { "targets": [ "5.75.153.161:9100", @@ -119,6 +189,16 @@ "datacenter": "nbg1" } }, + { + "targets": [ + "5.75.153.161:9800" + ], + "labels": { + "instance": "Grafana-Prometheus", + "datacenter": "nbg1", + "job": "health-exporter" + } + }, { "targets": [ "159.69.44.39:9100", @@ -129,6 +209,16 @@ "datacenter": "nbg1" } }, + { + "targets": [ + "159.69.44.39:9800" + ], + "labels": { + "instance": "cooper", + "datacenter": "nbg1", + "job": "health-exporter" + } + }, { "targets": [ "49.13.165.13:9100", @@ -139,6 +229,16 @@ "datacenter": "nbg1" } }, + { + "targets": [ + "49.13.165.13:9800" + ], + "labels": { + "instance": "shipxpert", + "datacenter": "nbg1", + "job": "health-exporter" + } + }, { "targets": [ "188.245.44.219:9100", @@ -149,6 +249,16 @@ "datacenter": "nbg1" } }, + { + "targets": [ + "188.245.44.219:9800" + ], + "labels": { + "instance": "sartissohn", + "datacenter": "nbg1", + "job": "health-exporter" + } + }, { "targets": [ "116.203.92.218:9100", @@ -159,6 +269,16 @@ "datacenter": "nbg1" } }, + { + "targets": [ + "116.203.92.218:9800" + ], + "labels": { + "instance": "guntli", + "datacenter": "nbg1", + "job": "health-exporter" + } + }, { "targets": [ "94.130.77.57:9100", @@ -169,6 +289,16 @@ "datacenter": "nbg1" } }, + { + "targets": [ + "94.130.77.57:9800" + ], + "labels": { + "instance": "kolb", + "datacenter": "nbg1", + "job": "health-exporter" + } + }, { "targets": [ "116.203.151.20:9100", @@ -179,6 +309,16 @@ "datacenter": "nbg1" } }, + { + "targets": [ + "116.203.151.20:9800" + ], + "labels": { + "instance": "ried", + "datacenter": "nbg1", + "job": "health-exporter" + } + }, { "targets": [ "116.203.68.120:9100", @@ -189,6 +329,16 @@ "datacenter": "nbg1" } }, + { + "targets": [ + "116.203.68.120:9800" + ], + "labels": { + "instance": "heba", + "datacenter": "nbg1", + "job": "health-exporter" + } + }, { "targets": [ "116.203.128.69:9100", @@ -199,6 +349,16 @@ "datacenter": "nbg1" } }, + { + "targets": [ + "116.203.128.69:9800" + ], + "labels": { + "instance": "eicsoft", + "datacenter": "nbg1", + "job": "health-exporter" + } + }, { "targets": [ "159.69.93.252:9100", @@ -209,6 +369,16 @@ "datacenter": "nbg1" } }, + { + "targets": [ + "159.69.93.252:9800" + ], + "labels": { + "instance": "ck-vechta", + "datacenter": "nbg1", + "job": "health-exporter" + } + }, { "targets": [ "116.203.46.171:9100", @@ -219,6 +389,16 @@ "datacenter": "nbg1" } }, + { + "targets": [ + "116.203.46.171:9800" + ], + "labels": { + "instance": "eeparts", + "datacenter": "nbg1", + "job": "health-exporter" + } + }, { "targets": [ "167.235.130.242:9100", @@ -229,6 +409,16 @@ "datacenter": "nbg1" } }, + { + "targets": [ + "167.235.130.242:9800" + ], + "labels": { + "instance": "big-break-changes", + "datacenter": "nbg1", + "job": "health-exporter" + } + }, { "targets": [ "188.245.32.214:9100", @@ -239,6 +429,16 @@ "datacenter": "nbg1" } }, + { + "targets": [ + "188.245.32.214:9800" + ], + "labels": { + "instance": "PHX-License-Server", + "datacenter": "nbg1", + "job": "health-exporter" + } + }, { "targets": [ "138.199.201.35:9100", @@ -249,6 +449,16 @@ "datacenter": "nbg1" } }, + { + "targets": [ + "138.199.201.35:9800" + ], + "labels": { + "instance": "PHX-SkyNodus", + "datacenter": "nbg1", + "job": "health-exporter" + } + }, { "targets": [ "188.245.108.133:9100", @@ -259,6 +469,16 @@ "datacenter": "nbg1" } }, + { + "targets": [ + "188.245.108.133:9800" + ], + "labels": { + "instance": "bode", + "datacenter": "nbg1", + "job": "health-exporter" + } + }, { "targets": [ "168.119.174.181:9100", @@ -269,6 +489,16 @@ "datacenter": "nbg1" } }, + { + "targets": [ + "168.119.174.181:9800" + ], + "labels": { + "instance": "skr04", + "datacenter": "nbg1", + "job": "health-exporter" + } + }, { "targets": [ "91.99.24.53:9100", @@ -279,6 +509,16 @@ "datacenter": "nbg1" } }, + { + "targets": [ + "91.99.24.53:9800" + ], + "labels": { + "instance": "artwin", + "datacenter": "nbg1", + "job": "health-exporter" + } + }, { "targets": [ "168.119.231.96:9100", @@ -289,6 +529,16 @@ "datacenter": "nbg1" } }, + { + "targets": [ + "168.119.231.96:9800" + ], + "labels": { + "instance": "amplid", + "datacenter": "nbg1", + "job": "health-exporter" + } + }, { "targets": [ "116.203.143.63:9100", @@ -299,6 +549,16 @@ "datacenter": "nbg1" } }, + { + "targets": [ + "116.203.143.63:9800" + ], + "labels": { + "instance": "schwarz", + "datacenter": "nbg1", + "job": "health-exporter" + } + }, { "targets": [ "91.99.53.67:9100", @@ -309,6 +569,16 @@ "datacenter": "nbg1" } }, + { + "targets": [ + "91.99.53.67:9800" + ], + "labels": { + "instance": "skr03", + "datacenter": "nbg1", + "job": "health-exporter" + } + }, { "targets": [ "91.99.52.253:9100", @@ -319,6 +589,16 @@ "datacenter": "nbg1" } }, + { + "targets": [ + "91.99.52.253:9800" + ], + "labels": { + "instance": "moonich", + "datacenter": "nbg1", + "job": "health-exporter" + } + }, { "targets": [ "91.99.76.156:9100", @@ -329,6 +609,16 @@ "datacenter": "nbg1" } }, + { + "targets": [ + "91.99.76.156:9800" + ], + "labels": { + "instance": "comp-sys", + "datacenter": "nbg1", + "job": "health-exporter" + } + }, { "targets": [ "78.46.214.224:9100", @@ -339,6 +629,16 @@ "datacenter": "nbg1" } }, + { + "targets": [ + "78.46.214.224:9800" + ], + "labels": { + "instance": "skr-ch", + "datacenter": "nbg1", + "job": "health-exporter" + } + }, { "targets": [ "116.203.18.156:9100", @@ -349,6 +649,16 @@ "datacenter": "nbg1" } }, + { + "targets": [ + "116.203.18.156:9800" + ], + "labels": { + "instance": "wsoft", + "datacenter": "nbg1", + "job": "health-exporter" + } + }, { "targets": [ "91.99.71.224:9100", @@ -359,6 +669,16 @@ "datacenter": "nbg1" } }, + { + "targets": [ + "91.99.71.224:9800" + ], + "labels": { + "instance": "bzp", + "datacenter": "nbg1", + "job": "health-exporter" + } + }, { "targets": [ "49.13.197.152:9100", @@ -369,6 +689,16 @@ "datacenter": "nbg1" } }, + { + "targets": [ + "49.13.197.152:9800" + ], + "labels": { + "instance": "dss", + "datacenter": "nbg1", + "job": "health-exporter" + } + }, { "targets": [ "128.140.103.178:9100", @@ -379,6 +709,16 @@ "datacenter": "nbg1" } }, + { + "targets": [ + "128.140.103.178:9800" + ], + "labels": { + "instance": "new-full-test", + "datacenter": "nbg1", + "job": "health-exporter" + } + }, { "targets": [ "157.90.228.52:9100", @@ -389,6 +729,16 @@ "datacenter": "nbg1" } }, + { + "targets": [ + "157.90.228.52:9800" + ], + "labels": { + "instance": "modern", + "datacenter": "nbg1", + "job": "health-exporter" + } + }, { "targets": [ "23.88.56.89:9100", @@ -399,6 +749,16 @@ "datacenter": "nbg1" } }, + { + "targets": [ + "23.88.56.89:9800" + ], + "labels": { + "instance": "burg-itc", + "datacenter": "nbg1", + "job": "health-exporter" + } + }, { "targets": [ "116.203.73.240:9100", @@ -409,6 +769,16 @@ "datacenter": "nbg1" } }, + { + "targets": [ + "116.203.73.240:9800" + ], + "labels": { + "instance": "test-ansible-ip-permission", + "datacenter": "nbg1", + "job": "health-exporter" + } + }, { "targets": [ "91.99.29.55:9100", @@ -418,5 +788,35 @@ "instance": "test-ansible-ip-permission-2", "datacenter": "nbg1" } + }, + { + "targets": [ + "91.99.29.55:9800" + ], + "labels": { + "instance": "test-ansible-ip-permission-2", + "datacenter": "nbg1", + "job": "health-exporter" + } + }, + { + "targets": [ + "91.99.107.178:9100", + "91.99.107.178:9113" + ], + "labels": { + "instance": "bpp", + "datacenter": "nbg1" + } + }, + { + "targets": [ + "91.99.107.178:9800" + ], + "labels": { + "instance": "bpp", + "datacenter": "nbg1", + "job": "health-exporter" + } } ] \ No newline at end of file diff --git a/config/phoenix_system_hetzner_targets.json b/config/phoenix_system_hetzner_targets.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/config/phoenix_system_hetzner_targets.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/config/phoenix_worker_hetzner_targets.json b/config/phoenix_worker_hetzner_targets.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/config/phoenix_worker_hetzner_targets.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/config/prometheus.yaml b/config/prometheus.yaml index 3ece4bd..459c36a 100644 --- a/config/prometheus.yaml +++ b/config/prometheus.yaml @@ -1,12 +1,28 @@ global: - scrape_interval: 15s # Default for all jobs unless overridden + scrape_interval: 15s # ⏱️ Default interval Prometheus uses to scrape metrics from all targets, unless overridden in specific jobs. scrape_configs: # Existing Hetzner Dynamic Configuration - - job_name: "hetzner-dynamic" - scheme: http - file_sd_configs: + - job_name: "hetzner-dynamic" # 🏷️ Logical name for the scrape job (used in metrics as job="hetzner-dynamic"). + file_sd_configs: # 📂 Enable file-based service discovery. Prometheus will look at files to dynamically load targets. - files: - - "/opt/phx/main/config/hetzner_targets.json" - refresh_interval: 30s - scrape_interval: 5s # Custom interval for Hetzner servers \ No newline at end of file + - "/opt/phx/main/config/hetzner_targets.json" # 📁 JSON file containing the list of targets (e.g. IPs, ports, labels). + refresh_interval: 30s # 🔁 How often Prometheus checks the file for changes and updates the target list. + scrape_interval: 5s # ⏱️ Override global default: scrape metrics from each target in this job every 5 seconds. + scheme: http # 🌐 Use plain HTTP when scraping the targets (not HTTPS). + + # - job_name: "phoenix-system" + # file_sd_configs: + # - files: + # - "/opt/phx/main/config/phoenix_system_hetzner_targets.json" + # refresh_interval: 30s + # scrape_interval: 5s + # scheme: http + + # - job_name: "phoenix-worker" + # file_sd_configs: + # - files: + # - "/opt/phx/main/config/phoenix_worker_hetzner_targets.json" + # refresh_interval: 30s + # scrape_interval: 5s + # scheme: http \ No newline at end of file diff --git a/hetzner_discovery.py b/hetzner_discovery.py index 434c56f..9914310 100644 --- a/hetzner_discovery.py +++ b/hetzner_discovery.py @@ -17,10 +17,20 @@ if not HETZNER_API_TOKEN: raise ValueError("❌ HETZNER_API_TOKEN is missing! Make sure it's set in the .env file.") # 📂 Paths to output files -PROMETHEUS_TARGETS_FILE = "/opt/phx/main/config/hetzner_targets.json" -ERROR_LOG_FILE = "/opt/phx/main/config/hetzner_error_servers.json" -EXCLUDED_SERVERS_FILE = "/opt/phx/main/config/hetzner_excluded_servers.json" -DNS_MAPPING_FILE = "/opt/phx/main/config/hetzner_dns_mapping.json" +# PROMETHEUS_TARGETS_FILE = "/opt/phx/main/config/hetzner_targets.json" +# ERROR_LOG_FILE = "/opt/phx/main/config/hetzner_error_servers.json" +# EXCLUDED_SERVERS_FILE = "/opt/phx/main/config/hetzner_excluded_servers.json" +# DNS_MAPPING_FILE = "/opt/phx/main/config/hetzner_dns_mapping.json" + + +BASE_DIR = os.path.dirname(os.path.abspath(__file__)) + +PROMETHEUS_TARGETS_FILE = os.path.join(BASE_DIR, "config/hetzner_targets.json") +PHX_SYSTEM_FILE = os.path.join(BASE_DIR, "config/phoenix_system_hetzner_targets.json") +PHX_WORKER_FILE = os.path.join(BASE_DIR, "config/phoenix_worker_hetzner_targets.json") +DNS_MAPPING_FILE = os.path.join(BASE_DIR, "config/hetzner_dns_mapping.json") +ERROR_LOG_FILE = os.path.join(BASE_DIR, "config/hetzner_error_servers.json") +EXCLUDED_SERVERS_FILE = os.path.join(BASE_DIR, "config/hetzner_excluded_servers.json") # 📌 Hetzner API URL HETZNER_API_URL = os.getenv("HETZNER_API_URL") @@ -30,7 +40,7 @@ if not HETZNER_API_URL: raise ValueError("❌ HETZNER_API_URL is missing! Make sure it's set in the .env file.") # 🛑 List of server names to exclude (DARKLIST) -DARKLISTED_SERVERS = ["docuvita"] +DARKLISTED_SERVERS = ["docuvita", "teamcity", "gitea"] # 📡 Fetch Hetzner server list with pagination support and Rate Limiting handling def get_hetzner_servers(): @@ -78,6 +88,8 @@ def get_hetzner_servers(): def generate_prometheus_sd_config(): servers = get_hetzner_servers() targets = [] + phx_system_targets = [] + phx_worker_targets = [] error_servers = [] excluded_servers = [] dns_mappings = [] # New list for storing DNS-IP mappings @@ -97,6 +109,7 @@ def generate_prometheus_sd_config(): continue # Skip adding to Prometheus targets if ipv4: + # Add to DNS mapping file targets.append({ "targets": [f"{ipv4}:9100", f"{ipv4}:9113"], "labels": { @@ -104,7 +117,37 @@ def generate_prometheus_sd_config(): "datacenter": datacenter } }) - # Add to DNS mapping file + + # This is with Python Flask server for health checks + targets.append({ + "targets": [f"{ipv4}:9800"], + "labels": { + "instance": f"{server_name}", + "datacenter": datacenter, + "job": "health-exporter" + } + }) + + # Phoenix System metrics (port 3000) + # phx_system_targets.append({ + # "targets": [f"{ipv4}:3000"], + # "labels": { + # "instance": f"{server_name}", + # "datacenter": datacenter, + # "__metrics_path__": "/health/metrics" + # } + # }) + + # # Phoenix Worker metrics (port 3001) + # phx_worker_targets.append({ + # "targets": [f"{ipv4}:3001"], + # "labels": { + # "instance": f"{server_name}", + # "datacenter": datacenter, + # "__metrics_path__": "/health/metrics" + # } + # }) + dns_mappings.append({ "dns_name": server_name, "ip_address": ipv4 @@ -125,6 +168,14 @@ def generate_prometheus_sd_config(): print(f"✅ Updated Prometheus targets in {PROMETHEUS_TARGETS_FILE}") + with open(PHX_SYSTEM_FILE, "w") as f: + json.dump(phx_system_targets, f, indent=4) + print(f"✅ phoenix-system targets saved to {PHX_SYSTEM_FILE}") + + with open(PHX_WORKER_FILE, "w") as f: + json.dump(phx_worker_targets, f, indent=4) + print(f"✅ phoenix-worker targets saved to {PHX_WORKER_FILE}") + # Save DNS Mappings file with open(DNS_MAPPING_FILE, "w") as f: json.dump(dns_mappings, f, indent=4)