# Grafana-Prometheus/hetzner_discovery.py
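"""Generate Prometheus file_sd target files from the Hetzner Cloud API.

Fetches all servers (with pagination and rate-limit handling), skips
darklisted servers, writes JSON target files plus a DNS/IP mapping and error
logs, and finally triggers a Prometheus configuration reload.
"""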

import requests
import json
import os
import time
from dotenv import load_dotenv
# Load environment variables from .env file
load_dotenv()
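# A minimal .env next to this script might look like the following; the values
# are placeholders, and the API URL shown is the standard Hetzner Cloud
# "list servers" endpoint (adjust if your setup differs):
#
#   HETZNER_API_TOKEN=<your-hetzner-cloud-api-token>
#   HETZNER_API_URL=https://api.hetzner.cloud/v1/servers
#   PROMETHEUS_RELOAD_URL=https://prometheus.phx-erp.de/-/reload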
# 🔑 Load Hetzner API Token & Prometheus URL
HETZNER_API_TOKEN = os.getenv("HETZNER_API_TOKEN")
# https://prometheus.io/docs/prometheus/latest/configuration/configuration/
PROMETHEUS_RELOAD_URL = os.getenv("PROMETHEUS_RELOAD_URL", "https://prometheus.phx-erp.de/-/reload")
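# Note: Prometheus only serves /-/reload when it is started with the
# --web.enable-lifecycle flag; without it, the reload request at the end of
# this script will be rejected.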
# Check if token is loaded
if not HETZNER_API_TOKEN:
raise ValueError("❌ HETZNER_API_TOKEN is missing! Make sure it's set in the .env file.")
# 📂 Paths to output files
# PROMETHEUS_TARGETS_FILE = "/opt/phx/main/config/hetzner_targets.json"
# ERROR_LOG_FILE = "/opt/phx/main/config/hetzner_error_servers.json"
# EXCLUDED_SERVERS_FILE = "/opt/phx/main/config/hetzner_excluded_servers.json"
# DNS_MAPPING_FILE = "/opt/phx/main/config/hetzner_dns_mapping.json"
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
HEARTBEAT_TARGETS_FILE = os.path.join(BASE_DIR, "config/hetzner_heartbeat_targets.json")
PROMETHEUS_TARGETS_FILE = os.path.join(BASE_DIR, "config/hetzner_targets.json")
PHX_SYSTEM_FILE = os.path.join(BASE_DIR, "config/phoenix_system_hetzner_targets.json")
PHX_WORKER_FILE = os.path.join(BASE_DIR, "config/phoenix_worker_hetzner_targets.json")
PHX_HEALTH_EXPORTER_FILE = os.path.join(BASE_DIR, "config/phoenix_health_exporter_hetzner_targets.json")
DNS_MAPPING_FILE = os.path.join(BASE_DIR, "config/hetzner_dns_mapping.json")
ERROR_LOG_FILE = os.path.join(BASE_DIR, "config/hetzner_error_servers.json")
EXCLUDED_SERVERS_FILE = os.path.join(BASE_DIR, "config/hetzner_excluded_servers.json")
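# The JSON files above are meant to be consumed by Prometheus via file-based
# service discovery. A minimal sketch of the matching prometheus.yml scrape
# config (the job names are assumptions, and the file paths must point to
# wherever the Prometheus server can read the generated files):
#
#   scrape_configs:
#     - job_name: "hetzner-node"
#       file_sd_configs:
#         - files:
#             - "config/hetzner_targets.json"
#     - job_name: "phoenix-health-exporter"
#       file_sd_configs:
#         - files:
#             - "config/phoenix_health_exporter_hetzner_targets.json"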
# 📌 Hetzner API URL
HETZNER_API_URL = os.getenv("HETZNER_API_URL")
# Check if API URL is loaded
if not HETZNER_API_URL:
raise ValueError("❌ HETZNER_API_URL is missing! Make sure it's set in the .env file.")
# 🛑 List of server names to exclude (DARKLIST)
DARKLISTED_SERVERS = ["docuvita", "teamcity", "gitea"]
# 📡 Fetch Hetzner server list with pagination support and Rate Limiting handling
def get_hetzner_servers():
    headers = {"Authorization": f"Bearer {HETZNER_API_TOKEN}"}
    all_servers = []
    page = 1
    per_page = 50  # Max per request
    while True:
        response = requests.get(
            f"{HETZNER_API_URL}?page={page}&per_page={per_page}", headers=headers
        )
        # Handle Rate Limiting (429 Too Many Requests)
        if response.status_code == 429:
            reset_time = int(response.headers.get("RateLimit-Reset", time.time() + 60))
            # Never sleep for a negative duration if the reset time is already in the past
            wait_time = max(reset_time - int(time.time()), 1)
            print(f"⏳ Rate limit exceeded! Waiting {wait_time} seconds until reset...")
            time.sleep(wait_time)
            continue  # Retry after waiting
        if response.status_code != 200:
            print(f"❌ Error fetching servers: {response.text}")
            break  # Stop fetching on error
        # Read rate limit headers
        remaining = response.headers.get("RateLimit-Remaining", "unknown")
        reset_time = int(response.headers.get("RateLimit-Reset", time.time() + 60))
        print(f"📊 API Rate Limit: {remaining} requests remaining. Next reset at {reset_time}.")
        data = response.json()
        servers = data.get("servers", [])
        all_servers.extend(servers)
        # Check if there's a next page
        pagination = data.get("meta", {}).get("pagination", {})
        if not pagination.get("next_page"):
            break  # Exit if no more pages
        page = pagination["next_page"]  # Move to the next page
    return all_servers
# 🏗️ Generate Prometheus JSON file and DNS Mapping
def generate_prometheus_sd_config():
    servers = get_hetzner_servers()
    targets = []
    phx_system_targets = []  # currently only used by the commented-out Phoenix System block below
    phx_worker_targets = []  # currently only used by the commented-out Phoenix Worker block below
    phx_health_exporter_targets = []
    error_servers = []
    excluded_servers = []
    dns_mappings = []  # New list for storing DNS-IP mappings
    heartbeat_targets = []
    for server in servers:
        # ipv4 can be null for servers without a public IPv4, so guard against that
        ipv4 = (server.get("public_net", {}).get("ipv4") or {}).get("ip")
        server_name = server["name"]
        datacenter = server["datacenter"]["location"]["name"]
        if server_name in DARKLISTED_SERVERS:
            excluded_servers.append({
                "server_id": server["id"],
                "name": server_name,
                "datacenter": datacenter,
                "reason": "Darklisted server"
            })
            continue  # Skip adding to Prometheus targets
        if ipv4:
            # Exporter targets (ports 9100 and 9113)
            targets.append({
                "targets": [f"{ipv4}:9100", f"{ipv4}:9113"],
                "labels": {
                    "instance": server_name,
                    "datacenter": datacenter
                }
            })
            # Health-check exporter (Python Flask server) on port 9800
            phx_health_exporter_targets.append({
                "targets": [f"{ipv4}:9800"],
                "labels": {
                    "instance": f"{server_name}-health-exporter",
                    "datacenter": datacenter
                }
            })
            # Add Pushgateway heartbeat endpoint (default Pushgateway port is 9091)
            heartbeat_targets.append({
                "targets": [f"{ipv4}:9091"],  # assuming Pushgateway is running on each server
                "labels": {
                    "instance": server_name,
                    "job": "heartbeat"
                }
            })
            # Phoenix System metrics (port 3000)
            # phx_system_targets.append({
            #     "targets": [f"{ipv4}:3000"],
            #     "labels": {
            #         "instance": f"{server_name}",
            #         "datacenter": datacenter,
            #         "__metrics_path__": "/health/metrics"
            #     }
            # })
            # Phoenix Worker metrics (port 3001)
            # phx_worker_targets.append({
            #     "targets": [f"{ipv4}:3001"],
            #     "labels": {
            #         "instance": f"{server_name}",
            #         "datacenter": datacenter,
            #         "__metrics_path__": "/health/metrics"
            #     }
            # })
            # Add to DNS mapping file
            dns_mappings.append({
                "dns_name": server_name,
                "ip_address": ipv4
            })
        else:
            # Log the server that couldn't be added
            error_servers.append({
                "server_id": server["id"],
                "name": server_name,
                "status": server["status"],
                "datacenter": datacenter,
                "reason": "Missing public_net or IPv4"
            })
    # Save JSON to Prometheus target file
    with open(PROMETHEUS_TARGETS_FILE, "w") as f:
        json.dump(targets, f, indent=4)
    print(f"✅ Updated Prometheus targets in {PROMETHEUS_TARGETS_FILE}")
    with open(PHX_HEALTH_EXPORTER_FILE, "w") as f:
        json.dump(phx_health_exporter_targets, f, indent=4)
    print(f"✅ phoenix-health-exporter targets saved to {PHX_HEALTH_EXPORTER_FILE}")
    with open(HEARTBEAT_TARGETS_FILE, "w") as f:
        json.dump(heartbeat_targets, f, indent=4)
    print(f"✅ heartbeat targets saved to {HEARTBEAT_TARGETS_FILE}")
    # with open(PHX_SYSTEM_FILE, "w") as f:
    #     json.dump(phx_system_targets, f, indent=4)
    # print(f"✅ phoenix-system targets saved to {PHX_SYSTEM_FILE}")
    # with open(PHX_WORKER_FILE, "w") as f:
    #     json.dump(phx_worker_targets, f, indent=4)
    # print(f"✅ phoenix-worker targets saved to {PHX_WORKER_FILE}")
    # Save DNS Mappings file
    with open(DNS_MAPPING_FILE, "w") as f:
        json.dump(dns_mappings, f, indent=4)
    print(f"📡 Created DNS Mapping file: {DNS_MAPPING_FILE}")
    # Save error logs if any servers were skipped due to missing data
    if error_servers:
        with open(ERROR_LOG_FILE, "w") as f:
            json.dump(error_servers, f, indent=4)
        print(f"⚠️ Some servers could not be added. Check {ERROR_LOG_FILE} for details.")
    # Save excluded servers log
    if excluded_servers:
        with open(EXCLUDED_SERVERS_FILE, "w") as f:
            json.dump(excluded_servers, f, indent=4)
        print(f"🚫 Darklisted servers were skipped. See {EXCLUDED_SERVERS_FILE} for details.")
    else:
        print("✅ No servers were excluded due to the darklist.")
    # 🔄 Trigger Prometheus reload
    try:
        response = requests.post(PROMETHEUS_RELOAD_URL)
        if response.status_code == 200:
            print("🔄 ✅ Prometheus configuration reloaded successfully!")
        else:
            print(f"⚠️ Warning: Prometheus reload failed with status {response.status_code}: {response.text}")
    except requests.exceptions.RequestException as e:
        print(f"❌ Error reloading Prometheus: {e}")
# 🔄 Run the script
if __name__ == "__main__":
    generate_prometheus_sd_config()
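# One way to keep the target files fresh is to run this script on a schedule,
# for example via cron; the interval and path below are assumptions:
#
#   */15 * * * * /usr/bin/python3 /opt/phx/main/hetzner_discovery.py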