services_uptime_monitor.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297
  1. #!/usr/bin/env python3
  2. """
  3. Uptime Monitor for smallmountains.de
  4. Checks service availability and updates the Notion dashboard.
  5. Run via cron every 5 minutes:
  6. */5 * * * * /usr/local/bin/python3 /path/to/services_uptime_monitor.py >> /path/to/monitor.log 2>&1
  7. Services are configured entirely in the Notion database — no code changes needed
  8. to add, remove, or reconfigure a service.
  9. """
  10. from __future__ import annotations
  11. import sys
  12. import sqlite3
  13. import socket
  14. import subprocess
  15. import time
  16. from datetime import datetime, timezone, timedelta
  17. from pathlib import Path
  18. import requests
  19. from notion_client import Client
  20. from notion_client.errors import APIResponseError
  21. # ── Configuration ─────────────────────────────────────────────────────────────
  22. NOTION_TOKEN = "secret_b7PiPL2FqC9QEikqkAEWOht7LmzPMIJMWTzUPWwbw4H"
  23. NOTION_DATA_SOURCE_ID = "22174dd2-e6fc-4dc9-ac86-a5d614c995bd" # Services data source
  24. UPTIME_PAGE_ID = "38210a5f51bd807bae1edb699d9591e8" # Uptime Tracker page
  25. DB_PATH = Path(__file__).parent / "uptime_history.db"
  26. HTTP_TIMEOUT = 10 # seconds
  27. UDP_TIMEOUT = 5 # seconds
  28. # ── Fetch services from Notion ─────────────────────────────────────────────────
  29. def fetch_services(notion: Client) -> list[dict]:
  30. """
  31. Read the Services database and return a list of service dicts.
  32. Each HTTP service dict: {"name", "notion_page_id", "type": "http", "url"}
  33. Each UDP service dict: {"name", "notion_page_id", "type": "udp", "host", "port"}
  34. Rows missing a name or URL are skipped with a warning.
  35. Check Type defaults to HTTP when the field is left blank.
  36. """
  37. response = notion.data_sources.query(NOTION_DATA_SOURCE_ID)
  38. services = []
  39. for page in response["results"]:
  40. props = page["properties"]
  41. # Service name
  42. title_arr = props.get("Service", {}).get("title", [])
  43. name = title_arr[0]["plain_text"].strip() if title_arr else ""
  44. if not name:
  45. continue
  46. # URL / endpoint
  47. url = (props.get("URL") or {}).get("url") or ""
  48. if not url:
  49. print(f" Skipping '{name}': no URL configured in Notion")
  50. continue
  51. # Check Type (default to HTTP when blank)
  52. select = ((props.get("Check Type") or {}).get("select")) or {}
  53. check_type = select.get("name", "HTTP").upper()
  54. service: dict = {
  55. "name": name,
  56. "notion_page_id": page["id"],
  57. "type": check_type.lower(),
  58. }
  59. if check_type == "UDP":
  60. # URL field stores "host:port"
  61. host, sep, port_str = url.rpartition(":")
  62. service["host"] = host if sep else url
  63. service["port"] = int(port_str) if port_str.isdigit() else 34197
  64. else:
  65. service["url"] = url
  66. services.append(service)
  67. if not services:
  68. raise RuntimeError(
  69. "No services found in Notion database. "
  70. "Check that NOTION_DATABASE_ID is correct and PyBot has access."
  71. )
  72. return services
  73. # ── SQLite ─────────────────────────────────────────────────────────────────────
  74. def init_db(db_path: Path) -> sqlite3.Connection:
  75. conn = sqlite3.connect(db_path)
  76. conn.execute("""
  77. CREATE TABLE IF NOT EXISTS checks (
  78. id INTEGER PRIMARY KEY AUTOINCREMENT,
  79. service_name TEXT NOT NULL,
  80. checked_at TEXT NOT NULL,
  81. is_online INTEGER NOT NULL,
  82. response_time_ms REAL
  83. )
  84. """)
  85. conn.execute(
  86. "CREATE INDEX IF NOT EXISTS idx_service_time ON checks (service_name, checked_at)"
  87. )
  88. conn.commit()
  89. return conn
  90. def prune_old_records(conn: sqlite3.Connection):
  91. cutoff = (datetime.now(timezone.utc) - timedelta(days=35)).isoformat()
  92. conn.execute("DELETE FROM checks WHERE checked_at < ?", (cutoff,))
  93. conn.commit()
  94. def compute_uptime(conn: sqlite3.Connection, service_name: str, hours: int) -> float | None:
  95. """Return fraction 0.0–1.0 for Notion's percent format, or None if no data."""
  96. cutoff = (datetime.now(timezone.utc) - timedelta(hours=hours)).isoformat()
  97. row = conn.execute(
  98. "SELECT COUNT(*), COALESCE(SUM(is_online), 0) FROM checks "
  99. "WHERE service_name = ? AND checked_at >= ?",
  100. (service_name, cutoff),
  101. ).fetchone()
  102. total, online = row
  103. if total == 0:
  104. return None
  105. return online / total
  106. # ── Service Checks ─────────────────────────────────────────────────────────────
  107. def check_http(url: str) -> tuple[bool, float | None]:
  108. try:
  109. start = time.monotonic()
  110. resp = requests.get(url, timeout=HTTP_TIMEOUT, allow_redirects=True)
  111. elapsed_ms = (time.monotonic() - start) * 1000
  112. return resp.status_code < 500, round(elapsed_ms, 1)
  113. except requests.RequestException:
  114. return False, None
  115. def _ping(host: str) -> bool:
  116. # -W timeout unit differs: milliseconds on macOS, seconds on Linux
  117. w_arg = "3000" if sys.platform == "darwin" else "3"
  118. try:
  119. result = subprocess.run(
  120. ["ping", "-c", "1", "-W", w_arg, host],
  121. capture_output=True,
  122. timeout=6,
  123. )
  124. return result.returncode == 0
  125. except Exception:
  126. return False
  127. def check_udp(host: str, port: int) -> tuple[bool, None]:
  128. """
  129. Send a probe UDP packet.
  130. - Response received → online
  131. - ConnectionRefusedError → offline (ICMP port-unreachable)
  132. - Timeout → fall back to ICMP ping
  133. """
  134. try:
  135. sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
  136. sock.settimeout(UDP_TIMEOUT)
  137. sock.connect((host, port))
  138. sock.send(b"\x00\x00\x00\x00")
  139. try:
  140. sock.recv(1024)
  141. return True, None
  142. except socket.timeout:
  143. return _ping(host), None
  144. except ConnectionRefusedError:
  145. return False, None
  146. except (socket.gaierror, OSError):
  147. return False, None
  148. finally:
  149. try:
  150. sock.close()
  151. except Exception:
  152. pass
  153. def check_service(service: dict) -> tuple[bool, float | None]:
  154. if service["type"] == "http":
  155. return check_http(service["url"])
  156. if service["type"] == "udp":
  157. return check_udp(service["host"], service["port"])
  158. return False, None
  159. # ── Notion ─────────────────────────────────────────────────────────────────────
  160. def update_notion_service(
  161. notion: Client,
  162. service: dict,
  163. is_online: bool,
  164. response_ms: float | None,
  165. uptime_24h: float | None,
  166. uptime_7d: float | None,
  167. uptime_30d: float | None,
  168. ):
  169. now_iso = datetime.now(timezone.utc).isoformat()
  170. notion.pages.update(
  171. page_id=service["notion_page_id"],
  172. properties={
  173. "Status": {"select": {"name": "Online" if is_online else "Offline"}},
  174. "Last Checked": {"date": {"start": now_iso}},
  175. "Response Time (ms)": {"number": response_ms},
  176. "Uptime 24h %": {"number": uptime_24h},
  177. "Uptime 7d %": {"number": uptime_7d},
  178. "Uptime 30d %": {"number": uptime_30d},
  179. },
  180. )
  181. def update_last_updated_block(notion: Client, page_id: str, timestamp_str: str):
  182. """Find the callout/paragraph containing 'Last Updated' and refresh its text."""
  183. try:
  184. result = notion.blocks.children.list(block_id=page_id)
  185. for block in result.get("results", []):
  186. btype = block.get("type")
  187. if btype not in ("callout", "paragraph", "quote"):
  188. continue
  189. rich_text = block.get(btype, {}).get("rich_text", [])
  190. plain = "".join(rt.get("plain_text", "") for rt in rich_text)
  191. if "Last Updated" not in plain:
  192. continue
  193. notion.blocks.update(
  194. block_id=block["id"],
  195. **{
  196. btype: {
  197. "rich_text": [
  198. {
  199. "type": "text",
  200. "text": {"content": f"🔄 Last Updated: {timestamp_str}"},
  201. "annotations": {"bold": False},
  202. }
  203. ]
  204. }
  205. },
  206. )
  207. return
  208. except APIResponseError as e:
  209. print(f" Warning: could not update Last Updated block: {e}")
  210. # ── Main ───────────────────────────────────────────────────────────────────────
  211. def main():
  212. notion = Client(auth=NOTION_TOKEN)
  213. services = fetch_services(notion)
  214. print(f"Loaded {len(services)} service(s) from Notion.")
  215. conn = init_db(DB_PATH)
  216. prune_old_records(conn)
  217. now_utc = datetime.now(timezone.utc)
  218. now_iso = now_utc.isoformat()
  219. now_display = now_utc.strftime("%Y-%m-%d %H:%M UTC")
  220. print(f"[{now_display}] Running uptime checks...")
  221. for service in services:
  222. is_online, response_ms = check_service(service)
  223. status_str = "ONLINE " if is_online else "OFFLINE"
  224. rt_str = f"{response_ms:.0f}ms" if response_ms is not None else "—"
  225. print(f" {service['name']:<20} {status_str} {rt_str}")
  226. conn.execute(
  227. "INSERT INTO checks (service_name, checked_at, is_online, response_time_ms) "
  228. "VALUES (?, ?, ?, ?)",
  229. (service["name"], now_iso, int(is_online), response_ms),
  230. )
  231. conn.commit()
  232. uptime_24h = compute_uptime(conn, service["name"], 24)
  233. uptime_7d = compute_uptime(conn, service["name"], 7 * 24)
  234. uptime_30d = compute_uptime(conn, service["name"], 30 * 24)
  235. try:
  236. update_notion_service(
  237. notion, service, is_online, response_ms,
  238. uptime_24h, uptime_7d, uptime_30d,
  239. )
  240. except APIResponseError as e:
  241. print(f" Warning: Notion update failed for {service['name']}: {e}")
  242. update_last_updated_block(notion, UPTIME_PAGE_ID, now_display)
  243. conn.close()
  244. print("Done.")
  245. if __name__ == "__main__":
  246. main()