import json from dataclasses import dataclass from typing import Any from urllib import error, request # Generic error raised for any Vast.ai API failure class VastApiError(RuntimeError): pass # Lightweight view of a Vast.ai instance with the fields the pipeline cares about @dataclass(slots=True) class VastInstance: id: int actual_status: str ssh_host: str | None ssh_port: int | None public_ipaddr: str | None gpu_name: str | None dph_total: float | None raw: dict[str, Any] # Thin wrapper around the Vast.ai REST API class VastApiClient: def __init__(self, api_key: str, *, base_url: str = "https://console.vast.ai") -> None: self.api_key = api_key self.base_url = base_url.rstrip("/") # Low-level request helper — sends JSON, returns parsed response body def _request(self, method: str, path: str, payload: dict[str, Any] | None = None) -> Any: url = f"{self.base_url}{path}" data = None headers = {"Authorization": f"Bearer {self.api_key}"} if payload is not None: headers["Content-Type"] = "application/json" data = json.dumps(payload).encode("utf-8") req = request.Request(url, method=method, data=data, headers=headers) try: with request.urlopen(req, timeout=60) as response: body = response.read() # Vast.ai returns varied error formats; surface whatever body we get except error.HTTPError as exc: details = exc.read().decode("utf-8", errors="replace") raise VastApiError(f"{method} {path} failed with {exc.code}: {details}") from exc except error.URLError as exc: raise VastApiError(f"{method} {path} failed: {exc.reason}") from exc if not body: return None return json.loads(body) # Fetch the currently authenticated user's profile def show_user(self) -> dict[str, Any]: return self._request("GET", "/api/v0/users/current/") # ── SSH keys ──────────────────────────────────────────────────────── # List registered SSH keys; handles inconsistent response shapes from the API def show_ssh_keys(self) -> list[dict[str, Any]]: response = self._request("GET", "/api/v0/ssh/") if isinstance(response, list): return response if isinstance(response, dict): for key in ("keys", "ssh_keys"): value = response.get(key) if isinstance(value, list): return value raise VastApiError(f"Unexpected SSH key response: {response}") # Register the public key if it isn't already present def ensure_ssh_key(self, public_key: str) -> None: existing_keys = self.show_ssh_keys() if any( ( item.get("key") or item.get("public_key") or item.get("ssh_key") or "" ).strip() == public_key for item in existing_keys ): return self._request("POST", "/api/v0/ssh/", {"ssh_key": public_key}) # Authorise an SSH key for a running instance def attach_ssh_key(self, instance_id: int, public_key: str) -> None: self._request("POST", f"/api/v0/instances/{instance_id}/ssh/", {"ssh_key": public_key}) # ── Offers ───────────────────────────────────────────────────────── # Search available GPU offers matching a query filter def search_offers(self, query: dict[str, Any]) -> list[dict[str, Any]]: response = self._request("POST", "/api/v0/bundles/", query) offers = response.get("offers", []) if isinstance(offers, dict): return [offers] return offers # ── Instances ────────────────────────────────────────────────────── # Rent an offer, returning the new contract (instance) ID def create_instance(self, offer_id: int, payload: dict[str, Any]) -> int: response = self._request("PUT", f"/api/v0/asks/{offer_id}/", payload) if not response or not response.get("success"): raise VastApiError(f"Instance creation failed for offer {offer_id}: {response}") return int(response["new_contract"]) # Fetch current status and connection details for an instance def show_instance(self, instance_id: int) -> VastInstance: response = self._request("GET", f"/api/v0/instances/{instance_id}/") raw = response.get("instances") if not raw: raise VastApiError(f"No instance details found for {instance_id}: {response}") return VastInstance( id=int(raw["id"]), actual_status=raw.get("actual_status", ""), ssh_host=raw.get("ssh_host"), ssh_port=raw.get("ssh_port"), public_ipaddr=raw.get("public_ipaddr"), gpu_name=raw.get("gpu_name"), dph_total=raw.get("dph_total"), raw=raw, ) # Permanently destroy an instance (releases the GPU and billing) def destroy_instance(self, instance_id: int) -> None: self._request("DELETE", f"/api/v0/instances/{instance_id}/")