# File: DRL_PROJ/pipeline/vast_api.py
# Last commit: bb3dfb92d5 "Clean state" (Johnny Fernandes, 2026-04-30 01:25:39 +01:00)
# Size: 132 lines, 5.4 KiB, Python

import json
from dataclasses import dataclass
from typing import Any
from urllib import error, request
class VastApiError(RuntimeError):
    """Generic error raised for any Vast.ai API failure.

    Subclasses ``RuntimeError`` so existing ``except RuntimeError`` handlers
    keep working; it adds no state or behaviour of its own.
    """
# Lightweight view of a Vast.ai instance with the fields the pipeline cares about
@dataclass(slots=True)
class VastInstance:
id: int
actual_status: str
ssh_host: str | None
ssh_port: int | None
public_ipaddr: str | None
gpu_name: str | None
dph_total: float | None
raw: dict[str, Any]
class VastApiClient:
    """Thin wrapper around the Vast.ai REST API.

    Every call goes through :meth:`_request`, which authenticates with a
    bearer token and converts transport and decoding failures into
    ``VastApiError``.
    """

    def __init__(
        self,
        api_key: str,
        *,
        base_url: str = "https://console.vast.ai",
        timeout: float = 60.0,
    ) -> None:
        """Store the credentials and connection settings.

        Args:
            api_key: Token sent as ``Authorization: Bearer <api_key>``.
            base_url: API root; trailing slashes are removed so request
                paths (which start with ``/``) can be appended directly.
            timeout: Timeout in seconds passed to ``urlopen`` for each request.
        """
        self.api_key = api_key
        self.base_url = base_url.rstrip("/")
        self.timeout = timeout

    def _request(self, method: str, path: str, payload: dict[str, Any] | None = None) -> Any:
        """Send a request and return the parsed JSON response body.

        Args:
            method: HTTP method, e.g. ``"GET"``.
            path: Path appended to ``base_url`` (must start with ``/``).
            payload: Optional JSON-serialisable body; when given it is sent
                as ``application/json``.

        Returns:
            The decoded JSON value, or ``None`` when the body is empty or
            contains only whitespace.

        Raises:
            VastApiError: On an HTTP error status, a connection failure, a
                read timeout, or a response body that is not valid JSON.
        """
        url = f"{self.base_url}{path}"
        data = None
        headers = {"Authorization": f"Bearer {self.api_key}"}
        if payload is not None:
            headers["Content-Type"] = "application/json"
            data = json.dumps(payload).encode("utf-8")
        req = request.Request(url, method=method, data=data, headers=headers)

        try:
            with request.urlopen(req, timeout=self.timeout) as response:
                body = response.read()
        # Vast.ai returns varied error formats; surface whatever body we get
        except error.HTTPError as exc:
            details = exc.read().decode("utf-8", errors="replace")
            raise VastApiError(f"{method} {path} failed with {exc.code}: {details}") from exc
        except error.URLError as exc:
            raise VastApiError(f"{method} {path} failed: {exc.reason}") from exc
        except TimeoutError as exc:
            # A timeout while reading the body is raised directly rather than
            # wrapped in URLError, so it needs its own handler.
            raise VastApiError(
                f"{method} {path} failed: timed out after {self.timeout}s"
            ) from exc

        if not body.strip():
            return None
        try:
            return json.loads(body)
        except ValueError as exc:
            # ValueError covers both JSONDecodeError and UnicodeDecodeError.
            snippet = body[:200].decode("utf-8", errors="replace")
            raise VastApiError(
                f"{method} {path} returned a non-JSON response: {snippet}"
            ) from exc

    def show_user(self) -> dict[str, Any]:
        """Fetch the currently authenticated user's profile."""
        return self._request("GET", "/api/v0/users/current/")

    # ── SSH keys ────────────────────────────────────────────────────────

    def show_ssh_keys(self) -> list[dict[str, Any]]:
        """List registered SSH keys.

        Handles the inconsistent response shapes from the API: either a bare
        list, or a dict holding the list under ``"keys"`` or ``"ssh_keys"``.

        Raises:
            VastApiError: If the response matches none of those shapes.
        """
        response = self._request("GET", "/api/v0/ssh/")
        if isinstance(response, list):
            return response
        if isinstance(response, dict):
            for key in ("keys", "ssh_keys"):
                value = response.get(key)
                if isinstance(value, list):
                    return value
        raise VastApiError(f"Unexpected SSH key response: {response}")

    def ensure_ssh_key(self, public_key: str) -> None:
        """Register the public key if it isn't already present.

        Surrounding whitespace is ignored on both sides of the comparison, so
        a key read from a ``.pub`` file (which ends with a newline) still
        matches its registered copy instead of being registered again.
        The whitespace-stripped key is what gets registered.
        """
        wanted = public_key.strip()
        for item in self.show_ssh_keys():
            # The key text may live under any of these field names.
            registered = (
                item.get("key")
                or item.get("public_key")
                or item.get("ssh_key")
                or ""
            )
            if registered.strip() == wanted:
                return
        self._request("POST", "/api/v0/ssh/", {"ssh_key": wanted})

    def attach_ssh_key(self, instance_id: int, public_key: str) -> None:
        """Authorise an SSH key for a running instance."""
        self._request("POST", f"/api/v0/instances/{instance_id}/ssh/", {"ssh_key": public_key})

    # ── Offers ─────────────────────────────────────────────────────────

    def search_offers(self, query: dict[str, Any]) -> list[dict[str, Any]]:
        """Search available GPU offers matching a query filter.

        Returns:
            The ``"offers"`` entry of the response as a list: a single offer
            dict is wrapped in a list, and a missing or null entry yields
            ``[]``.

        Raises:
            VastApiError: If the response is not a JSON object.
        """
        response = self._request("POST", "/api/v0/bundles/", query)
        if not isinstance(response, dict):
            raise VastApiError(f"Unexpected offer search response: {response}")
        offers = response.get("offers")
        if offers is None:
            return []
        if isinstance(offers, dict):
            return [offers]
        return offers

    # ── Instances ──────────────────────────────────────────────────────

    def create_instance(self, offer_id: int, payload: dict[str, Any]) -> int:
        """Rent an offer, returning the new contract (instance) ID.

        Raises:
            VastApiError: If the response is not a JSON object, does not
                report ``success``, or lacks ``new_contract``.
        """
        response = self._request("PUT", f"/api/v0/asks/{offer_id}/", payload)
        created = (
            isinstance(response, dict)
            and response.get("success")
            and "new_contract" in response
        )
        if not created:
            raise VastApiError(f"Instance creation failed for offer {offer_id}: {response}")
        return int(response["new_contract"])

    def show_instance(self, instance_id: int) -> VastInstance:
        """Fetch current status and connection details for an instance.

        Raises:
            VastApiError: If the response carries no usable ``"instances"``
                object.
        """
        response = self._request("GET", f"/api/v0/instances/{instance_id}/")
        raw = response.get("instances") if isinstance(response, dict) else None
        if not raw or not isinstance(raw, dict):
            raise VastApiError(f"No instance details found for {instance_id}: {response}")
        return VastInstance(
            id=int(raw["id"]),
            actual_status=raw.get("actual_status", ""),
            ssh_host=raw.get("ssh_host"),
            ssh_port=raw.get("ssh_port"),
            public_ipaddr=raw.get("public_ipaddr"),
            gpu_name=raw.get("gpu_name"),
            dph_total=raw.get("dph_total"),
            raw=raw,
        )

    def destroy_instance(self, instance_id: int) -> None:
        """Permanently destroy an instance (releases the GPU and billing)."""
        self._request("DELETE", f"/api/v0/instances/{instance_id}/")