Skip to content

Commit

Permalink
[Experimental] feat/track-model-times
Browse files Browse the repository at this point in the history
  • Loading branch information
HudsonGraeme committed Jan 28, 2025
1 parent 1d3b5fe commit 25f0e8e
Show file tree
Hide file tree
Showing 32 changed files with 1,135 additions and 702 deletions.
1 change: 1 addition & 0 deletions cspell.json
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
"logrows",
"Mbps",
"metagraph",
"ndarray",
"netuid",
"Omron",
"onnxruntime",
Expand Down
52 changes: 37 additions & 15 deletions neurons/_miner/miner_session.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import time
import traceback
from typing import Tuple, Union
from rich.console import Console
from rich.table import Table

import bittensor as bt
import websocket
Expand Down Expand Up @@ -56,21 +58,33 @@ def start_axon(self):
)
bt.logging.info("Attached forward functions to axon")

# Start the miner's axon, making it active on the network.
bt.logging.info(f"Starting axon server: {axon.info()}")
axon.start()
bt.logging.info(f"Started axon server: {axon.info()}")

# Serve passes the axon information to the network + netuid we are hosting on.
# This will auto-update if the axon port or external ip has changed.
existing_axon = self.metagraph.axons[self.subnet_uid]

if (
existing_axon
and existing_axon.port == axon.external_port
and existing_axon.ip == axon.external_ip
):
bt.logging.debug(
f"Axon already serving on ip {axon.external_ip} and port {axon.external_port}"
)
return
bt.logging.info(
f"Serving axon on network: {self.subtensor.chain_endpoint} with netuid: {cli_parser.config.netuid}"
)

axon.serve(netuid=cli_parser.config.netuid, subtensor=self.subtensor)
bt.logging.info(
f"Served axon on network: {self.subtensor.chain_endpoint} with netuid: {cli_parser.config.netuid}"
)

# Start the miner's axon, making it active on the network.
bt.logging.info(f"Starting axon server: {axon.info()}")
axon.start()
bt.logging.info(f"Started axon server: {axon.info()}")

self.axon = axon

def run(self):
Expand Down Expand Up @@ -113,16 +127,25 @@ def run(self):
self.metagraph = self.subtensor.metagraph(
cli_parser.config.netuid
)
bt.logging.info(
f"Step:{step} | "
f"Block:{self.metagraph.block.item()} | "
f"Stake:{self.metagraph.S[self.subnet_uid]} | "
f"Rank:{self.metagraph.R[self.subnet_uid]} | "
f"Trust:{self.metagraph.T[self.subnet_uid]} | "
f"Consensus:{self.metagraph.C[self.subnet_uid]} | "
f"Incentive:{self.metagraph.I[self.subnet_uid]} | "
f"Emission:{self.metagraph.E[self.subnet_uid]}"
table = Table(title=f"Miner Status (UID: {self.subnet_uid})")
table.add_column("Block", justify="center", style="cyan")
table.add_column("Stake", justify="center", style="cyan")
table.add_column("Rank", justify="center", style="cyan")
table.add_column("Trust", justify="center", style="cyan")
table.add_column("Consensus", justify="center", style="cyan")
table.add_column("Incentive", justify="center", style="cyan")
table.add_column("Emission", justify="center", style="cyan")
table.add_row(
str(self.metagraph.block.item()),
str(self.metagraph.S[self.subnet_uid]),
str(self.metagraph.R[self.subnet_uid]),
str(self.metagraph.T[self.subnet_uid]),
str(self.metagraph.C[self.subnet_uid]),
str(self.metagraph.I[self.subnet_uid]),
str(self.metagraph.E[self.subnet_uid]),
)
console = Console()
console.print(table)
except Exception:
bt.logging.warning(
f"Failed to sync metagraph: {traceback.format_exc()}"
Expand Down Expand Up @@ -152,7 +175,6 @@ def check_register(self, should_exit=False):
else:
# Each miner gets a unique identity (UID) in the network for differentiation.
subnet_uid = self.metagraph.hotkeys.index(self.wallet.hotkey.ss58_address)
bt.logging.info(f"Running miner on uid: {subnet_uid}")
self.subnet_uid = subnet_uid

def configure(self):
Expand Down
24 changes: 20 additions & 4 deletions neurons/_validator/api/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,12 @@
from _validator.models.poc_rpc_request import ProofOfComputationRPCRequest
from _validator.models.pow_rpc_request import ProofOfWeightsRPCRequest
import hashlib
from constants import MAX_SIGNATURE_LIFESPAN, MAINNET_TESTNET_UIDS
from constants import (
MAX_SIGNATURE_LIFESPAN,
MAINNET_TESTNET_UIDS,
VALIDATOR_REQUEST_TIMEOUT_SECONDS,
EXTERNAL_REQUEST_QUEUE_TIME_SECONDS,
)
from _validator.config import ValidatorConfig
import base64
import substrateinterface
Expand Down Expand Up @@ -63,7 +68,7 @@ def _setup_api(self) -> None:

self.setup_rpc_methods()
self.start_server()
bt.logging.success("WebSocket API server started")
bt.logging.success("Ready to serve external requests")

def setup_rpc_methods(self) -> None:
@self.app.websocket("/rpc")
Expand Down Expand Up @@ -131,7 +136,8 @@ async def omron_proof_of_weights(
try:
await asyncio.wait_for(
self.pending_requests[external_request.hash].wait(),
timeout=900,
timeout=VALIDATOR_REQUEST_TIMEOUT_SECONDS
+ EXTERNAL_REQUEST_QUEUE_TIME_SECONDS,
)
result = self.request_results.pop(external_request.hash, None)

Expand Down Expand Up @@ -180,6 +186,16 @@ def start_server(self):
axon = bt.axon(
wallet=self.config.wallet, external_port=self.config.api.port
)
existing_axon = self.config.metagraph.axons[self.config.user_uid]
if (
existing_axon
and existing_axon.port == axon.external_port
and existing_axon.ip == axon.external_ip
):
bt.logging.debug(
f"Axon already serving on ip {axon.external_ip} and port {axon.external_port}"
)
return
axon.serve(self.config.bt_config.netuid, self.config.subtensor)
bt.logging.success("Axon served")
except Exception as e:
Expand Down Expand Up @@ -257,7 +273,7 @@ def commit_cert_hash(self):
bt.logging.error(f"Error committing certificate hash: {str(e)}")
traceback.print_exc()
else:
bt.logging.info("Certificate hash already committed to chain.")
bt.logging.debug("Certificate hash already committed to chain.")

def set_request_result(self, request_hash: str, result: dict[str, any]):
"""Set the result for a pending request and signal its completion."""
Expand Down
3 changes: 2 additions & 1 deletion neurons/_validator/api/certificate_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import time
from OpenSSL import crypto
import bittensor as bt
from constants import ONE_YEAR


class CertificateManager:
Expand All @@ -26,7 +27,7 @@ def _generate_certificate(self, cn: str) -> None:
cert.get_subject().CN = cn
cert.set_serial_number(int(time.time()))
cert.gmtime_adj_notBefore(0)
cert.gmtime_adj_notAfter(2 * 365 * 24 * 60 * 60)
cert.gmtime_adj_notAfter(2 * ONE_YEAR)
cert.set_issuer(cert.get_subject())
cert.set_pubkey(key)
cert.sign(key, "sha256")
Expand Down
206 changes: 0 additions & 206 deletions neurons/_validator/core/api.py

This file was deleted.

Loading

0 comments on commit 25f0e8e

Please sign in to comment.