Skip to content

Commit 3b9f9f2

Browse files
committed
feat: auto-pull docker images and add refresh challenges command
- Add auto-pull of Docker images before starting challenge containers
- Add RefreshChallenges SudoAction to re-pull and restart challenges
- Add csudo refresh command (refresh all / refresh challenge)
- Pass VALIDATOR_HOTKEY and OWNER_HOTKEY env vars to challenge containers
- Fix term-challenge owner hotkey showing zeros in production
1 parent 14bc231 commit 3b9f9f2

9 files changed

Lines changed: 262 additions & 0 deletions

File tree

bins/csudo/src/main.rs

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,10 @@ enum Commands {
101101
/// Emergency commands
102102
#[command(subcommand)]
103103
Emergency(EmergencyCommands),
104+
105+
/// Refresh challenges (re-pull images and restart containers on all validators)
106+
#[command(subcommand)]
107+
Refresh(RefreshCommands),
104108
}
105109

106110
#[derive(Subcommand, Debug)]
@@ -208,6 +212,17 @@ enum EmergencyCommands {
208212
Resume,
209213
}
210214

215+
#[derive(Subcommand, Debug)]
216+
enum RefreshCommands {
217+
/// Refresh all challenges (re-pull images and restart containers)
218+
All,
219+
/// Refresh a specific challenge
220+
Challenge {
221+
/// Challenge ID (optional - select from list if not provided)
222+
id: Option<String>,
223+
},
224+
}
225+
211226
// ==================== State Fetching ====================
212227

213228
#[derive(Debug, Clone, Default)]
@@ -1244,6 +1259,73 @@ async fn main() -> Result<()> {
12441259
submit_action(&args.rpc, &keypair, SudoAction::Resume).await?;
12451260
}
12461261
},
1262+
1263+
Commands::Refresh(cmd) => match cmd {
1264+
RefreshCommands::All => {
1265+
println!(
1266+
"{}",
1267+
"Requesting all validators to re-pull and restart challenges..."
1268+
.bright_yellow()
1269+
);
1270+
if Confirm::with_theme(&ColorfulTheme::default())
1271+
.with_prompt("This will restart all challenge containers on all validators. Continue?")
1272+
.default(true)
1273+
.interact()?
1274+
{
1275+
submit_action(
1276+
&args.rpc,
1277+
&keypair,
1278+
SudoAction::RefreshChallenges { challenge_id: None },
1279+
)
1280+
.await?;
1281+
}
1282+
}
1283+
RefreshCommands::Challenge { id } => {
1284+
let state = fetch_chain_state(&args.rpc).await?;
1285+
1286+
let challenge = if let Some(id) = id {
1287+
state
1288+
.challenges
1289+
.iter()
1290+
.find(|c| c.id.starts_with(&id))
1291+
.ok_or_else(|| anyhow::anyhow!("Challenge not found: {}", id))?
1292+
} else {
1293+
if state.challenges.is_empty() {
1294+
println!("{}", "No challenges to refresh.".yellow());
1295+
return Ok(());
1296+
}
1297+
1298+
let options: Vec<String> = state
1299+
.challenges
1300+
.iter()
1301+
.map(|c| format!("{} ({})", c.name, &c.id[..8]))
1302+
.collect();
1303+
1304+
let selection = FuzzySelect::with_theme(&ColorfulTheme::default())
1305+
.with_prompt("Select challenge to refresh")
1306+
.items(&options)
1307+
.interact()?;
1308+
1309+
&state.challenges[selection]
1310+
};
1311+
1312+
println!(
1313+
"Refreshing challenge: {} ({})",
1314+
challenge.name.green(),
1315+
&challenge.id[..8]
1316+
);
1317+
1318+
let challenge_id = ChallengeId(uuid::Uuid::parse_str(&challenge.id)?);
1319+
submit_action(
1320+
&args.rpc,
1321+
&keypair,
1322+
SudoAction::RefreshChallenges {
1323+
challenge_id: Some(challenge_id),
1324+
},
1325+
)
1326+
.await?;
1327+
}
1328+
},
12471329
}
12481330

12491331
Ok(())

bins/validator-node/src/main.rs

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -272,6 +272,10 @@ async fn main() -> Result<()> {
272272
info!("Validator hotkey: {}", keypair.ss58_address());
273273
debug!("Validator hotkey (hex): {}", keypair.hotkey().to_hex());
274274

275+
// Set VALIDATOR_HOTKEY env var for challenge containers
276+
// This allows challenge containers to authenticate and sign P2P messages
277+
std::env::set_var("VALIDATOR_HOTKEY", keypair.hotkey().to_hex());
278+
275279
// The identity seed for P2P is derived from the hotkey (public key)
276280
// This ensures the peer ID corresponds to the SS58 address
277281
let identity_seed = keypair.hotkey().0;
@@ -350,6 +354,14 @@ async fn main() -> Result<()> {
350354
Arc::new(RwLock::new(state))
351355
};
352356

357+
// Set OWNER_HOTKEY env var for challenge containers
358+
// This allows challenge containers to identify the subnet owner for sudo operations
359+
{
360+
let state = chain_state.read();
361+
std::env::set_var("OWNER_HOTKEY", state.sudo_key.to_hex());
362+
info!("Owner hotkey set: {}", state.sudo_key.to_hex());
363+
}
364+
353365
// Initialize network protection (DDoS + stake validation)
354366
let protection_config = ProtectionConfig {
355367
min_stake_rao, // Configurable minimum stake
@@ -2509,6 +2521,28 @@ async fn handle_message(
25092521
}
25102522
}
25112523
}
2524+
SudoAction::RefreshChallenges { challenge_id } => {
2525+
if let Some(orchestrator) = challenge_orchestrator {
2526+
match challenge_id {
2527+
Some(id) => {
2528+
info!("Refreshing challenge: {:?}", id);
2529+
if let Err(e) = orchestrator.refresh_challenge(id).await {
2530+
error!("Failed to refresh challenge: {}", e);
2531+
} else {
2532+
info!("Challenge refreshed successfully");
2533+
}
2534+
}
2535+
None => {
2536+
info!("Refreshing all challenges (re-pulling images)");
2537+
if let Err(e) = orchestrator.refresh_all_challenges().await {
2538+
error!("Failed to refresh challenges: {}", e);
2539+
} else {
2540+
info!("All challenges refreshed successfully");
2541+
}
2542+
}
2543+
}
2544+
}
2545+
}
25122546
SudoAction::SetRequiredVersion {
25132547
min_version,
25142548
recommended_version,

crates/challenge-orchestrator/src/docker.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -344,6 +344,14 @@ impl DockerClient {
344344
if let Ok(dev_mode) = std::env::var("DEVELOPMENT_MODE") {
345345
env.push(format!("DEVELOPMENT_MODE={}", dev_mode));
346346
}
347+
// Pass validator hotkey (from platform validator) for P2P signing
348+
if let Ok(validator_hotkey) = std::env::var("VALIDATOR_HOTKEY") {
349+
env.push(format!("VALIDATOR_HOTKEY={}", validator_hotkey));
350+
}
351+
// Pass owner/sudo hotkey for challenge sudo operations
352+
if let Ok(owner_hotkey) = std::env::var("OWNER_HOTKEY") {
353+
env.push(format!("OWNER_HOTKEY={}", owner_hotkey));
354+
}
347355

348356
// Create container config
349357
let container_config = Config {

crates/challenge-orchestrator/src/lib.rs

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,14 @@ impl ChallengeOrchestrator {
8282

8383
/// Add and start a new challenge
8484
pub async fn add_challenge(&self, config: ChallengeContainerConfig) -> anyhow::Result<()> {
85+
// Pull image first to ensure it's available
86+
tracing::info!(
87+
image = %config.docker_image,
88+
challenge = %config.name,
89+
"Pulling Docker image before starting challenge"
90+
);
91+
self.docker.pull_image(&config.docker_image).await?;
92+
8593
let instance = self.docker.start_challenge(&config).await?;
8694
self.challenges
8795
.write()
@@ -90,6 +98,65 @@ impl ChallengeOrchestrator {
9098
Ok(())
9199
}
92100

101+
/// Refresh a challenge (re-pull image and restart container)
102+
pub async fn refresh_challenge(&self, challenge_id: ChallengeId) -> anyhow::Result<()> {
103+
// Get current config
104+
let instance = self
105+
.challenges
106+
.read()
107+
.get(&challenge_id)
108+
.cloned()
109+
.ok_or_else(|| anyhow::anyhow!("Challenge not found: {}", challenge_id))?;
110+
111+
tracing::info!(
112+
challenge_id = %challenge_id,
113+
image = %instance.image,
114+
"Refreshing challenge (re-pulling image and restarting)"
115+
);
116+
117+
// Stop current container
118+
self.docker.stop_container(&instance.container_id).await?;
119+
120+
// Re-pull the image (force fresh pull)
121+
self.docker.pull_image(&instance.image).await?;
122+
123+
// We need the full config to restart - get it from state or recreate
124+
// For now, create a minimal config from the instance
125+
let config = ChallengeContainerConfig {
126+
challenge_id,
127+
name: format!("challenge-{}", challenge_id),
128+
docker_image: instance.image.clone(),
129+
mechanism_id: 0, // Default, should be stored
130+
emission_weight: 1.0,
131+
timeout_secs: 3600,
132+
cpu_cores: 2.0,
133+
memory_mb: 4096,
134+
gpu_required: false,
135+
};
136+
137+
// Start new container
138+
let new_instance = self.docker.start_challenge(&config).await?;
139+
self.challenges.write().insert(challenge_id, new_instance);
140+
141+
tracing::info!(challenge_id = %challenge_id, "Challenge refreshed successfully");
142+
Ok(())
143+
}
144+
145+
/// Refresh all challenges (re-pull images and restart all containers)
146+
pub async fn refresh_all_challenges(&self) -> anyhow::Result<()> {
147+
let challenge_ids: Vec<ChallengeId> = self.challenges.read().keys().cloned().collect();
148+
149+
tracing::info!(count = challenge_ids.len(), "Refreshing all challenges");
150+
151+
for id in challenge_ids {
152+
if let Err(e) = self.refresh_challenge(id).await {
153+
tracing::error!(challenge_id = %id, error = %e, "Failed to refresh challenge");
154+
}
155+
}
156+
157+
Ok(())
158+
}
159+
93160
/// Update a challenge (pull new image, restart container)
94161
pub async fn update_challenge(&self, config: ChallengeContainerConfig) -> anyhow::Result<()> {
95162
// Stop old container if exists - get container_id first to avoid holding lock across await

crates/consensus/src/governance_integration.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,7 @@ fn sudo_action_to_governance_type(action: &SudoAction) -> GovernanceActionType {
245245
SudoAction::EmergencyPause { .. } => GovernanceActionType::EmergencyPause,
246246
SudoAction::Resume => GovernanceActionType::Resume,
247247
SudoAction::ForceStateUpdate { .. } => GovernanceActionType::ForceStateUpdate,
248+
SudoAction::RefreshChallenges { .. } => GovernanceActionType::UpdateChallenge, // Reuse UpdateChallenge type
248249
}
249250
}
250251

@@ -255,6 +256,10 @@ fn generate_proposal_title(action: &SudoAction) -> String {
255256
SudoAction::AddChallenge { config } => format!("Add Challenge: {}", config.name),
256257
SudoAction::UpdateChallenge { config } => format!("Update Challenge: {}", config.name),
257258
SudoAction::RemoveChallenge { id } => format!("Remove Challenge: {:?}", id),
259+
SudoAction::RefreshChallenges { challenge_id } => match challenge_id {
260+
Some(id) => format!("Refresh Challenge: {:?}", id),
261+
None => "Refresh All Challenges".to_string(),
262+
},
258263
SudoAction::SetChallengeWeight { challenge_id, .. } => {
259264
format!("Set Weight for Challenge: {:?}", challenge_id)
260265
}
@@ -423,6 +428,13 @@ fn apply_sudo_action(state: &mut ChainState, action: &SudoAction) -> Result<()>
423428
*state = new_state.clone();
424429
warn!("Force state update applied");
425430
}
431+
SudoAction::RefreshChallenges { challenge_id } => {
432+
// RefreshChallenges doesn't modify state - handled by orchestrator
433+
match challenge_id {
434+
Some(id) => info!("Challenge refresh requested: {:?}", id),
435+
None => info!("All challenges refresh requested"),
436+
}
437+
}
426438
}
427439

428440
state.update_hash();

crates/consensus/src/pbft.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,14 @@ impl PBFTEngine {
264264
state.remove_challenge(&id);
265265
info!("Challenge removed: {:?}", id);
266266
}
267+
SudoAction::RefreshChallenges { challenge_id } => {
268+
// RefreshChallenges is handled by the orchestrator, not state
269+
// Just log it here
270+
match challenge_id {
271+
Some(id) => info!("Challenge refresh requested: {:?}", id),
272+
None => info!("All challenges refresh requested"),
273+
}
274+
}
267275
SudoAction::SetRequiredVersion {
268276
min_version,
269277
recommended_version,

crates/consensus/src/stake_weighted_pbft.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -544,6 +544,14 @@ impl StakeWeightedPBFT {
544544
state.remove_challenge(&id);
545545
info!("Challenge removed: {:?}", id);
546546
}
547+
SudoAction::RefreshChallenges { challenge_id } => {
548+
// RefreshChallenges is handled by the orchestrator, not state
549+
// Just log it here
550+
match challenge_id {
551+
Some(id) => info!("Challenge refresh requested: {:?}", id),
552+
None => info!("All challenges refresh requested"),
553+
}
554+
}
547555
SudoAction::SetRequiredVersion {
548556
min_version,
549557
recommended_version,

crates/core/src/message.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -273,6 +273,13 @@ pub enum SudoAction {
273273
/// Remove a challenge
274274
RemoveChallenge { id: ChallengeId },
275275

276+
/// Refresh challenges (re-pull images and restart containers)
277+
/// Used when challenge images are updated on the registry
278+
RefreshChallenges {
279+
/// Optional: specific challenge ID to refresh. If None, refresh all.
280+
challenge_id: Option<ChallengeId>,
281+
},
282+
276283
// === Weight Allocation ===
277284
/// Set challenge weight ratio on a mechanism (0.0 - 1.0)
278285
/// Remaining weight goes to UID 0 (burn) unless other challenges share the mechanism

crates/rpc-server/src/jsonrpc.rs

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1428,6 +1428,42 @@ impl RpcHandler {
14281428
}
14291429
}
14301430
}
1431+
platform_core::ProposalAction::Sudo(
1432+
platform_core::SudoAction::RefreshChallenges { challenge_id },
1433+
) => {
1434+
info!("RefreshChallenges action received: {:?}", challenge_id);
1435+
// Trigger orchestrator to refresh (re-pull and restart)
1436+
if let Some(tx) = self.orchestrator_tx.read().as_ref() {
1437+
match challenge_id {
1438+
Some(id) => {
1439+
// Refresh specific challenge - get config and send update
1440+
let config = {
1441+
self.chain_state.read().challenge_configs.get(id).cloned()
1442+
};
1443+
if let Some(config) = config {
1444+
if let Err(e) = tx.send(OrchestratorCommand::Update(config)) {
1445+
warn!("Failed to send refresh to orchestrator: {}", e);
1446+
}
1447+
}
1448+
}
1449+
None => {
1450+
// Refresh all - send update for each challenge
1451+
let configs: Vec<_> = self
1452+
.chain_state
1453+
.read()
1454+
.challenge_configs
1455+
.values()
1456+
.cloned()
1457+
.collect();
1458+
for config in configs {
1459+
if let Err(e) = tx.send(OrchestratorCommand::Update(config)) {
1460+
warn!("Failed to send refresh to orchestrator: {}", e);
1461+
}
1462+
}
1463+
}
1464+
}
1465+
}
1466+
}
14311467
_ => {
14321468
// Other sudo actions - just apply to state
14331469
}

0 commit comments

Comments
 (0)