Skip to content

Commit 1cb9fed

Browse files
authored
Add omdb command to dry-run a region allocation (#7970)
Sometimes it's useful to be able to dry-run the region allocation query, so add a command that: (1) starts a transaction, (2) runs the region allocation query, (3) aborts the transaction, and (4) prints the allocation results.
1 parent aeb27f0 commit 1cb9fed

File tree

1 file changed

+198
-0
lines changed
  • dev-tools/omdb/src/bin/omdb

1 file changed

+198
-0
lines changed

dev-tools/omdb/src/bin/omdb/db.rs

Lines changed: 198 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ use internal_dns_types::names::ServiceName;
5555
use ipnetwork::IpNetwork;
5656
use itertools::Itertools;
5757
use nexus_config::PostgresConfigWithUrl;
58+
use nexus_config::RegionAllocationStrategy;
5859
use nexus_db_model::CrucibleDataset;
5960
use nexus_db_model::Disk;
6061
use nexus_db_model::DnsGroup;
@@ -117,11 +118,14 @@ use nexus_db_queries::db::model::ServiceKind;
117118
use nexus_db_queries::db::pagination::Paginator;
118119
use nexus_db_queries::db::pagination::paginated;
119120
use nexus_db_queries::db::queries::ALLOW_FULL_TABLE_SCAN_SQL;
121+
use nexus_db_queries::db::queries::region_allocation;
122+
use nexus_db_queries::transaction_retry::OptionalError;
120123
use nexus_types::deployment::Blueprint;
121124
use nexus_types::deployment::BlueprintZoneDisposition;
122125
use nexus_types::deployment::BlueprintZoneType;
123126
use nexus_types::deployment::DiskFilter;
124127
use nexus_types::deployment::SledFilter;
128+
use nexus_types::external_api::params;
125129
use nexus_types::external_api::views::PhysicalDiskPolicy;
126130
use nexus_types::external_api::views::PhysicalDiskState;
127131
use nexus_types::external_api::views::SledPolicy;
@@ -132,6 +136,7 @@ use nexus_types::internal_api::params::Srv;
132136
use nexus_types::inventory::CabooseWhich;
133137
use nexus_types::inventory::Collection;
134138
use nexus_types::inventory::RotPageWhich;
139+
use omicron_common::api::external;
135140
use omicron_common::api::external::DataPageParams;
136141
use omicron_common::api::external::Generation;
137142
use omicron_common::api::external::InstanceState;
@@ -734,6 +739,9 @@ enum RegionCommands {
734739

735740
/// Find deleted volume regions
736741
FindDeletedVolumeRegions,
742+
743+
/// Perform a dry-run allocation and return what was selected
744+
DryRunRegionAllocation(DryRunRegionAllocationArgs),
737745
}
738746

739747
#[derive(Debug, Args, Clone)]
@@ -752,6 +760,35 @@ struct RegionUsedByArgs {
752760
region_id: Vec<Uuid>,
753761
}
754762

763+
#[derive(Debug, Args, Clone)]
764+
struct DryRunRegionAllocationArgs {
765+
/// Specify to consider associated region snapshots as existing region
766+
/// allocations (i.e. do not allocate a new read-only region on the same
767+
/// sled as a related region snapshot)
768+
#[arg(long)]
769+
snapshot_id: Option<Uuid>,
770+
771+
#[arg(long)]
772+
block_size: u32,
773+
774+
/// The size of the virtual disk
775+
#[arg(long)]
776+
size: i64,
777+
778+
/// Should the allocated regions be restricted to distinct sleds?
779+
#[arg(long)]
780+
distinct_sleds: bool,
781+
782+
/// How many regions are required?
783+
#[arg(long, short, default_value_t = 3)]
784+
num_regions_required: usize,
785+
786+
/// the (optional) Volume to associate the new regions with (defaults to a
787+
/// random ID)
788+
#[arg(long, short)]
789+
volume_id: Option<VolumeUuid>,
790+
}
791+
755792
#[derive(Debug, Args, Clone)]
756793
struct RegionReplacementArgs {
757794
#[command(subcommand)]
@@ -1227,6 +1264,9 @@ impl DbArgs {
12271264
DbCommands::Region(RegionArgs {
12281265
command: RegionCommands::FindDeletedVolumeRegions,
12291266
}) => cmd_db_region_find_deleted(&datastore).await,
1267+
DbCommands::Region(RegionArgs {
1268+
command: RegionCommands::DryRunRegionAllocation(args),
1269+
}) => cmd_db_dry_run_region_allocation(&opctx, &datastore, args).await,
12301270
DbCommands::RegionReplacement(RegionReplacementArgs {
12311271
command: RegionReplacementCommands::List(args),
12321272
}) => {
@@ -3672,6 +3712,164 @@ async fn cmd_db_region_find_deleted(
36723712
Ok(())
36733713
}
36743714

3715+
#[derive(Debug)]
3716+
enum DryRunRegionAllocationResult {
3717+
QueryError { e: region_allocation::AllocationQueryError },
3718+
3719+
Success { datasets_and_regions: Vec<(CrucibleDataset, Region)> },
3720+
}
3721+
3722+
async fn cmd_db_dry_run_region_allocation(
3723+
opctx: &OpContext,
3724+
datastore: &DataStore,
3725+
args: &DryRunRegionAllocationArgs,
3726+
) -> Result<(), anyhow::Error> {
3727+
let volume_id = match args.volume_id {
3728+
Some(v) => v,
3729+
None => VolumeUuid::new_v4(),
3730+
};
3731+
3732+
let size: external::ByteCount = args.size.try_into()?;
3733+
let block_size: params::BlockSize = args.block_size.try_into()?;
3734+
3735+
let (blocks_per_extent, extent_count) = DataStore::get_crucible_allocation(
3736+
&block_size.try_into().unwrap(),
3737+
size,
3738+
);
3739+
3740+
let allocation_strategy = if args.distinct_sleds {
3741+
RegionAllocationStrategy::RandomWithDistinctSleds { seed: None }
3742+
} else {
3743+
RegionAllocationStrategy::Random { seed: None }
3744+
};
3745+
3746+
let err = OptionalError::<DryRunRegionAllocationResult>::new();
3747+
let conn = datastore.pool_connection_for_tests().await?;
3748+
3749+
let result: Result<std::convert::Infallible, diesel::result::Error> =
3750+
datastore
3751+
.transaction_retry_wrapper("dry_run_region_allocation")
3752+
.transaction(&conn, |conn| {
3753+
let err = err.clone();
3754+
let allocation_strategy = allocation_strategy.clone();
3755+
3756+
async move {
3757+
let query = region_allocation::allocation_query(
3758+
volume_id,
3759+
args.snapshot_id,
3760+
region_allocation::RegionParameters {
3761+
block_size: args.block_size.into(),
3762+
blocks_per_extent,
3763+
extent_count,
3764+
read_only: args.snapshot_id.is_some(),
3765+
},
3766+
&allocation_strategy,
3767+
args.num_regions_required,
3768+
)
3769+
.map_err(|e| {
3770+
err.bail(DryRunRegionAllocationResult::QueryError { e })
3771+
})?;
3772+
3773+
let datasets_and_regions: Vec<(CrucibleDataset, Region)> =
3774+
query.get_results_async(&conn).await?;
3775+
3776+
Err(err.bail(DryRunRegionAllocationResult::Success {
3777+
datasets_and_regions,
3778+
}))
3779+
}
3780+
})
3781+
.await;
3782+
3783+
let datasets_and_regions = match result {
3784+
Ok(_) => {
3785+
panic!("should not have succeeded!");
3786+
}
3787+
3788+
Err(e) => {
3789+
if let Some(result) = err.take() {
3790+
match result {
3791+
DryRunRegionAllocationResult::QueryError { e } => {
3792+
let err: external::Error = e.into();
3793+
Err(err)?
3794+
}
3795+
3796+
DryRunRegionAllocationResult::Success {
3797+
datasets_and_regions,
3798+
} => datasets_and_regions,
3799+
}
3800+
} else {
3801+
Err(e)?
3802+
}
3803+
}
3804+
};
3805+
3806+
#[derive(Tabled)]
3807+
#[tabled(rename_all = "SCREAMING_SNAKE_CASE")]
3808+
struct Row {
3809+
pub region_id: Uuid,
3810+
3811+
pub dataset_id: Uuid,
3812+
pub size_used: i64,
3813+
3814+
pub pool_id: Uuid,
3815+
3816+
#[tabled(display_with = "option_impl_display")]
3817+
pub total_size: Option<i64>,
3818+
3819+
#[tabled(display_with = "option_impl_display")]
3820+
pub size_left: Option<i64>,
3821+
}
3822+
3823+
let mut rows = Vec::with_capacity(datasets_and_regions.len());
3824+
3825+
let Some(latest_collection) =
3826+
datastore.inventory_get_latest_collection(opctx).await?
3827+
else {
3828+
bail!(
3829+
"failing due to missing inventory - we rely on inventory to \
3830+
calculate zpool sizing info"
3831+
);
3832+
};
3833+
3834+
let mut zpool_total_size: HashMap<Uuid, i64> = HashMap::new();
3835+
3836+
for (_, sled_agent) in latest_collection.sled_agents {
3837+
for zpool in sled_agent.zpools {
3838+
zpool_total_size
3839+
.insert(zpool.id.into_untyped_uuid(), zpool.total_size.into());
3840+
}
3841+
}
3842+
3843+
for (dataset, region) in datasets_and_regions {
3844+
let pool_id = dataset.pool_id.into_untyped_uuid();
3845+
let total_size = zpool_total_size.get(&pool_id);
3846+
rows.push(Row {
3847+
region_id: region.id(),
3848+
3849+
dataset_id: dataset.id().into_untyped_uuid(),
3850+
size_used: dataset.size_used,
3851+
3852+
pool_id,
3853+
total_size: total_size.copied(),
3854+
3855+
size_left: match total_size {
3856+
Some(total_size) => Some(total_size - dataset.size_used),
3857+
None => None,
3858+
},
3859+
});
3860+
}
3861+
3862+
let table = tabled::Table::new(rows)
3863+
.with(tabled::settings::Style::psql())
3864+
.with(tabled::settings::Padding::new(0, 1, 0, 0))
3865+
.with(tabled::settings::Panel::header("Allocation results"))
3866+
.to_string();
3867+
3868+
println!("{}", table);
3869+
3870+
Ok(())
3871+
}
3872+
36753873
/// List all region replacement requests
36763874
async fn cmd_db_region_replacement_list(
36773875
datastore: &DataStore,

0 commit comments

Comments
 (0)