diff --git a/ydb/core/base/appdata.cpp b/ydb/core/base/appdata.cpp index 59e971ee6b2b..b0bd7b126c25 100644 --- a/ydb/core/base/appdata.cpp +++ b/ydb/core/base/appdata.cpp @@ -70,6 +70,7 @@ struct TAppData::TImpl { NKikimrReplication::TReplicationDefaults ReplicationConfig; NKikimrProto::TDataIntegrityTrailsConfig DataIntegrityTrailsConfig; NKikimrConfig::TDataErasureConfig DataErasureConfig; + NKikimrConfig::THealthCheckConfig HealthCheckConfig; }; TAppData::TAppData( @@ -127,6 +128,7 @@ TAppData::TAppData( , ReplicationConfig(Impl->ReplicationConfig) , DataIntegrityTrailsConfig(Impl->DataIntegrityTrailsConfig) , DataErasureConfig(Impl->DataErasureConfig) + , HealthCheckConfig(Impl->HealthCheckConfig) , KikimrShouldContinue(kikimrShouldContinue) , TracingConfigurator(MakeIntrusive(TimeProvider, RandomProvider)) {} diff --git a/ydb/core/base/appdata_fwd.h b/ydb/core/base/appdata_fwd.h index 0e76490e541b..177c06da2647 100644 --- a/ydb/core/base/appdata_fwd.h +++ b/ydb/core/base/appdata_fwd.h @@ -73,6 +73,7 @@ namespace NKikimrConfig { class TMetadataCacheConfig; class TMemoryControllerConfig; class TFeatureFlags; + class THealthCheckConfig; } namespace NKikimrReplication { @@ -242,6 +243,7 @@ struct TAppData { NKikimrReplication::TReplicationDefaults& ReplicationConfig; NKikimrProto::TDataIntegrityTrailsConfig& DataIntegrityTrailsConfig; NKikimrConfig::TDataErasureConfig& DataErasureConfig; + NKikimrConfig::THealthCheckConfig& HealthCheckConfig; bool EnforceUserTokenRequirement = false; bool EnforceUserTokenCheckRequirement = false; // check token if it was specified bool AllowHugeKeyValueDeletes = true; // delete when all clients limit deletes per request diff --git a/ydb/core/blobstorage/ut_blobstorage/sanitize_groups.cpp b/ydb/core/blobstorage/ut_blobstorage/sanitize_groups.cpp index 26c40dd7d125..34e660ca028b 100644 --- a/ydb/core/blobstorage/ut_blobstorage/sanitize_groups.cpp +++ b/ydb/core/blobstorage/ut_blobstorage/sanitize_groups.cpp @@ -25,8 +25,8 @@ Y_UNIT_TEST_SUITE(GroupLayoutSanitizer) { } } - void CreateEnv(std::unique_ptr& env, std::vector& locations) { - TBlobStorageGroupType groupType = TBlobStorageGroupType::ErasureMirror3dc; + void CreateEnv(std::unique_ptr& env, std::vector& locations, + TBlobStorageGroupType groupType) { const ui32 numNodes = locations.size(); env.reset(new TEnvironmentSetup({ @@ -37,39 +37,49 @@ Y_UNIT_TEST_SUITE(GroupLayoutSanitizer) { const ui32 disksPerNode = 1; const ui32 slotsPerDisk = 3; + + env->Runtime->FilterFunction = CatchSanitizeRequests; env->CreateBoxAndPool(disksPerNode, numNodes * disksPerNode * slotsPerDisk / 9); + env->Runtime->FilterFunction = {}; } - Y_UNIT_TEST(Test3dc) { + NActorsInterconnect::TNodeLocation LocationGenerator(ui32 dc, ui32 rack, ui32 unit) { + NActorsInterconnect::TNodeLocation proto; + proto.SetDataCenter(ToString(dc)); + proto.SetRack(ToString(rack)); + proto.SetUnit(ToString(unit)); + return proto; + } + + void Test(TBlobStorageGroupType groupType, ui32 dcs, ui32 racks, ui32 units) { std::vector locations; - TLocationGenerator locationGenerator = [](ui32 dc, ui32 rack, ui32 unit) { - NActorsInterconnect::TNodeLocation proto; - proto.SetDataCenter(ToString(dc)); - proto.SetRack(ToString(rack)); - proto.SetUnit(ToString(unit)); - return proto; - }; - MakeLocations(locations, 3, 5, 1, locationGenerator); + MakeLocations(locations, dcs, racks, units, LocationGenerator); std::unique_ptr env; - CreateEnv(env, locations); - TBlobStorageGroupType groupType = TBlobStorageGroupType::ErasureMirror3dc; - TGroupGeometryInfo geom = CreateGroupGeometry(groupType); + CreateEnv(env, locations, groupType); + + // Assure that sanitizer doesn't send request to initially allocated groups env->Runtime->FilterFunction = CatchSanitizeRequests; + env->UpdateSettings(true, false, true); + env->Sim(TDuration::Minutes(3)); + env->UpdateSettings(false, false, false); + + TGroupGeometryInfo geom = CreateGroupGeometry(groupType); TString error; auto cfg = env->FetchBaseConfig(); UNIT_ASSERT_C(CheckBaseConfigLayout(geom, cfg, true, error), error); - env->Cleanup(); // Shuffle node locayion, assure that layout error occured - std::random_shuffle(locations.begin(), locations.end()); - env->Initialize(); - env->Sim(TDuration::Seconds(100)); - cfg = env->FetchBaseConfig(); - CheckBaseConfigLayout(geom, cfg, true, error); + do { + env->Cleanup(); + std::random_shuffle(locations.begin(), locations.end()); + env->Initialize(); + env->Sim(TDuration::Seconds(100)); + cfg = env->FetchBaseConfig(); + } while (CheckBaseConfigLayout(geom, cfg, true, error)); Cerr << error << Endl; // Sanitize groups @@ -86,6 +96,18 @@ Y_UNIT_TEST_SUITE(GroupLayoutSanitizer) { UNIT_ASSERT_C(CheckBaseConfigLayout(geom, cfg, true, error), error); } + Y_UNIT_TEST(Test3dc) { + Test(TBlobStorageGroupType::ErasureMirror3dc, 3, 5, 1); + } + + Y_UNIT_TEST(TestBlock4Plus2) { + Test(TBlobStorageGroupType::Erasure4Plus2Block, 1, 10, 2); + } + + Y_UNIT_TEST(TestMirror3of4) { + Test(TBlobStorageGroupType::ErasureMirror3of4, 1, 10, 2); + } + TString PrintGroups(TBlobStorageGroupType groupType, const NKikimrBlobStorage::TBaseConfig& cfg, std::vector locations) { TGroupGeometryInfo geom = CreateGroupGeometry(groupType); @@ -137,6 +159,7 @@ Y_UNIT_TEST_SUITE(GroupLayoutSanitizer) { } void TestMultipleRealmsOccupation(bool allowMultipleRealmsOccupation) { + TBlobStorageGroupType groupType = TBlobStorageGroupType::ErasureMirror3dc; std::vector locations; TLocationGenerator locationGenerator = [](ui32 dc, ui32 rack, ui32 unit) { NActorsInterconnect::TNodeLocation proto; @@ -152,9 +175,8 @@ Y_UNIT_TEST_SUITE(GroupLayoutSanitizer) { }; MakeLocations(locations, 4, 5, 1, locationGenerator); std::unique_ptr env; - CreateEnv(env, locations); + CreateEnv(env, locations, groupType); - TBlobStorageGroupType groupType = TBlobStorageGroupType::ErasureMirror3dc; TGroupGeometryInfo geom = CreateGroupGeometry(groupType); env->Runtime->FilterFunction = CatchSanitizeRequests; diff --git a/ydb/core/cms/console/configs_dispatcher.cpp b/ydb/core/cms/console/configs_dispatcher.cpp index dcbe87627a9c..bac25b3cd72a 100644 --- a/ydb/core/cms/console/configs_dispatcher.cpp +++ b/ydb/core/cms/console/configs_dispatcher.cpp @@ -67,6 +67,7 @@ const THashSet DYNAMIC_KINDS({ (ui32)NKikimrConsole::TConfigItem::BlobStorageConfigItem, (ui32)NKikimrConsole::TConfigItem::MetadataCacheConfigItem, (ui32)NKikimrConsole::TConfigItem::MemoryControllerConfigItem, + (ui32)NKikimrConsole::TConfigItem::HealthCheckConfigItem, }); const THashSet NON_YAML_KINDS({ diff --git a/ydb/core/driver_lib/run/run.cpp b/ydb/core/driver_lib/run/run.cpp index 30b9e40eb53c..6a13e6472d45 100644 --- a/ydb/core/driver_lib/run/run.cpp +++ b/ydb/core/driver_lib/run/run.cpp @@ -1205,6 +1205,10 @@ void TKikimrRunner::InitializeAppData(const TKikimrRunConfig& runConfig) AppData->ReplicationConfig = runConfig.AppConfig.GetReplicationConfig(); } + if (runConfig.AppConfig.HasHealthCheckConfig()) { + AppData->HealthCheckConfig = runConfig.AppConfig.GetHealthCheckConfig(); + } + // setup resource profiles AppData->ResourceProfiles = new TResourceProfiles; if (runConfig.AppConfig.GetBootstrapConfig().ResourceProfilesSize()) diff --git a/ydb/core/health_check/health_check.cpp b/ydb/core/health_check/health_check.cpp index 1e1b093f43cd..25418b25cae2 100644 --- a/ydb/core/health_check/health_check.cpp +++ b/ydb/core/health_check/health_check.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -28,6 +29,7 @@ #include #include +#include #include #include @@ -121,11 +123,12 @@ class TSelfCheckRequest : public TActorBootstrapped { ui64 Cookie; NWilson::TSpan Span; - TSelfCheckRequest(const TActorId& sender, THolder request, ui64 cookie, NWilson::TTraceId&& traceId) + TSelfCheckRequest(const TActorId& sender, THolder request, ui64 cookie, NWilson::TTraceId&& traceId, const NKikimrConfig::THealthCheckConfig& config) : Sender(sender) , Request(std::move(request)) , Cookie(cookie) , Span(TComponentTracingLevels::TTablet::Basic, std::move(traceId), "health_check", NWilson::EFlags::AUTO_END) + , HealthCheckConfig(config) {} using TGroupId = ui32; @@ -163,7 +166,7 @@ class TSelfCheckRequest : public TActorBootstrapped { struct TNodeTabletState { struct TTabletStateSettings { TInstant AliveBarrier; - ui32 MaxRestartsPerPeriod = 30; // per hour + ui32 MaxRestartsPerPeriod; // per hour ui32 MaxTabletIdsStored = 10; bool ReportGoodTabletsIds = false; }; @@ -266,6 +269,7 @@ class TSelfCheckRequest : public TActorBootstrapped { TString ErasureSpecies; std::vector VSlots; ui32 Generation; + bool LayoutCorrect = true; }; struct TSelfCheckResult { @@ -647,6 +651,8 @@ class TSelfCheckRequest : public TActorBootstrapped { std::optional> DatabaseBoardInfo; THashSet UnknownStaticGroups; + const NKikimrConfig::THealthCheckConfig& HealthCheckConfig; + std::vector SubscribedNodeIds; THashSet StorageNodeIds; THashSet ComputeNodeIds; @@ -742,7 +748,7 @@ class TSelfCheckRequest : public TActorBootstrapped { TTabletRequestsState TabletRequests; - TDuration Timeout = TDuration::MilliSeconds(20000); + TDuration Timeout = TDuration::MilliSeconds(HealthCheckConfig.GetTimeout()); static constexpr TStringBuf STATIC_STORAGE_POOL_NAME = "static"; bool IsSpecificDatabaseFilter() const { @@ -1504,6 +1510,7 @@ class TSelfCheckRequest : public TActorBootstrapped { for (const auto& [hiveId, hiveResponse] : HiveInfo) { if (hiveResponse.IsOk()) { settings.AliveBarrier = TInstant::MilliSeconds(hiveResponse->Record.GetResponseTimestamp()) - TDuration::Minutes(5); + settings.MaxRestartsPerPeriod = HealthCheckConfig.GetThresholds().GetTabletsRestartsOrange(); for (const NKikimrHive::TTabletInfo& hiveTablet : hiveResponse->Record.GetTablets()) { TSubDomainKey tenantId = TSubDomainKey(hiveTablet.GetObjectDomain()); auto itDomain = FilterDomainKey.find(tenantId); @@ -1569,6 +1576,7 @@ class TSelfCheckRequest : public TActorBootstrapped { auto& groupState = GroupState[groupId]; groupState.ErasureSpecies = group.GetInfo().GetErasureSpeciesV2(); groupState.Generation = group.GetInfo().GetGeneration(); + groupState.LayoutCorrect = group.GetInfo().GetLayoutCorrect(); StoragePoolState[poolId].Groups.emplace(groupId); } for (const auto& vSlot : VSlots->Get()->Record.GetEntries()) { @@ -1729,9 +1737,9 @@ class TSelfCheckRequest : public TActorBootstrapped { FillNodeInfo(nodeId, context.Location.mutable_compute()->mutable_node()); TSelfCheckContext rrContext(&context, "NODE_UPTIME"); - if (databaseState.NodeRestartsPerPeriod[nodeId] >= 30) { + if (databaseState.NodeRestartsPerPeriod[nodeId] >= HealthCheckConfig.GetThresholds().GetNodeRestartsOrange()) { rrContext.ReportStatus(Ydb::Monitoring::StatusFlag::ORANGE, "Node is restarting too often", ETags::Uptime); - } else if (databaseState.NodeRestartsPerPeriod[nodeId] >= 10) { + } else if (databaseState.NodeRestartsPerPeriod[nodeId] >= HealthCheckConfig.GetThresholds().GetNodeRestartsYellow()) { rrContext.ReportStatus(Ydb::Monitoring::StatusFlag::YELLOW, "The number of node restarts has increased", ETags::Uptime); } else { rrContext.ReportStatus(Ydb::Monitoring::StatusFlag::GREEN); @@ -1769,9 +1777,9 @@ class TSelfCheckRequest : public TActorBootstrapped { long timeDifferenceUs = nodeSystemState.GetMaxClockSkewWithPeerUs(); TDuration timeDifferenceDuration = TDuration::MicroSeconds(abs(timeDifferenceUs)); Ydb::Monitoring::StatusFlag::Status status; - if (timeDifferenceDuration > MAX_CLOCKSKEW_ORANGE_ISSUE_TIME) { + if (timeDifferenceDuration > TDuration::MicroSeconds(HealthCheckConfig.GetThresholds().GetNodesTimeDifferenceOrange())) { status = Ydb::Monitoring::StatusFlag::ORANGE; - } else if (timeDifferenceDuration > MAX_CLOCKSKEW_YELLOW_ISSUE_TIME) { + } else if (timeDifferenceDuration > TDuration::MicroSeconds(HealthCheckConfig.GetThresholds().GetNodesTimeDifferenceYellow())) { status = Ydb::Monitoring::StatusFlag::YELLOW; } else { status = Ydb::Monitoring::StatusFlag::GREEN; @@ -2343,6 +2351,7 @@ class TSelfCheckRequest : public TActorBootstrapped { class TGroupChecker { TString ErasureSpecies; + bool LayoutCorrect; int FailedDisks = 0; std::array DisksColors = {}; TStackVec> FailedRealms; @@ -2359,7 +2368,10 @@ class TSelfCheckRequest : public TActorBootstrapped { } public: - TGroupChecker(const TString& erasure) : ErasureSpecies(erasure) {} + TGroupChecker(const TString& erasure, const bool layoutCorrect = true) + : ErasureSpecies(erasure) + , LayoutCorrect(layoutCorrect) + {} void AddVDiskStatus(Ydb::Monitoring::StatusFlag::Status status, ui32 realm) { ++DisksColors[status]; @@ -2378,6 +2390,9 @@ class TSelfCheckRequest : public TActorBootstrapped { void ReportStatus(TSelfCheckContext& context) const { context.OverallStatus = Ydb::Monitoring::StatusFlag::GREEN; + if (!LayoutCorrect) { + context.ReportStatus(Ydb::Monitoring::StatusFlag::ORANGE, "Group layout is incorrect", ETags::GroupState); + } if (ErasureSpecies == NONE) { if (FailedDisks > 0) { context.ReportStatus(Ydb::Monitoring::StatusFlag::RED, "Group failed", ETags::GroupState, {ETags::VDiskState}); @@ -2727,7 +2742,7 @@ class TSelfCheckRequest : public TActorBootstrapped { return; } - TGroupChecker checker(itGroup->second.ErasureSpecies); + TGroupChecker checker(itGroup->second.ErasureSpecies, itGroup->second.LayoutCorrect); const auto& slots = itGroup->second.VSlots; for (const auto* slot : slots) { const auto& slotInfo = slot->GetInfo(); @@ -2921,9 +2936,6 @@ class TSelfCheckRequest : public TActorBootstrapped { } } - const TDuration MAX_CLOCKSKEW_ORANGE_ISSUE_TIME = TDuration::MicroSeconds(25000); - const TDuration MAX_CLOCKSKEW_YELLOW_ISSUE_TIME = TDuration::MicroSeconds(5000); - void FillResult(TOverallStateContext context) { if (IsSpecificDatabaseFilter()) { FillDatabaseResult(context, FilterDatabase, DatabaseState[FilterDatabase]); @@ -3252,12 +3264,16 @@ void TNodeCheckRequest::Bootstrap() { class THealthCheckService : public TActorBootstrapped { public: static constexpr NKikimrServices::TActivity::EType ActorActivityType() { return NKikimrServices::TActivity::MONITORING_SERVICE; } + NKikimrConfig::THealthCheckConfig HealthCheckConfig; THealthCheckService() { } void Bootstrap() { + HealthCheckConfig.CopyFrom(AppData()->HealthCheckConfig); + Send(NConsole::MakeConfigsDispatcherID(SelfId().NodeId()), + new NConsole::TEvConfigsDispatcher::TEvSetConfigSubscriptionRequest({NKikimrConsole::TConfigItem::HealthCheckConfigItem})); TMon* mon = AppData()->Mon; if (mon) { mon->RegisterActorPage({ @@ -3270,8 +3286,16 @@ class THealthCheckService : public TActorBootstrapped { Become(&THealthCheckService::StateWork); } + void Handle(NConsole::TEvConsole::TEvConfigNotificationRequest::TPtr& ev) { + const auto& record = ev->Get()->Record; + if (record.GetConfig().HasHealthCheckConfig()) { + HealthCheckConfig.CopyFrom(record.GetConfig().GetHealthCheckConfig()); + } + Send(ev->Sender, new NConsole::TEvConsole::TEvConfigNotificationResponse(record), 0, ev->Cookie); + } + void Handle(TEvSelfCheckRequest::TPtr& ev) { - Register(new TSelfCheckRequest(ev->Sender, ev.Get()->Release(), ev->Cookie, std::move(ev->TraceId))); + Register(new TSelfCheckRequest(ev->Sender, ev.Get()->Release(), ev->Cookie, std::move(ev->TraceId), HealthCheckConfig)); } std::shared_ptr GRpcClientLow; @@ -3299,6 +3323,7 @@ class THealthCheckService : public TActorBootstrapped { hFunc(TEvSelfCheckRequest, Handle); hFunc(TEvNodeCheckRequest, Handle); hFunc(NMon::TEvHttpInfo, Handle); + hFunc(NConsole::TEvConsole::TEvConfigNotificationRequest, Handle); cFunc(TEvents::TSystem::PoisonPill, PassAway); } } diff --git a/ydb/core/health_check/health_check_ut.cpp b/ydb/core/health_check/health_check_ut.cpp index 35854589e00b..b3d2a48a2e57 100644 --- a/ydb/core/health_check/health_check_ut.cpp +++ b/ydb/core/health_check/health_check_ut.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include "health_check.cpp" @@ -1961,5 +1962,223 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) { UNIT_ASSERT(HasDeadTabletIssue(result)); } + + void SendHealthCheckConfigUpdate(TTestActorRuntime &runtime, const TActorId& sender, const NKikimrConfig::THealthCheckConfig &cfg) { + auto *event = new NConsole::TEvConsole::TEvConfigureRequest; + + event->Record.AddActions()->MutableRemoveConfigItems()->MutableCookieFilter()->AddCookies("cookie"); + + auto &item = *event->Record.AddActions()->MutableAddConfigItem()->MutableConfigItem(); + item.MutableConfig()->MutableHealthCheckConfig()->CopyFrom(cfg); + item.SetCookie("cookie"); + + runtime.SendToPipe(MakeConsoleID(), sender, event, 0, GetPipeConfigWithRetries()); + + TAutoPtr handle; + auto record = runtime.GrabEdgeEvent(handle)->Record; + UNIT_ASSERT_VALUES_EQUAL(record.MutableStatus()->GetCode(), Ydb::StatusIds::SUCCESS); + } + + void ChangeNodeRestartsPerPeriod(TTestActorRuntime &runtime, const TActorId& sender, const ui32 restartsYellow, const ui32 restartsOrange) { + NKikimrConfig::TAppConfig ext; + auto &cfg = *ext.MutableHealthCheckConfig(); + cfg.MutableThresholds()->SetNodeRestartsYellow(restartsYellow); + cfg.MutableThresholds()->SetNodeRestartsOrange(restartsOrange); + SendHealthCheckConfigUpdate(runtime, sender, cfg); + } + + void TestConfigUpdateNodeRestartsPerPeriod(TTestActorRuntime &runtime, const TActorId& sender, const ui32 restartsYellow, const ui32 restartsOrange, const ui32 nodeId, Ydb::Monitoring::StatusFlag::Status expectedStatus) { + ChangeNodeRestartsPerPeriod(runtime, sender, restartsYellow, restartsOrange); + + TAutoPtr handle; + auto *request = new NHealthCheck::TEvSelfCheckRequest; + request->Request.set_return_verbose_status(true); + request->Database = "/Root/database"; + + runtime.Send(new IEventHandle(NHealthCheck::MakeHealthCheckID(), sender, request, 0)); + auto result = runtime.GrabEdgeEvent(handle)->Result; + Ctest << result.ShortDebugString() << Endl; + + const auto &database_status = result.database_status(0); + UNIT_ASSERT_VALUES_EQUAL(database_status.name(), "/Root/database"); + UNIT_ASSERT_VALUES_EQUAL(database_status.compute().overall(), expectedStatus); + UNIT_ASSERT_VALUES_EQUAL(database_status.compute().nodes()[0].id(), ToString(nodeId)); + } + + Y_UNIT_TEST(HealthCheckConfigUpdate) { + TPortManager tp; + ui16 port = tp.GetPort(2134); + ui16 grpcPort = tp.GetPort(2135); + auto settings = TServerSettings(port) + .SetNodeCount(1) + .SetDynamicNodeCount(1) + .SetUseRealThreads(false) + .SetDomainName("Root"); + + TServer server(settings); + server.EnableGRpc(grpcPort); + TClient client(settings); + TTestActorRuntime& runtime = *server.GetRuntime(); + TActorId sender = runtime.AllocateEdgeActor(); + + const ui32 nodeRestarts = 10; + const ui32 nodeId = runtime.GetNodeId(1); + auto observerFunc = [&](TAutoPtr& ev) { + switch (ev->GetTypeRewrite()) { + case NConsole::TEvConsole::EvGetTenantStatusResponse: { + auto *x = reinterpret_cast(&ev); + ChangeGetTenantStatusResponse(x, "/Root/database"); + break; + } + case TEvTxProxySchemeCache::EvNavigateKeySetResult: { + auto *x = reinterpret_cast(&ev); + TSchemeCacheNavigate::TEntry& entry((*x)->Get()->Request->ResultSet.front()); + const TString path = CanonizePath(entry.Path); + if (path == "/Root/database" || entry.TableId.PathId == SUBDOMAIN_KEY) { + entry.Status = TSchemeCacheNavigate::EStatus::Ok; + entry.Kind = TSchemeCacheNavigate::EKind::KindExtSubdomain; + entry.Path = {"Root", "database"}; + entry.DomainInfo = MakeIntrusive(SUBDOMAIN_KEY, SUBDOMAIN_KEY); + auto domains = runtime.GetAppData().DomainsInfo; + ui64 hiveId = domains->GetHive(); + entry.DomainInfo->Params.SetHive(hiveId); + } + break; + } + case TEvHive::EvResponseHiveNodeStats: { + auto *x = reinterpret_cast(&ev); + auto &record = (*x)->Get()->Record; + record.ClearNodeStats(); + auto *nodeStats = record.MutableNodeStats()->Add(); + nodeStats->SetNodeId(nodeId); + nodeStats->SetRestartsPerPeriod(nodeRestarts); + nodeStats->MutableNodeDomain()->SetSchemeShard(SUBDOMAIN_KEY.OwnerId); + nodeStats->MutableNodeDomain()->SetPathId(SUBDOMAIN_KEY.LocalPathId); + break; + } + case TEvSchemeShard::EvDescribeSchemeResult: { + auto *x = reinterpret_cast(&ev); + auto record = (*x)->Get()->MutableRecord(); + if (record->path() == "/Root/database") { + record->set_status(NKikimrScheme::StatusSuccess); + // no pools + } + break; + } + case TEvBlobStorage::EvControllerConfigResponse: { + auto *x = reinterpret_cast(&ev); + AddGroupVSlotInControllerConfigResponseWithStaticGroup(x, NKikimrBlobStorage::TGroupStatus::FULL, TVDisks(1)); + break; + } + case NSysView::TEvSysView::EvGetVSlotsResponse: { + auto* x = reinterpret_cast(&ev); + AddVSlotsToSysViewResponse(x, 1, TVDisks(1)); + break; + } + case NSysView::TEvSysView::EvGetGroupsResponse: { + auto* x = reinterpret_cast(&ev); + AddGroupsToSysViewResponse(x); + break; + } + case NSysView::TEvSysView::EvGetStoragePoolsResponse: { + auto* x = reinterpret_cast(&ev); + AddStoragePoolsToSysViewResponse(x); + break; + } + case TEvWhiteboard::EvSystemStateResponse: { + auto *x = reinterpret_cast(&ev); + ClearLoadAverage(x); + break; + } + case TEvInterconnect::EvNodesInfo: { + auto *x = reinterpret_cast(&ev); + auto nodes = MakeIntrusive>((*x)->Get()->Nodes); + if (!nodes->empty()) { + nodes->erase(nodes->begin() + 1, nodes->end()); + nodes->begin()->NodeId = nodeId; + } + auto newEv = IEventHandle::Downcast( + new IEventHandle((*x)->Recipient, (*x)->Sender, new TEvInterconnect::TEvNodesInfo(nodes)) + ); + x->Swap(newEv); + break; + } + } + + return TTestActorRuntime::EEventAction::PROCESS; + }; + runtime.SetObserverFunc(observerFunc); + + TestConfigUpdateNodeRestartsPerPeriod(runtime, sender, nodeRestarts + 5, nodeRestarts + 10, nodeId, Ydb::Monitoring::StatusFlag::GREEN); + TestConfigUpdateNodeRestartsPerPeriod(runtime, sender, nodeRestarts / 2, nodeRestarts + 5, nodeId, Ydb::Monitoring::StatusFlag::YELLOW); + TestConfigUpdateNodeRestartsPerPeriod(runtime, sender, nodeRestarts / 5, nodeRestarts / 2, nodeId, Ydb::Monitoring::StatusFlag::ORANGE); + } + + Y_UNIT_TEST(LayoutIncorrect) { + TPortManager tp; + ui16 port = tp.GetPort(2134); + ui16 grpcPort = tp.GetPort(2135); + auto settings = TServerSettings(port) + .SetNodeCount(1) + .SetDynamicNodeCount(1) + .SetUseRealThreads(false) + .SetDomainName("Root"); + + TServer server(settings); + server.EnableGRpc(grpcPort); + TClient client(settings); + TTestActorRuntime& runtime = *server.GetRuntime(); + TActorId sender = runtime.AllocateEdgeActor(); + + auto observerFunc = [&](TAutoPtr& ev) { + switch (ev->GetTypeRewrite()) { + case NSysView::TEvSysView::EvGetGroupsResponse: { + auto* x = reinterpret_cast(&ev); + auto& record = (*x)->Get()->Record; + for (auto& entry : *record.mutable_entries()) { + entry.mutable_info()->set_layoutcorrect(false); + } + + break; + } + } + + return TTestActorRuntime::EEventAction::PROCESS; + }; + runtime.SetObserverFunc(observerFunc); + + TAutoPtr handle; + auto *request = new NHealthCheck::TEvSelfCheckRequest; + request->Request.set_return_verbose_status(true); + runtime.Send(new IEventHandle(NHealthCheck::MakeHealthCheckID(), sender, request, 0)); + auto result = runtime.GrabEdgeEvent(handle)->Result; + + UNIT_ASSERT_VALUES_EQUAL(result.self_check_result(), Ydb::Monitoring::SelfCheck::MAINTENANCE_REQUIRED); + UNIT_ASSERT_VALUES_EQUAL(result.database_status_size(), 1); + const auto &database_status = result.database_status(0); + + UNIT_ASSERT_VALUES_EQUAL(database_status.overall(), Ydb::Monitoring::StatusFlag::ORANGE); + UNIT_ASSERT_VALUES_EQUAL(database_status.storage().overall(), Ydb::Monitoring::StatusFlag::ORANGE); + UNIT_ASSERT_VALUES_EQUAL(database_status.storage().pools().size(), 1); + UNIT_ASSERT_VALUES_EQUAL(database_status.storage().pools()[0].overall(), Ydb::Monitoring::StatusFlag::ORANGE); + UNIT_ASSERT_VALUES_EQUAL(database_status.storage().pools()[0].groups().size(), 1); + UNIT_ASSERT_VALUES_EQUAL(database_status.storage().pools()[0].groups()[0].overall(), Ydb::Monitoring::StatusFlag::ORANGE); + + for (const auto &issue_log : result.issue_log()) { + if (issue_log.level() == 1 && issue_log.type() == "DATABASE") { + UNIT_ASSERT_VALUES_EQUAL(issue_log.location().database().name(), "/Root"); + UNIT_ASSERT_VALUES_EQUAL(issue_log.message(), "Database has storage issues"); + } else if (issue_log.level() == 2 && issue_log.type() == "STORAGE") { + UNIT_ASSERT_VALUES_EQUAL(issue_log.location().database().name(), "/Root"); + UNIT_ASSERT_VALUES_EQUAL(issue_log.message(), "Storage has no redundancy"); + } else if (issue_log.level() == 3 && issue_log.type() == "STORAGE_POOL") { + UNIT_ASSERT_VALUES_EQUAL(issue_log.location().storage().pool().name(), "static"); + UNIT_ASSERT_VALUES_EQUAL(issue_log.message(), "Pool has no redundancy"); + } else if (issue_log.level() == 4 && issue_log.type() == "STORAGE_GROUP") { + UNIT_ASSERT_VALUES_EQUAL(issue_log.location().storage().pool().name(), "static"); + UNIT_ASSERT_VALUES_EQUAL(issue_log.message(), "Group layout is incorrect"); + } + } + } } } diff --git a/ydb/core/mind/bscontroller/bsc.cpp b/ydb/core/mind/bscontroller/bsc.cpp index 812b2f69eca4..0a5e2e58fe0b 100644 --- a/ydb/core/mind/bscontroller/bsc.cpp +++ b/ydb/core/mind/bscontroller/bsc.cpp @@ -3,6 +3,8 @@ #include "self_heal.h" #include "sys_view.h" #include "console_interaction.h" +#include "group_geometry_info.h" +#include "group_layout_checker.h" #include @@ -82,6 +84,25 @@ void TBlobStorageController::TGroupInfo::CalculateGroupStatus() { } } +void TBlobStorageController::TGroupInfo::CalculateLayoutStatus(TBlobStorageController *self, + TBlobStorageGroupInfo::TTopology *topology, const std::function& getGeom) { + LayoutCorrect = true; + if (VDisksInGroup) { + NLayoutChecker::TGroupLayout layout(*topology); + NLayoutChecker::TDomainMapper mapper; + auto geom = getGeom(); + + for (size_t index = 0; index < VDisksInGroup.size(); ++index) { + const TVSlotInfo *slot = VDisksInGroup[index]; + TPDiskId pdiskId = slot->VSlotId.ComprisingPDiskId(); + const auto& location = self->HostRecords->GetLocation(pdiskId.NodeId); + layout.AddDisk({mapper, location, pdiskId, geom}, index); + } + + LayoutCorrect = layout.IsCorrect(); + } +} + NKikimrBlobStorage::TGroupStatus::E TBlobStorageController::DeriveStatus(const TBlobStorageGroupInfo::TTopology *topology, const TBlobStorageGroupInfo::TGroupVDisks& failed) { auto& checker = *topology->QuorumChecker; diff --git a/ydb/core/mind/bscontroller/config_fit_groups.cpp b/ydb/core/mind/bscontroller/config_fit_groups.cpp index e3f1f199de01..df353cd0b0d0 100644 --- a/ydb/core/mind/bscontroller/config_fit_groups.cpp +++ b/ydb/core/mind/bscontroller/config_fit_groups.cpp @@ -621,6 +621,14 @@ namespace NKikimr { groupInfo->FinishVDisksInGroup(); groupInfo->CalculateGroupStatus(); + groupInfo->CalculateLayoutStatus(&State.Self, groupInfo->Topology.get(), [&] { + const auto& pools = State.StoragePools.Get(); + if (const auto it = pools.find(groupInfo->StoragePoolId); it != pools.end()) { + return TGroupGeometryInfo(groupInfo->Topology->GType, it->second.GetGroupGeometry()); + } + Y_DEBUG_ABORT(); // this can't normally happen + return TGroupGeometryInfo(); + }); return res; } diff --git a/ydb/core/mind/bscontroller/group_geometry_info.h b/ydb/core/mind/bscontroller/group_geometry_info.h index 2e6e0ff14bdc..1d3b7d77b0e3 100644 --- a/ydb/core/mind/bscontroller/group_geometry_info.h +++ b/ydb/core/mind/bscontroller/group_geometry_info.h @@ -11,16 +11,18 @@ namespace NKikimr::NBsController { struct TExFitGroupError : yexception {}; class TGroupGeometryInfo { - const TBlobStorageGroupType Type; - ui32 NumFailRealms; - ui32 NumFailDomainsPerFailRealm; - ui32 NumVDisksPerFailDomain; - ui32 RealmLevelBegin; - ui32 RealmLevelEnd; - ui32 DomainLevelBegin; - ui32 DomainLevelEnd; + TBlobStorageGroupType Type; + ui32 NumFailRealms = 0; + ui32 NumFailDomainsPerFailRealm = 0; + ui32 NumVDisksPerFailDomain = 0; + ui32 RealmLevelBegin = 0; + ui32 RealmLevelEnd = 0; + ui32 DomainLevelBegin = 0; + ui32 DomainLevelEnd = 0; public: + explicit TGroupGeometryInfo() = default; + TGroupGeometryInfo(TBlobStorageGroupType type, NKikimrBlobStorage::TGroupGeometry g) : Type(type) , NumFailRealms(g.GetNumFailRealms()) diff --git a/ydb/core/mind/bscontroller/group_layout_checker.cpp b/ydb/core/mind/bscontroller/group_layout_checker.cpp index 8ab76e3e4f50..31e822eb4fa1 100644 --- a/ydb/core/mind/bscontroller/group_layout_checker.cpp +++ b/ydb/core/mind/bscontroller/group_layout_checker.cpp @@ -1,47 +1,3 @@ #include "group_layout_checker.h" -#include "group_geometry_info.h" - -namespace NKikimr::NBsController { - - TLayoutCheckResult CheckGroupLayout(const TGroupGeometryInfo& geom, const THashMap>& layout) { - using namespace NLayoutChecker; - - if (layout.empty()) { - return {}; - } - - TBlobStorageGroupInfo::TTopology topology(geom.GetType(), geom.GetNumFailRealms(), geom.GetNumFailDomainsPerFailRealm(), - geom.GetNumVDisksPerFailDomain(), true); - TGroupLayout group(topology); - TDomainMapper mapper; - THashMap map; - for (const auto& [vdiskId, p] : layout) { - const auto& [location, pdiskId] = p; - TPDiskLayoutPosition pos(mapper, location, pdiskId, geom); - group.AddDisk(pos, topology.GetOrderNumber(vdiskId)); - map.emplace(vdiskId, pos); - } - - std::vector> scoreboard; - for (const auto& [vdiskId, pos] : map) { - scoreboard.emplace_back(group.GetCandidateScore(pos, topology.GetOrderNumber(vdiskId)), vdiskId); - } - - auto comp1 = [](const auto& x, const auto& y) { return x.second < y.second; }; - std::sort(scoreboard.begin(), scoreboard.end(), comp1); - - auto comp = [](const auto& x, const auto& y) { return x.first.BetterThan(y.first); }; - std::sort(scoreboard.begin(), scoreboard.end(), comp); - TLayoutCheckResult res; - const auto reference = scoreboard.back().first; - if (!reference.SameAs({})) { // not perfectly correct layout - for (; !scoreboard.empty() && !scoreboard.back().first.BetterThan(reference); scoreboard.pop_back()) { - res.Candidates.push_back(scoreboard.back().second); - } - } - return res; - } - -} // NKikimr::NBsController Y_DECLARE_OUT_SPEC(, NKikimr::NBsController::NLayoutChecker::TEntityId, stream, value) { value.Output(stream); } diff --git a/ydb/core/mind/bscontroller/group_layout_checker.h b/ydb/core/mind/bscontroller/group_layout_checker.h index e2e2e66246f0..192a6690c9e5 100644 --- a/ydb/core/mind/bscontroller/group_layout_checker.h +++ b/ydb/core/mind/bscontroller/group_layout_checker.h @@ -177,6 +177,8 @@ namespace NKikimr::NBsController { THashMap NumDisksPerDevice; + bool Correct = true; + TGroupLayout(const TBlobStorageGroupInfo::TTopology& topology) : Topology(topology) , NumDisksInRealm(Topology.GetTotalFailRealmsNum()) @@ -187,17 +189,19 @@ namespace NKikimr::NBsController { void UpdateDisk(const TPDiskLayoutPosition& pos, ui32 orderNumber, ui32 value) { NumDisks += value; - NumDisksPerRealmGroup[pos.RealmGroup] += value; + const ui32 z = NumDisksPerRealmGroup[pos.RealmGroup] += value; const TVDiskIdShort vdisk = Topology.GetVDiskId(orderNumber); - NumDisksInRealm[vdisk.FailRealm] += value; - NumDisksPerRealm[vdisk.FailRealm][pos.Realm] += value; - NumDisksPerRealmTotal[pos.Realm] += value; + const ui32 x1 = NumDisksInRealm[vdisk.FailRealm] += value; + const ui32 x2 = NumDisksPerRealm[vdisk.FailRealm][pos.Realm] += value; + const ui32 x3 = NumDisksPerRealmTotal[pos.Realm] += value; const ui32 domainIdx = Topology.GetFailDomainOrderNumber(vdisk); - NumDisksInDomain[domainIdx] += value; - NumDisksPerDomain[domainIdx][pos.Domain] += value; - NumDisksPerDomainTotal[pos.Domain] += value; + const ui32 y1 = NumDisksInDomain[domainIdx] += value; + const ui32 y2 = NumDisksPerDomain[domainIdx][pos.Domain] += value; + const ui32 y3 = NumDisksPerDomainTotal[pos.Domain] += value; NumDisksPerDevice[pos.Device] += value; + + Correct = Correct && x1 == x2 && x2 == x3 && y1 == y2 && y2 == y3 && z == NumDisks; } void AddDisk(const TPDiskLayoutPosition& pos, ui32 orderNumber) { @@ -233,6 +237,46 @@ namespace NKikimr::NBsController { AddDisk(pos, orderNumber); return score; } + + bool IsCorrect() const { +#ifdef NDEBUG + return Correct; +#endif + + if (NumDisksPerRealmGroup.size() != 1) { // all disks must reside in the same realm group + Y_DEBUG_ABORT_UNLESS(!Correct); + return false; + } + + for (size_t i = 0, num = NumDisksInRealm.size(); i < num; ++i) { + for (const auto& [entityId, numDisks] : NumDisksPerRealm[i]) { + Y_DEBUG_ABORT_UNLESS(NumDisksPerRealmTotal.contains(entityId)); + if (numDisks != NumDisksInRealm[i] || numDisks != NumDisksPerRealmTotal.at(entityId)) { + // the first case is when group realm contains disks from different real-world realms (DC's) + // -- this is not as bad as it seems, but breaks strict failure model; the second one is a bit + // worse, it means that disks from this real-world realm (DC) are in several realms, which + // may lead to unavailability when DC goes down + Y_DEBUG_ABORT_UNLESS(!Correct); + return false; + } + } + } + + // the same code goes for domains + for (size_t j = 0, num = NumDisksInDomain.size(); j < num; ++j) { + for (const auto& [entityId, numDisks] : NumDisksPerDomain[j]) { + Y_DEBUG_ABORT_UNLESS(NumDisksPerDomainTotal.contains(entityId)); + if (numDisks != NumDisksInDomain[j] || numDisks != NumDisksPerDomainTotal.at(entityId)) { + Y_DEBUG_ABORT_UNLESS(!Correct); + return false; + } + + } + } + + Y_DEBUG_ABORT_UNLESS(Correct); + return true; + } }; } // NLayoutChecker @@ -245,6 +289,4 @@ namespace NKikimr::NBsController { } }; - TLayoutCheckResult CheckGroupLayout(const TGroupGeometryInfo& geom, const THashMap>& layout); - } // NKikimr::NBsController diff --git a/ydb/core/mind/bscontroller/impl.h b/ydb/core/mind/bscontroller/impl.h index 567002a16443..9dbca01e5079 100644 --- a/ydb/core/mind/bscontroller/impl.h +++ b/ydb/core/mind/bscontroller/impl.h @@ -20,6 +20,8 @@ namespace NKikimr { namespace NBsController { +class TGroupGeometryInfo; + using NTabletFlatExecutor::TTabletExecutedFlat; using NTabletFlatExecutor::ITransaction; using NTabletFlatExecutor::TTransactionBase; @@ -618,6 +620,12 @@ class TBlobStorageController : public TActor, public TTa // be recalculated too void CalculateGroupStatus(); + // group layout status: whether it is positioned correctly + bool LayoutCorrect = false; + + void CalculateLayoutStatus(TBlobStorageController *self, TBlobStorageGroupInfo::TTopology *topology, + const std::function& getGeom); + template static void Apply(TBlobStorageController* /*controller*/, T&& callback) { static TTableAdapter, public TTa private: TString InstanceId; std::shared_ptr SelfHealUnreassignableGroups = std::make_shared(); + std::shared_ptr GroupLayoutSanitizerInvalidGroups = std::make_shared(); TMaybe MigrationId; TVSlots VSlots; // ordering is important TPDisks PDisks; // ordering is important diff --git a/ydb/core/mind/bscontroller/load_everything.cpp b/ydb/core/mind/bscontroller/load_everything.cpp index 803c1e7dc3f6..ef0b79e81d45 100644 --- a/ydb/core/mind/bscontroller/load_everything.cpp +++ b/ydb/core/mind/bscontroller/load_everything.cpp @@ -1,5 +1,6 @@ #include "impl.h" #include "console_interaction.h" +#include "group_geometry_info.h" #include @@ -518,9 +519,23 @@ class TBlobStorageController::TTxLoadEverything : public TTransactionBase cache; + // calculate group status for all groups for (auto& [id, group] : Self->GroupMap) { group->CalculateGroupStatus(); + + group->CalculateLayoutStatus(Self, group->Topology.get(), [&] { + const auto [it, inserted] = cache.try_emplace(group->StoragePoolId); + if (inserted) { + if (const auto jt = Self->StoragePools.find(it->first); jt != Self->StoragePools.end()) { + it->second = TGroupGeometryInfo(group->Topology->GType, jt->second.GetGroupGeometry()); + } else { + Y_DEBUG_ABORT(); + } + } + return it->second; + }); } return true; diff --git a/ydb/core/mind/bscontroller/monitoring.cpp b/ydb/core/mind/bscontroller/monitoring.cpp index c566743ef28f..15758be7dc87 100644 --- a/ydb/core/mind/bscontroller/monitoring.cpp +++ b/ydb/core/mind/bscontroller/monitoring.cpp @@ -1388,6 +1388,7 @@ void TBlobStorageController::RenderGroupTable(IOutputStream& out, std::function< TAG_ATTRS(TTableH, {{"title", "PutUserData Latency"}}) { out << "PutUserData
Latency"; } TAG_ATTRS(TTableH, {{"title", "GetFast Latency"}}) { out << "GetFast
Latency"; } TABLEH() { out << "Seen operational"; } + TABLEH() { out << "Layout correct"; } TABLEH() { out << "Operating
status"; } TABLEH() { out << "Expected
status"; } TABLEH() { out << "Donors"; } @@ -1448,6 +1449,7 @@ void TBlobStorageController::RenderGroupRow(IOutputStream& out, const TGroupInfo renderLatency(group.LatencyStats.PutUserData); renderLatency(group.LatencyStats.GetFast); TABLED() { out << (group.SeenOperational ? "YES" : ""); } + TABLED() { out << (group.LayoutCorrect ? "" : "NO"); } const auto& status = group.Status; TABLED() { out << NKikimrBlobStorage::TGroupStatus::E_Name(status.OperatingStatus); } diff --git a/ydb/core/mind/bscontroller/self_heal.cpp b/ydb/core/mind/bscontroller/self_heal.cpp index 7618d1a93e03..de5282bce921 100644 --- a/ydb/core/mind/bscontroller/self_heal.cpp +++ b/ydb/core/mind/bscontroller/self_heal.cpp @@ -284,6 +284,7 @@ namespace NKikimr::NBsController { bool DonorMode; THostRecordMap HostRecords; std::shared_ptr EnableSelfHealWithDegraded; + std::shared_ptr GroupsWithInvalidLayoutCounter; using TTopologyDescr = std::tuple; THashMap> Topologies; @@ -296,7 +297,8 @@ namespace NKikimr::NBsController { public: TSelfHealActor(ui64 tabletId, std::shared_ptr unreassignableGroups, THostRecordMap hostRecords, bool groupLayoutSanitizerEnabled, bool allowMultipleRealmsOccupation, bool donorMode, - std::shared_ptr enableSelfHealWithDegraded) + std::shared_ptr enableSelfHealWithDegraded, + std::shared_ptr groupsWithInvalidLayoutCounter) : TabletId(tabletId) , UnreassignableGroups(std::move(unreassignableGroups)) , GroupLayoutSanitizerEnabled(groupLayoutSanitizerEnabled) @@ -304,6 +306,7 @@ namespace NKikimr::NBsController { , DonorMode(donorMode) , HostRecords(std::move(hostRecords)) , EnableSelfHealWithDegraded(std::move(enableSelfHealWithDegraded)) + , GroupsWithInvalidLayoutCounter(std::move(groupsWithInvalidLayoutCounter)) {} void Bootstrap(const TActorId& parentId) { @@ -318,17 +321,16 @@ namespace NKikimr::NBsController { void Handle(TEvControllerUpdateSelfHealInfo::TPtr& ev) { if (const auto& setting = ev->Get()->GroupLayoutSanitizerEnabled) { - bool previousSetting = std::exchange(GroupLayoutSanitizerEnabled, *setting); - if (!previousSetting && GroupLayoutSanitizerEnabled) { - UpdateLayoutInformationForAllGroups(); - } + std::exchange(GroupLayoutSanitizerEnabled, *setting); } + if (const auto& setting = ev->Get()->AllowMultipleRealmsOccupation) { bool previousSetting = std::exchange(AllowMultipleRealmsOccupation, *setting); if (previousSetting != AllowMultipleRealmsOccupation) { UpdateLayoutInformationForAllGroups(); } } + if (const auto& setting = ev->Get()->DonorMode) { DonorMode = *setting; } @@ -345,9 +347,7 @@ namespace NKikimr::NBsController { g.Content = std::move(*data); - if (GroupLayoutSanitizerEnabled) { - UpdateGroupLayoutInformation(g); - } + UpdateGroupLayoutInformation(g); ui32 numFailRealms = 0; ui32 numFailDomainsPerFailRealm = 0; @@ -500,6 +500,7 @@ namespace NKikimr::NBsController { } } + GroupsWithInvalidLayoutCounter->store(GroupsWithInvalidLayout.Size()); UnreassignableGroups->store(counter); } @@ -899,7 +900,7 @@ namespace NKikimr::NBsController { IActor *TBlobStorageController::CreateSelfHealActor() { Y_ABORT_UNLESS(HostRecords); return new TSelfHealActor(TabletID(), SelfHealUnreassignableGroups, HostRecords, GroupLayoutSanitizerEnabled, - AllowMultipleRealmsOccupation, DonorMode, EnableSelfHealWithDegraded); + AllowMultipleRealmsOccupation, DonorMode, EnableSelfHealWithDegraded, GroupLayoutSanitizerInvalidGroups); } void TBlobStorageController::InitializeSelfHealState() { @@ -1159,6 +1160,7 @@ namespace NKikimr::NBsController { ); TabletCounters->Simple()[NBlobStorageController::COUNTER_SELF_HEAL_UNREASSIGNABLE_GROUPS] = SelfHealUnreassignableGroups->load(); + TabletCounters->Simple()[NBlobStorageController::COUNTER_GROUP_LAYOUT_SANITIZER_INVALID_GROUPS] = GroupLayoutSanitizerInvalidGroups->load(); Schedule(TDuration::Seconds(15), new TEvPrivate::TEvUpdateSelfHealCounters); } diff --git a/ydb/core/mind/bscontroller/sys_view.cpp b/ydb/core/mind/bscontroller/sys_view.cpp index c28c1440d2ed..77ce9732daec 100644 --- a/ydb/core/mind/bscontroller/sys_view.cpp +++ b/ydb/core/mind/bscontroller/sys_view.cpp @@ -1,6 +1,7 @@ #include "sys_view.h" #include "group_geometry_info.h" #include "storage_stats_calculator.h" +#include "group_layout_checker.h" #include #include @@ -398,6 +399,8 @@ void CopyInfo(NKikimrSysView::TGroupInfo* info, const THolderSetGetFastLatency(latencyStats.GetFast->MicroSeconds()); } + + info->SetLayoutCorrect(groupInfo->LayoutCorrect); } void CopyInfo(NKikimrSysView::TStoragePoolInfo* info, const TBlobStorageController::TStoragePoolInfo& poolInfo) { @@ -517,6 +520,7 @@ void TBlobStorageController::UpdateSystemViews() { const NKikimrBlobStorage::TVDiskMetrics zero; std::vector disks; + std::vector pdiskIds; for (const auto& realm : group.GetRings()) { for (const auto& domain : realm.GetFailDomains()) { for (const auto& location : domain.GetVDiskLocations()) { @@ -532,10 +536,28 @@ void TBlobStorageController::UpdateSystemViews() { if (disk.VDiskMetrics && disk.PDiskMetrics) { disks.push_back(std::move(disk)); } + pdiskIds.emplace_back(location.GetNodeID(), location.GetPDiskID()); } } } CalculateGroupUsageStats(pb, disks, (TBlobStorageGroupType::EErasureSpecies)group.GetErasureSpecies()); + + if (auto groupInfo = TBlobStorageGroupInfo::Parse(group, nullptr, nullptr)) { + NLayoutChecker::TGroupLayout layout(groupInfo->GetTopology()); + NLayoutChecker::TDomainMapper mapper; + TGroupGeometryInfo geom(groupInfo->Type, SelfManagementEnabled + ? StorageConfig.GetSelfManagementConfig().GetGeometry() + : NKikimrBlobStorage::TGroupGeometry()); + + Y_DEBUG_ABORT_UNLESS(pdiskIds.size() == groupInfo->GetTotalVDisksNum()); + + for (size_t i = 0; i < pdiskIds.size(); ++i) { + const TPDiskId pdiskId = pdiskIds[i]; + layout.AddDisk({mapper, HostRecords->GetLocation(pdiskId.NodeId), pdiskId, geom}, i); + } + + pb->SetLayoutCorrect(layout.IsCorrect()); + } } } } diff --git a/ydb/core/mind/bscontroller/virtual_group.cpp b/ydb/core/mind/bscontroller/virtual_group.cpp index ee3b31fb2ac2..c49349750e55 100644 --- a/ydb/core/mind/bscontroller/virtual_group.cpp +++ b/ydb/core/mind/bscontroller/virtual_group.cpp @@ -1,5 +1,6 @@ #include "impl.h" #include "config.h" +#include "group_geometry_info.h" namespace NKikimr::NBsController { @@ -89,6 +90,7 @@ namespace NKikimr::NBsController { GroupFailureModelChanged.insert(group->ID); group->CalculateGroupStatus(); + group->CalculateLayoutStatus(&Self, group->Topology.get(), {}); NKikimrBlobDepot::TBlobDepotConfig config; config.SetVirtualGroupId(group->ID.GetRawId()); @@ -255,6 +257,14 @@ namespace NKikimr::NBsController { State->DeleteExistingGroup(group->ID); } group->CalculateGroupStatus(); + group->CalculateLayoutStatus(Self, group->Topology.get(), [&] { + const auto& pools = State->StoragePools.Get(); + if (const auto it = pools.find(group->StoragePoolId); it != pools.end()) { + return TGroupGeometryInfo(group->Topology->GType, it->second.GetGroupGeometry()); + } + Y_DEBUG_ABORT(); + return TGroupGeometryInfo(); + }); TString error; if (State->Changed() && !Self->CommitConfigUpdates(*State, true, true, true, txc, &error)) { STLOG(PRI_ERROR, BS_CONTROLLER, BSCVG08, "failed to commit update", (VirtualGroupId, GroupId), (Error, error)); diff --git a/ydb/core/protos/config.proto b/ydb/core/protos/config.proto index 5a85cb70829e..83001a9b86db 100644 --- a/ydb/core/protos/config.proto +++ b/ydb/core/protos/config.proto @@ -1769,6 +1769,18 @@ message THiveConfig { optional uint64 NodeRestartsForPenalty = 85 [default = 3]; } +message THealthCheckConfig { + message TThresholds { + optional uint32 NodeRestartsYellow = 1 [default = 10]; // per period, see HiveConfig.NodeRestartWatchPeriod + optional uint32 NodeRestartsOrange = 2 [default = 30]; // per period, see HiveConfig.NodeRestartWatchPeriod + optional uint64 NodesTimeDifferenceYellow = 3 [default = 5000]; // microseconds + optional uint64 NodesTimeDifferenceOrange = 4 [default = 25000]; // microseconds + optional uint32 TabletsRestartsOrange = 5 [default = 30]; // per period, see HiveConfig.TabletRestartWatchPeriod + } + optional TThresholds Thresholds = 1; + optional uint32 Timeout = 2 [default = 20000]; // milliseconds +} + message TBlobCacheConfig { optional uint64 MaxSizeBytes = 1 [default = 1073741824]; } @@ -2254,6 +2266,7 @@ message TAppConfig { optional TSelfManagementConfig SelfManagementConfig = 86; optional NKikimrProto.TDataIntegrityTrailsConfig DataIntegrityTrailsConfig = 87; optional TDataErasureConfig DataErasureConfig = 88; + optional THealthCheckConfig HealthCheckConfig = 89; repeated TNamedConfig NamedConfigs = 100; optional string ClusterYamlConfig = 101; diff --git a/ydb/core/protos/console_config.proto b/ydb/core/protos/console_config.proto index 1f811565c294..a14c980aefe1 100644 --- a/ydb/core/protos/console_config.proto +++ b/ydb/core/protos/console_config.proto @@ -143,6 +143,7 @@ message TConfigItem { GroupedMemoryLimiterConfig = 82; ReplicationConfigItem = 83; CompPrioritiesConfig = 85; + HealthCheckConfigItem = 89; NamedConfigsItem = 100; ClusterYamlConfigItem = 101; diff --git a/ydb/core/protos/counters_bs_controller.proto b/ydb/core/protos/counters_bs_controller.proto index 5d6cdae97e58..82642fa4249a 100644 --- a/ydb/core/protos/counters_bs_controller.proto +++ b/ydb/core/protos/counters_bs_controller.proto @@ -28,6 +28,7 @@ enum ESimpleCounters { COUNTER_DISK_SCRUB_CUR_DISKS = 18 [(CounterOpts) = {Name: "CurrentlyScrubbedDisks"}]; COUNTER_DISK_SCRUB_CUR_GROUPS = 19 [(CounterOpts) = {Name: "CurrentlyScrubbedGroups"}]; COUNTER_SELF_HEAL_UNREASSIGNABLE_GROUPS = 20 [(CounterOpts) = {Name: "SelfHealUnreassignableGroups"}]; + COUNTER_GROUP_LAYOUT_SANITIZER_INVALID_GROUPS = 21 [(CounterOpts) = {Name: "GroupLayoutSanitizerInvlaidGroups"}]; } enum ECumulativeCounters { diff --git a/ydb/core/protos/sys_view.proto b/ydb/core/protos/sys_view.proto index e5f215dec817..e0f2c4f81d3d 100644 --- a/ydb/core/protos/sys_view.proto +++ b/ydb/core/protos/sys_view.proto @@ -265,6 +265,7 @@ message TGroupInfo { // desired disk categories ? // down/persisted down ? // metrics ? + optional bool LayoutCorrect = 16; // is the group layout correct? } message TGroupEntry { diff --git a/ydb/core/sys_view/common/schema.h b/ydb/core/sys_view/common/schema.h index ddcfcab7b70d..7c38021c4d46 100644 --- a/ydb/core/sys_view/common/schema.h +++ b/ydb/core/sys_view/common/schema.h @@ -306,6 +306,7 @@ struct Schema : NIceDb::Schema { struct PutTabletLogLatency : Column<13, NScheme::NTypeIds::Interval> {}; struct PutUserDataLatency : Column<14, NScheme::NTypeIds::Interval> {}; struct GetFastLatency : Column<15, NScheme::NTypeIds::Interval> {}; + struct LayoutCorrect : Column<16, NScheme::NTypeIds::Bool> {}; using TKey = TableKey; using TColumns = TableColumns< @@ -321,7 +322,8 @@ struct Schema : NIceDb::Schema { SeenOperational, PutTabletLogLatency, PutUserDataLatency, - GetFastLatency>; + GetFastLatency, + LayoutCorrect>; }; struct StoragePools : Table<7> { diff --git a/ydb/core/sys_view/storage/groups.cpp b/ydb/core/sys_view/storage/groups.cpp index cca51da22522..11a0ded27684 100644 --- a/ydb/core/sys_view/storage/groups.cpp +++ b/ydb/core/sys_view/storage/groups.cpp @@ -36,6 +36,7 @@ class TGroupsScan : public TStorageScanBase= 0x80000000; )").GetValueSync(); @@ -1090,7 +1091,7 @@ Y_UNIT_TEST_SUITE(SystemView) { } } - TYsonFieldChecker check(ysonString, 12); + TYsonFieldChecker check(ysonString, 13); check.Uint64(0u); // AllocatedSize check.Uint64GreaterOrEquals(0u); // AvailableSize @@ -1104,6 +1105,7 @@ Y_UNIT_TEST_SUITE(SystemView) { check.Null(); // PutTabletLogLatency check.Null(); // PutUserDataLatency check.Uint64(2u); // StoragePoolId + check.Bool(true); // LayoutCorrect } Y_UNIT_TEST(StoragePoolsFields) {