Skip to content

Commit 2bce80f

Browse files
Xinyi Wang authored and facebook-github-bot committed
log query empty count vs total count (#5657)
Summary: X-link: facebookresearch/FBGEMM#2601. This diff adds a total query count and an empty query count, and logs them. Reviewed By: kausv. Differential Revision: D101304376
1 parent 03794f5 commit 2bce80f

2 files changed

Lines changed: 61 additions & 1 deletion

File tree

fbgemm_gpu/fbgemm_gpu/tbe/ssd/training.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1160,6 +1160,15 @@ def __init__(
11601160
self.dram_kv_miss_count_stats_name: str = (
11611161
f"dram_kv.perf.get.tbe_id{tbe_unique_id}.dram_read_miss_count"
11621162
)
1163+
self.enrichment_query_count_stats_name: str = (
1164+
f"dram_kv.perf.get.tbe_id{tbe_unique_id}.enrichment_query_count"
1165+
)
1166+
self.enrichment_empty_count_stats_name: str = (
1167+
f"dram_kv.perf.get.tbe_id{tbe_unique_id}.enrichment_empty_count"
1168+
)
1169+
self.enrichment_success_rate_stats_name: str = (
1170+
f"dram_kv.perf.get.tbe_id{tbe_unique_id}.enrichment_success_rate_pct"
1171+
)
11631172
self.l1_hit_rate_stats_name: str = (
11641173
f"ssd_tbe.prefetch.tbe_id{tbe_unique_id}.l1_hit_rate_pct"
11651174
)
@@ -1212,6 +1221,9 @@ def __init__(
12121221
self.stats_reporter.register_stats(self.dram_kv_hit_rate_stats_name)
12131222
self.stats_reporter.register_stats(self.dram_kv_hit_count_stats_name)
12141223
self.stats_reporter.register_stats(self.dram_kv_miss_count_stats_name)
1224+
self.stats_reporter.register_stats(self.enrichment_query_count_stats_name)
1225+
self.stats_reporter.register_stats(self.enrichment_empty_count_stats_name)
1226+
self.stats_reporter.register_stats(self.enrichment_success_rate_stats_name)
12151227
self.stats_reporter.register_stats(self.l1_hit_rate_stats_name)
12161228
for t in self.feature_table_map:
12171229
self.stats_reporter.register_stats(
@@ -4720,6 +4732,35 @@ def _report_dram_kv_perf_stats(self) -> None:
47204732
enable_tb_metrics=True,
47214733
)
47224734

4735+
# Enrichment query metrics (indices 38-39)
4736+
if len(dram_kv_perf_stats) >= 40:
4737+
enrichment_query_count = dram_kv_perf_stats[38]
4738+
enrichment_empty_count = dram_kv_perf_stats[39]
4739+
stats_reporter.report_data_amount(
4740+
iteration_step=self.step,
4741+
event_name=self.enrichment_query_count_stats_name,
4742+
data_bytes=enrichment_query_count,
4743+
enable_tb_metrics=True,
4744+
)
4745+
stats_reporter.report_data_amount(
4746+
iteration_step=self.step,
4747+
event_name=self.enrichment_empty_count_stats_name,
4748+
data_bytes=enrichment_empty_count,
4749+
enable_tb_metrics=True,
4750+
)
4751+
if enrichment_query_count > 0:
4752+
enrichment_success_rate = (
4753+
100.0
4754+
* (enrichment_query_count - enrichment_empty_count)
4755+
/ enrichment_query_count
4756+
)
4757+
stats_reporter.report_data_amount(
4758+
iteration_step=self.step,
4759+
event_name=self.enrichment_success_rate_stats_name,
4760+
data_bytes=enrichment_success_rate,
4761+
enable_tb_metrics=True,
4762+
)
4763+
47234764
def _recording_to_timer(self, timer: AsyncSeriesTimer | None, **kwargs: Any) -> Any:
47244765
"""
47254766
helper function to call AsyncSeriesTimer, wrap it inside the kernels we want to record

fbgemm_gpu/src/dram_kv_embedding_cache/dram_kv_embedding_cache.h

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -836,6 +836,16 @@ class DramKVEmbeddingCache : public kv_db::EmbeddingKVDB {
836836
XLOG(INFO) << "[EmbeddingCacheEnrich] " << log_prefix
837837
<< payloads.size() << "/" << unhashed_ids.size()
838838
<< ", latency_ms: " << latency_ms;
839+
enrichment_query_count_.fetch_add(unhashed_ids.size());
840+
if (unhashed_ids.size() >= payloads.size()) {
841+
enrichment_empty_count_.fetch_add(
842+
unhashed_ids.size() - payloads.size());
843+
} else {
844+
XLOG(WARN) << "[EmbeddingCacheEnrich] " << log_prefix
845+
<< "payloads.size() (" << payloads.size()
846+
<< ") > unhashed_ids.size() (" << unhashed_ids.size()
847+
<< ")";
848+
}
839849
if (!payloads.empty()) {
840850
auto result = prepareFn(hashed_ids, unhashed_ids, payloads);
841851
if (result.has_value()) {
@@ -2049,7 +2059,7 @@ class DramKVEmbeddingCache : public kv_db::EmbeddingKVDB {
20492059
std::vector<double> get_dram_kv_perf(
20502060
const int64_t step,
20512061
const int64_t interval) {
2052-
std::vector<double> ret(38, 0); // num metrics
2062+
std::vector<double> ret(40, 0); // num metrics
20532063
if (step > 0 && step % interval == 0) {
20542064
const double d_interval = static_cast<double>(interval);
20552065
int reset_val = 0;
@@ -2125,6 +2135,8 @@ class DramKVEmbeddingCache : public kv_db::EmbeddingKVDB {
21252135
auto read_num_counts = read_num_counts_.exchange(reset_val);
21262136
auto read_hit_count = read_hit_count_.exchange(reset_val);
21272137
auto read_miss_count = read_miss_count_.exchange(reset_val);
2138+
auto enrichment_query_count = enrichment_query_count_.exchange(reset_val);
2139+
auto enrichment_empty_count = enrichment_empty_count_.exchange(reset_val);
21282140

21292141
ret[0] = dram_read_total_duration / d_interval;
21302142
ret[1] = dram_read_sharding_total_duration / d_interval;
@@ -2173,6 +2185,9 @@ class DramKVEmbeddingCache : public kv_db::EmbeddingKVDB {
21732185

21742186
ret[36] = read_hit_count;
21752187
ret[37] = read_miss_count;
2188+
2189+
ret[38] = enrichment_query_count;
2190+
ret[39] = enrichment_empty_count;
21762191
}
21772192
return ret;
21782193
}
@@ -2436,6 +2451,10 @@ class DramKVEmbeddingCache : public kv_db::EmbeddingKVDB {
24362451
std::atomic<int64_t> read_hit_count_{0};
24372452
std::atomic<int64_t> read_miss_count_{0};
24382453

2454+
// Enrichment (laser) query counters: total keys queried and empty results
2455+
std::atomic<int64_t> enrichment_query_count_{0};
2456+
std::atomic<int64_t> enrichment_empty_count_{0};
2457+
24392458
bool disable_random_init_;
24402459

24412460
// Whether raw embedding streaming (RES) is enabled for this cache

0 commit comments

Comments
 (0)