Skip to content

Commit b64d044

Browse files
committed
[RDF] Share column readers for Defines not affected by variations.
When a Define is added to a branch that gets varied, a new column reader was created for every variation and for every slot, irrespective of whether the column gets varied or not. For snapshot with variations, this column would be written for every variation, although it always evaluates to the same value. To prevent this duplication, the column readers that point to the same Define are shared after this commit. The readers are still cloned per slot though, so the Define's value caches remain thread safe. This allows SnapshotWithVariations to detect that columns are identical, so they are not written multiple times. The memory savings amount to 24 bytes per suppressed column reader,so they won't have a notable impact.
1 parent 675ac5c commit b64d044

File tree

2 files changed

+24
-15
lines changed

2 files changed

+24
-15
lines changed

tree/dataframe/inc/ROOT/RDF/RDefineReader.hxx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ class RDefinesWithReaders {
6262
// (see BookDefineJit). it is never null.
6363
std::shared_ptr<ROOT::Detail::RDF::RDefineBase> fDefine;
6464
// Column readers per variation (in the map) per slot (in the vector).
65-
std::vector<std::unordered_map<std::string_view, std::unique_ptr<RDefineReader>>> fReadersPerVariation;
65+
std::vector<std::unordered_map<std::string_view, std::shared_ptr<RDefineReader>>> fReadersPerVariation;
6666

6767
// Strings that were already used to represent column names in this RDataFrame instance.
6868
ROOT::Internal::RDF::RStringCache &fCachedColNames;

tree/dataframe/src/RDefineReader.cxx

Lines changed: 23 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -32,19 +32,28 @@ ROOT::Internal::RDF::RDefinesWithReaders::GetReader(unsigned int slot, std::stri
3232
if (it != defineReaders.end())
3333
return *it->second;
3434

35+
std::shared_ptr<RDefineReader> readerToReturn;
3536
auto *define = fDefine.get();
36-
if (*nameIt != "nominal")
37-
define = &define->GetVariedDefine(std::string(variationName));
38-
39-
#if !defined(__clang__) && __GNUC__ >= 7 && __GNUC_MINOR__ >= 3
40-
const auto insertion =
41-
defineReaders.insert({*nameIt, std::make_unique<ROOT::Internal::RDF::RDefineReader>(slot, *define)});
42-
return *insertion.first->second;
43-
#else
44-
// gcc < 7.3 has issues with passing the non-movable std::pair temporary into the insert call
45-
auto reader = std::make_unique<ROOT::Internal::RDF::RDefineReader>(slot, *define);
46-
auto &ret = *reader;
47-
defineReaders[*nameIt] = std::move(reader);
48-
return ret;
49-
#endif
37+
if (*nameIt == "nominal") {
38+
readerToReturn = std::make_shared<RDefineReader>(slot, *define);
39+
} else {
40+
auto *variedDefine = &define->GetVariedDefine(std::string(variationName));
41+
if (variedDefine == define) {
42+
// The column in not affected by variations. We can return the same reader as for nominal
43+
if (auto nominalReaderIt = defineReaders.find("nominal"); nominalReaderIt != defineReaders.end()) {
44+
readerToReturn = nominalReaderIt->second;
45+
} else {
46+
// The nominal reader doesn't exist yet
47+
readerToReturn = std::make_shared<RDefineReader>(slot, *define);
48+
auto nominalNameIt = fCachedColNames.Insert("nominal");
49+
defineReaders.insert({*nominalNameIt, readerToReturn});
50+
}
51+
} else {
52+
readerToReturn = std::make_shared<RDefineReader>(slot, *variedDefine);
53+
}
54+
}
55+
56+
defineReaders.insert({*nameIt, readerToReturn});
57+
58+
return *readerToReturn;
5059
}

0 commit comments

Comments
 (0)