Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add population variance kernel #515

Merged
merged 1 commit into from
Oct 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/sphinx/calql.rst
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ This table contains a quick reference of all CalQL statements:
inclusive_ratio(<a>,<b>,<S>) # computes inclusive_sum(<a>)/inclusive_sum(<b>)*<S>
inclusive_min(<a>) # compute inclusive min of <a>
inclusive_max(<a>) # compute inclusive max of <a>
variance(<a>) # compute population variance (sum(a^2)/N - avg(a)^2) of <a>
... AS <name> # use <name> as column header in tree or table formatter
... UNIT <unit> # use <unit> as unit name

Expand Down
12 changes: 12 additions & 0 deletions src/caliper/controllers/SpotController.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -583,6 +583,18 @@ const char* spot_controller_spec = R"json(
}
]
},
{
"name": "time.variance",
"type": "bool",
"category": "metric",
"description": "Compute population variance of time across MPI ranks",
"query":
[
{
"level": "cross", "select": [ "variance(inclusive#sum#time.duration) as \"Variance time/rank\"" ]
}
]
},
{
"name": "timeseries",
"type": "bool",
Expand Down
127 changes: 125 additions & 2 deletions src/reader/Aggregator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1245,6 +1245,124 @@ class AnyKernel : public AggregateKernel {
Config* m_config;
};

// Aggregation kernel that computes the population variance of an attribute,
//   variance = sum(v^2)/N - (sum(v)/N)^2
//
// Besides the variance itself, the kernel emits its partial results (sum,
// sum of squares, count) as hidden attributes. When these partial-result
// attributes show up in the input they are folded into the running totals,
// so already-aggregated records can be aggregated again.
class VarianceKernel : public AggregateKernel {
public:

    // Result and partial-result attributes written by this kernel.
    struct StatisticsAttributes {
        Attribute variance; // "variance#<target>": the computed variance
        Attribute sum;      // "var.sum#<target>": running sum of values (hidden)
        Attribute sqsum;    // "var.sqsum#<target>": running sum of squared values (hidden)
        Attribute count;    // "var.count#<target>": number of values (hidden)
    };

    // Per-query configuration: holds the target attribute name and caches
    // the attribute handles looked up / created in the metadata db.
    class Config : public AggregateKernelConfig {
        std::string m_target_attr_name;
        Attribute   m_target_attr;

        StatisticsAttributes m_stat_attrs;

    public:

        // Returns the target attribute, looking it up by name in \a db
        // until a valid attribute has been found.
        Attribute get_target_attr(CaliperMetadataAccessInterface& db) {
            if (m_target_attr == Attribute::invalid)
                m_target_attr = db.get_attribute(m_target_attr_name);
            return m_target_attr;
        }

        // Copies the result attributes into \a a, creating them in \a db on
        // first use. Returns false (leaving \a a untouched) as long as the
        // target attribute has not been resolved by get_target_attr().
        bool get_statistics_attributes(CaliperMetadataAccessInterface& db, StatisticsAttributes& a) {
            if (m_target_attr == Attribute::invalid)
                return false;

            // Test the cached attributes, not the caller's output struct:
            // callers pass a freshly constructed struct, so checking `a`
            // would never hit the cache and would re-create all four
            // attributes on every call.
            if (m_stat_attrs.sum != Attribute::invalid) {
                a = m_stat_attrs;
                return true;
            }

            int prop = CALI_ATTR_SKIP_EVENTS | CALI_ATTR_ASVALUE;

            m_stat_attrs.variance =
                db.create_attribute("variance#" + m_target_attr_name, CALI_TYPE_DOUBLE, prop);
            m_stat_attrs.count =
                db.create_attribute("var.count#" + m_target_attr_name, CALI_TYPE_UINT, prop | CALI_ATTR_HIDDEN);
            m_stat_attrs.sum =
                db.create_attribute("var.sum#" + m_target_attr_name, CALI_TYPE_DOUBLE, prop | CALI_ATTR_HIDDEN);
            m_stat_attrs.sqsum =
                db.create_attribute("var.sqsum#" + m_target_attr_name, CALI_TYPE_DOUBLE, prop | CALI_ATTR_HIDDEN);

            a = m_stat_attrs;
            return true;
        }

        // Creates a new kernel instance bound to this config.
        // The caller receives a raw pointer allocated with new.
        AggregateKernel* make_kernel() {
            return new VarianceKernel(this);
        }

        // \param name Name of the attribute whose variance is computed.
        // The attribute cache starts out explicitly invalid so that
        // get_statistics_attributes() can tell whether it has been filled.
        Config(const std::string& name)
            : m_target_attr_name(name),
              m_target_attr(Attribute::invalid),
              m_stat_attrs { Attribute::invalid, Attribute::invalid, Attribute::invalid, Attribute::invalid }
        { }

        // Factory used by the kernel table. Uses the first entry of \a cfg
        // as the target attribute name; \a cfg must not be empty.
        static AggregateKernelConfig* create(const std::vector<std::string>& cfg) {
            return new Config(cfg.front());
        }
    };

    VarianceKernel(Config* config)
        : m_count(0), m_sum(0.0), m_sqsum(0.0), m_config(config)
    { }

    const AggregateKernelConfig* config() { return m_config; }

    // Folds the entries in \a list into the running totals: raw target
    // values contribute to sum, sum of squares and count; previously emitted
    // partial results are added to their respective totals. Does nothing
    // while the target attribute is unknown in \a db.
    virtual void aggregate(CaliperMetadataAccessInterface& db, const EntryList& list) {
        std::lock_guard<std::mutex>
            g(m_lock);

        Attribute target_attr = m_config->get_target_attr(db);
        StatisticsAttributes stat_attr;

        if (!m_config->get_statistics_attributes(db, stat_attr))
            return;

        for (const Entry& e : list) {
            if (e.attribute() == target_attr.id()) {
                double v = e.value().to_double();
                m_sum   += v;
                m_sqsum += (v*v);
                ++m_count;
            } else if (e.attribute() == stat_attr.sum.id()) {
                m_sum   += e.value().to_double();
            } else if (e.attribute() == stat_attr.sqsum.id()) {
                m_sqsum += e.value().to_double();
            } else if (e.attribute() == stat_attr.count.id()) {
                m_count += e.value().to_uint();
            }
        }
    }

    // Appends the variance and the partial results (sum, sum of squares,
    // count) to \a list. Appends nothing if no values were aggregated or
    // the result attributes are unavailable.
    virtual void append_result(CaliperMetadataAccessInterface& db, EntryList& list) {
        if (m_count > 0) {
            StatisticsAttributes stat_attr;

            if (!m_config->get_statistics_attributes(db, stat_attr))
                return;

            double avg = m_sum/m_count;
            double variance = m_sqsum/m_count - (avg*avg);

            // sum(v^2)/N - avg^2 is prone to floating-point cancellation:
            // for (near-)constant input it can come out as a tiny negative
            // number. A population variance is never negative, so clamp.
            if (variance < 0.0)
                variance = 0.0;

            list.push_back(Entry(stat_attr.variance, Variant(variance)));
            list.push_back(Entry(stat_attr.sum,      Variant(m_sum)));
            list.push_back(Entry(stat_attr.sqsum,    Variant(m_sqsum)));
            list.push_back(Entry(stat_attr.count,    Variant(cali_make_variant_from_uint(m_count))));
        }
    }

private:

    unsigned m_count;  // number of aggregated values
    double   m_sum;    // sum of aggregated values
    double   m_sqsum;  // sum of squares of aggregated values

    std::mutex m_lock; // held for the duration of aggregate()

    Config*  m_config;
};

enum KernelID {
Count = 0,
Expand All @@ -1262,10 +1380,11 @@ enum KernelID {
ScaledCount = 12,
IRatio = 13,
IMin = 14,
IMax = 15
IMax = 15,
Variance = 16
};

#define MAX_KERNEL_ID IMax
#define MAX_KERNEL_ID Variance

const char* kernel_args[] = { "attribute" };
const char* sratio_args[] = { "numerator", "denominator", "scale" };
Expand All @@ -1289,6 +1408,7 @@ const QuerySpec::FunctionSignature kernel_signatures[] = {
{ KernelID::IRatio, "inclusive_ratio", 2, 3, sratio_args },
{ KernelID::IMin, "inclusive_min", 1, 1, kernel_args },
{ KernelID::IMax, "inclusive_max", 1, 1, kernel_args },
{ KernelID::Variance, "variance", 1, 1, kernel_args },

QuerySpec::FunctionSignatureTerminator
};
Expand All @@ -1313,6 +1433,7 @@ const struct KernelInfo {
{ "inclusive_ratio", ScaledRatioKernel::Config::create_inclusive },
{ "inclusive_min", MinKernel::Config::create_inclusive },
{ "inclusive_max", MaxKernel::Config::create_inclusive },
{ "variance", VarianceKernel::Config::create },

{ 0, 0 }
};
Expand Down Expand Up @@ -1651,6 +1772,8 @@ Aggregator::get_aggregation_attribute_name(const QuerySpec::AggregationOp& op)
return std::string("imin#") + op.args[0];
case KernelID::IMax:
return std::string("imax#") + op.args[0];
case KernelID::Variance:
return std::string("variance#") + op.args[0];
}

return std::string();
Expand Down
4 changes: 4 additions & 0 deletions src/reader/test/test_aggregator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -723,6 +723,7 @@ TEST(AggregatorTest, StatisticsKernels) {
spec.aggregate.list.push_back(::make_op("min", "val"));
spec.aggregate.list.push_back(::make_op("max", "val"));
spec.aggregate.list.push_back(::make_op("avg", "val"));
spec.aggregate.list.push_back(::make_op("variance", "val"));

// perform recursive aggregation from two aggregators

Expand All @@ -744,10 +745,12 @@ TEST(AggregatorTest, StatisticsKernels) {
Attribute attr_min = db.get_attribute("min#val");
Attribute attr_max = db.get_attribute("max#val");
Attribute attr_avg = db.get_attribute("avg#val");
Attribute attr_var = db.get_attribute("variance#val");

ASSERT_NE(attr_min, Attribute::invalid);
ASSERT_NE(attr_max, Attribute::invalid);
ASSERT_NE(attr_avg, Attribute::invalid);
ASSERT_NE(attr_var, Attribute::invalid);

std::vector<EntryList> resdb;

Expand All @@ -764,6 +767,7 @@ TEST(AggregatorTest, StatisticsKernels) {
EXPECT_EQ(dict[attr_min.id()].value().to_int(), -4);
EXPECT_EQ(dict[attr_max.id()].value().to_int(), 36);
EXPECT_DOUBLE_EQ(dict[attr_avg.id()].value().to_double(), 16.5);
EXPECT_DOUBLE_EQ(dict[attr_var.id()].value().to_double(), 2018.0/4.0 - (16.5*16.5));
}

TEST(AggregatorTest, ScaledRatioKernel) {
Expand Down
Loading