mirror of
https://github.com/UpsilonNumworks/Upsilon.git
synced 2026-03-18 21:30:38 +01:00
[apps/regression] Compute R2 better when dealing with constant regression
Change-Id: Ic724d8d96cb723718a1ce57e72132972a782fc5e
This commit is contained in:
committed by
Émilie Feral
parent
3f43504398
commit
e517128a9e
@@ -309,10 +309,12 @@ double Store::correlationCoefficient(int series) const {
|
||||
|
||||
double Store::computeDeterminationCoefficient(int series, Poincare::Context * globalContext) {
|
||||
/* Computes and returns the determination coefficient (R2) of the regression.
|
||||
* For regressions, it is equal to the square of the correlation coefficient between
|
||||
* the series Y and the evaluated values from the series X and the selected model
|
||||
* Computing the coefficient using the latter equality would require more calls to the evaluated
|
||||
* values and would be less precise. */
|
||||
* For linear regressions, it is equal to the square of the correlation
|
||||
* coefficient between the series Y and the evaluated values.
|
||||
* With proportional regression or badly fitted models, R2 can technically be
|
||||
* negative. R2<0 means that the regression is less effective than a
|
||||
* constant set to the series average. It should not happen with regression
|
||||
* models that can fit a constant observation. */
|
||||
// Residual sum of squares
|
||||
double ssr = 0;
|
||||
// Total sum of squares
|
||||
@@ -327,7 +329,15 @@ double Store::computeDeterminationCoefficient(int series, Poincare::Context * gl
|
||||
double difference = m_data[series][1][k] - mean;
|
||||
sst += difference * difference;
|
||||
}
|
||||
return sst == 0.0 ? 1.0 : 1.0 - ssr / sst;
|
||||
if (sst == 0.0) {
|
||||
/* Observation was constant, r2 is undefined. Return 1 if estimations
|
||||
* exactly matched observations. 0 is usually returned otherwise. */
|
||||
return (ssr <= DBL_EPSILON) ? 1.0 : 0.0;
|
||||
}
|
||||
double r2 = 1.0 - ssr / sst;
|
||||
// Check if regression fit was optimal.
|
||||
assert(r2 >= 0 || seriesRegressionType(series) == Model::Type::Proportional);
|
||||
return r2;
|
||||
}
|
||||
|
||||
Model * Store::regressionModel(int index) {
|
||||
|
||||
@@ -34,7 +34,7 @@ void assert_regression_is(double * xi, double * yi, int numberOfPoints, Model::T
|
||||
|
||||
double precision = 1e-2;
|
||||
// When trueCoefficients = 0, a DBL_EPSILON reference ensures that the only accepted errors are due to double approximations
|
||||
double reference = 100.0 * DBL_EPSILON;
|
||||
double reference = 1e6 * DBL_EPSILON;
|
||||
|
||||
// Compute and compare the coefficients
|
||||
double * coefficients = store.coefficientsForSeries(series, &context);
|
||||
@@ -45,7 +45,7 @@ void assert_regression_is(double * xi, double * yi, int numberOfPoints, Model::T
|
||||
|
||||
// Compute and check r2 value and sign
|
||||
double r2 = store.determinationCoefficientForSeries(series, &globalContext);
|
||||
quiz_assert(r2 >= 0.0);
|
||||
quiz_assert(r2 <= 1.0 && (r2 >= 0.0 || modelType == Model::Type::Proportional));
|
||||
quiz_assert(IsApproximatelyEqual(r2, trueR2, precision, reference));
|
||||
}
|
||||
|
||||
@@ -82,6 +82,36 @@ QUIZ_CASE(proportional_regression2) {
|
||||
assert_regression_is(x, y, numberOfPoints, Model::Type::Proportional, coefficients, r2);
|
||||
}
|
||||
|
||||
QUIZ_CASE(proportional_regression3) {
  /* Null observation: every Y value is 0. The expected proportional
   * coefficient is 0 and the expected R2 is 1. */
  constexpr int pointCount = 4;
  double abscissas[pointCount] = {1.0, 2.0, 3.0, 4.0};
  double ordinates[pointCount] = {0.0, 0.0, 0.0, 0.0};
  double expectedCoefficients[] = {0.0};
  double expectedR2 = 1.0;
  assert_regression_is(
      abscissas,
      ordinates,
      pointCount,
      Model::Type::Proportional,
      expectedCoefficients,
      expectedR2);
}
|
||||
|
||||
QUIZ_CASE(proportional_regression4) {
  /* Constant, non-null observation over abscissas symmetric around 0.
   * The expected proportional coefficient is 0: the model cannot fit the
   * constant Y, so the expected R2 is null. */
  constexpr int pointCount = 3;
  double abscissas[pointCount] = {-1.0, 0.0, 1.0};
  double ordinates[pointCount] = {1.0, 1.0, 1.0};
  double expectedCoefficients[] = {0.0};
  double expectedR2 = 0.0;
  assert_regression_is(
      abscissas,
      ordinates,
      pointCount,
      Model::Type::Proportional,
      expectedCoefficients,
      expectedR2);
}
|
||||
|
||||
QUIZ_CASE(proportional_regression5) {
  /* Nearly constant observation over abscissas symmetric around 0.
   * The expected proportional coefficient is 0, which fits the data far worse
   * than a constant regression would: the expected R2 is strongly negative. */
  constexpr int pointCount = 3;
  double abscissas[pointCount] = {-1.0, 0.0, 1.0};
  double ordinates[pointCount] = {1.0, 1.01, 1.0};
  double expectedCoefficients[] = {0.0};
  double expectedR2 = -45300.5;
  assert_regression_is(
      abscissas,
      ordinates,
      pointCount,
      Model::Type::Proportional,
      expectedCoefficients,
      expectedR2);
}
|
||||
|
||||
QUIZ_CASE(quadratic_regression) {
|
||||
double x[] = {-34.0, -12.0, 5.0, 86.0, -2.0};
|
||||
double y[] = {-8241.389, -1194.734, -59.163, - 46245.39, -71.774};
|
||||
|
||||
Reference in New Issue
Block a user