[apps/regression] Compute R2 better when dealing with constant regression

Change-Id: Ic724d8d96cb723718a1ce57e72132972a782fc5e
This commit is contained in:
Hugo Saint-Vignes
2020-10-01 10:29:03 +02:00
committed by Émilie Feral
parent 3f43504398
commit e517128a9e
2 changed files with 47 additions and 7 deletions

View File

@@ -309,10 +309,12 @@ double Store::correlationCoefficient(int series) const {
double Store::computeDeterminationCoefficient(int series, Poincare::Context * globalContext) {
/* Computes and returns the determination coefficient (R2) of the regression.
* For regressions, it is equal to the square of the correlation coefficient between
* the series Y and the evaluated values from the series X and the selected model
* Computing the coefficient using the latter equality would require more calls to the evaluated
* values and would be less precise. */
* For linear regressions, it is equal to the square of the correlation
* coefficient between the series Y and the evaluated values.
* With proportional regression or badly fitted models, R2 can technically be
* negative. R2<0 means that the regression is less effective than a
* constant set to the series average. It should not happen with regression
* models that can fit a constant observation. */
// Residual sum of squares
double ssr = 0;
// Total sum of squares
@@ -327,7 +329,15 @@ double Store::computeDeterminationCoefficient(int series, Poincare::Context * gl
double difference = m_data[series][1][k] - mean;
sst += difference * difference;
}
return sst == 0.0 ? 1.0 : 1.0 - ssr / sst;
if (sst == 0.0) {
/* Observation was constant, so R2 is undefined (sst is zero). Return 1 if
* estimations matched observations (ssr within DBL_EPSILON); otherwise
* return 0, the value conventionally used in that case. */
return (ssr <= DBL_EPSILON) ? 1.0 : 0.0;
}
double r2 = 1.0 - ssr / sst;
// Check if regression fit was optimal.
assert(r2 >= 0 || seriesRegressionType(series) == Model::Type::Proportional);
return r2;
}
Model * Store::regressionModel(int index) {

View File

@@ -34,7 +34,7 @@ void assert_regression_is(double * xi, double * yi, int numberOfPoints, Model::T
double precision = 1e-2;
// When trueCoefficients = 0, a DBL_EPSILON reference ensures that the only accepted errors are due to double approximations
double reference = 100.0 * DBL_EPSILON;
double reference = 1e6 * DBL_EPSILON;
// Compute and compare the coefficients
double * coefficients = store.coefficientsForSeries(series, &context);
@@ -45,7 +45,7 @@ void assert_regression_is(double * xi, double * yi, int numberOfPoints, Model::T
// Compute and check r2 value and sign
double r2 = store.determinationCoefficientForSeries(series, &globalContext);
quiz_assert(r2 >= 0.0);
quiz_assert(r2 <= 1.0 && (r2 >= 0.0 || modelType == Model::Type::Proportional));
quiz_assert(IsApproximatelyEqual(r2, trueR2, precision, reference));
}
@@ -82,6 +82,36 @@ QUIZ_CASE(proportional_regression2) {
assert_regression_is(x, y, numberOfPoints, Model::Type::Proportional, coefficients, r2);
}
QUIZ_CASE(proportional_regression3) {
  /* Every observation is zero: the expected proportional coefficient is 0
   * and the expected R2 is 1. */
  double abscissae[] = {1.0, 2.0, 3.0, 4.0};
  double ordinates[] = {0.0, 0.0, 0.0, 0.0};
  constexpr int pointCount = sizeof(abscissae) / sizeof(abscissae[0]);
  double expectedCoefficients[] = {0.0};
  const double expectedR2 = 1.0;
  assert_regression_is(abscissae, ordinates, pointCount, Model::Type::Proportional, expectedCoefficients, expectedR2);
}
QUIZ_CASE(proportional_regression4) {
  /* Y is constant and non-zero, which a proportional model cannot fit: the
   * expected coefficient is 0 and the expected R2 is zero. */
  double abscissae[] = {-1.0, 0.0, 1.0};
  double ordinates[] = {1.0, 1.0, 1.0};
  constexpr int pointCount = sizeof(abscissae) / sizeof(abscissae[0]);
  double expectedCoefficients[] = {0.0};
  const double expectedR2 = 0.0;
  assert_regression_is(abscissae, ordinates, pointCount, Model::Type::Proportional, expectedCoefficients, expectedR2);
}
QUIZ_CASE(proportional_regression5) {
  /* Here the proportional regression does much worse than a constant
   * regression would, so the expected R2 is negative. */
  double abscissae[] = {-1.0, 0.0, 1.0};
  double ordinates[] = {1.0, 1.01, 1.0};
  constexpr int pointCount = sizeof(abscissae) / sizeof(abscissae[0]);
  double expectedCoefficients[] = {0.0};
  const double expectedR2 = -45300.5;
  assert_regression_is(abscissae, ordinates, pointCount, Model::Type::Proportional, expectedCoefficients, expectedR2);
}
QUIZ_CASE(quadratic_regression) {
double x[] = {-34.0, -12.0, 5.0, 86.0, -2.0};
double y[] = {-8241.389, -1194.734, -59.163, - 46245.39, -71.774};