[apps/regression] Compute R2 better when dealing with constant regression

Change-Id: Ic724d8d96cb723718a1ce57e72132972a782fc5e
2026-03-18 21:30:38 +01:00 · 2020-10-01 10:29:03 +02:00
parent 3f43504398
commit e517128a9e
2 changed files with 47 additions and 7 deletions
--- a/apps/regression/store.cpp
+++ b/apps/regression/store.cpp
@@ -309,10 +309,12 @@ double Store::correlationCoefficient(int series) const {

 double Store::computeDeterminationCoefficient(int series, Poincare::Context * globalContext) {
  /* Computes and returns the determination coefficient (R2) of the regression.
-   * For regressions, it is equal to the square of the correlation coefficient between
-   * the series Y and the evaluated values from the series X and the selected model
-   * Computing the coefficient using the latter equality would require more calls to the evaluated
-   * values and would be less precise. */
+   * For linear regressions, it is equal to the square of the correlation
+   * coefficient between the series Y and the evaluated values.
+   * With proportional regression or badly fitted models, R2 can technically be
+   * negative. R2<0 means that the regression is less effective than a
+   * constant set to the series average. It should not happen with regression
+   * models that can fit a constant observation. */
  // Residual sum of squares
  double ssr = 0;
  // Total sum of squares
@@ -327,7 +329,15 @@ double Store::computeDeterminationCoefficient(int series, Poincare::Context * gl
    double difference = m_data[series][1][k] - mean;
    sst += difference * difference;
  }
-  return sst == 0.0 ? 1.0 : 1.0 - ssr / sst;
+  if (sst == 0.0) {
+    /* Observation was constant, so R2 is undefined. Return 1 if estimations
+     * matched observations within DBL_EPSILON, and 0 otherwise. */
+    return (ssr <= DBL_EPSILON) ? 1.0 : 0.0;
+  }
+  double r2 = 1.0 - ssr / sst;
+  // Only proportional regression is expected to yield a negative R2.
+  assert(r2 >= 0 || seriesRegressionType(series) == Model::Type::Proportional);
+  return r2;
 }

 Model * Store::regressionModel(int index) {
--- a/apps/regression/test/model.cpp
+++ b/apps/regression/test/model.cpp
@@ -34,7 +34,7 @@ void assert_regression_is(double * xi, double * yi, int numberOfPoints, Model::T

  double precision = 1e-2;
  // When trueCoefficients = 0, a DBL_EPSILON reference ensures that the only accepted errors are due to double approximations
-  double reference = 100.0 * DBL_EPSILON;
+  double reference = 1e6 * DBL_EPSILON;

  // Compute and compare the coefficients
  double * coefficients = store.coefficientsForSeries(series, &context);
@@ -45,7 +45,7 @@ void assert_regression_is(double * xi, double * yi, int numberOfPoints, Model::T

  // Compute and check r2 value and sign
  double r2 = store.determinationCoefficientForSeries(series, &globalContext);
-  quiz_assert(r2 >= 0.0);
+  quiz_assert(r2 <= 1.0 && (r2 >= 0.0 || modelType == Model::Type::Proportional));
  quiz_assert(IsApproximatelyEqual(r2, trueR2, precision, reference));
 }

@@ -82,6 +82,36 @@ QUIZ_CASE(proportional_regression2) {
  assert_regression_is(x, y, numberOfPoints, Model::Type::Proportional, coefficients, r2);
 }

+QUIZ_CASE(proportional_regression3) {
+  constexpr int numberOfPoints = 4;
+  double x[numberOfPoints] = {1.0, 2.0, 3.0, 4.0};
+  double y[numberOfPoints] = {0.0, 0.0, 0.0, 0.0};
+  double coefficients[] = {0.0};
+  double r2 = 1.0;
+  assert_regression_is(x, y, numberOfPoints, Model::Type::Proportional, coefficients, r2);
+}
+
+QUIZ_CASE(proportional_regression4) {
+  constexpr int numberOfPoints = 3;
+  double x[numberOfPoints] = {-1.0, 0.0, 1.0};
+  double y[numberOfPoints] = {1.0, 1.0, 1.0};
+  double coefficients[] = {0.0};
+  // Y is constant and proportional regression cannot fit it, so R2 is zero.
+  double r2 = 0.0;
+  assert_regression_is(x, y, numberOfPoints, Model::Type::Proportional, coefficients, r2);
+}
+
+QUIZ_CASE(proportional_regression5) {
+  constexpr int numberOfPoints = 3;
+  double x[numberOfPoints] = {-1.0, 0.0, 1.0};
+  double y[numberOfPoints] = {1.0, 1.01, 1.0};
+  double coefficients[] = {0.0};
+  /* In this case, proportional regression performed poorly compared to a
+   * constant regression, R2 is negative. */
+  double r2 = -45300.5;
+  assert_regression_is(x, y, numberOfPoints, Model::Type::Proportional, coefficients, r2);
+}
+
 QUIZ_CASE(quadratic_regression) {
  double x[] = {-34.0, -12.0, 5.0, 86.0, -2.0};
  double y[] = {-8241.389, -1194.734, -59.163, - 46245.39, -71.774};