Files
Upsilon/apps/regression/test/model.cpp
2020-12-09 17:31:14 +01:00

348 lines
14 KiB
C++

#include <quiz.h>
#include <string.h>
#include <assert.h>
#include <apps/shared/global_context.h>
#include "../model/model.h"
#include "../regression_context.h"
#include "../store.h"
#include <poincare/helpers.h>
#include <poincare/test/helper.h>
using namespace Poincare;
using namespace Regression;
/* The data was generated by choosing X1 and the coefficients of the regression,
* then filling Y1 with the regression formula + random()/10. */
/* Writes `numberOfPoints` points into `series` of `store`.
 * Every xi[i] is stored; yi[i] is stored only when a `yi` array is supplied
 * (it defaults to nullptr). The third argument given to Store::set is 0 for
 * the xi values and 1 for the yi values -- presumably the column index, to be
 * confirmed against store.h. */
void setRegressionPoints(Regression::Store * store, int series, int numberOfPoints, double * xi, double * yi = nullptr) {
  const bool hasOrdinates = (yi != nullptr);
  for (int point = 0; point < numberOfPoints; ++point) {
    store->set(xi[point], series, 0, point);
    if (!hasOrdinates) {
      continue;
    }
    store->set(yi[point], series, 1, point);
  }
}
void assert_regression_is(double * xi, double * yi, int numberOfPoints, Model::Type modelType, double * trueCoefficients, double trueR2) {
int series = 0;
Regression::Store store;
setRegressionPoints(&store, series, numberOfPoints, xi, yi);
store.setSeriesRegressionType(series, modelType);
Shared::GlobalContext globalContext;
RegressionContext context(&store, &globalContext);
double precision = 1e-2;
// When trueCoefficients = 0, a DBL_EPSILON reference ensures that the only accepted errors are due to double approximations
double reference = 1e6 * DBL_EPSILON;
// Compute and compare the coefficients
double * coefficients = store.coefficientsForSeries(series, &context);
int numberOfCoefs = store.modelForSeries(series)->numberOfCoefficients();
for (int i = 0; i < numberOfCoefs; i++) {
quiz_assert(IsApproximatelyEqual(coefficients[i], trueCoefficients[i], precision, reference));
}
// Compute and check r2 value and sign
double r2 = store.determinationCoefficientForSeries(series, &globalContext);
quiz_assert(r2 <= 1.0 && (r2 >= 0.0 || modelType == Model::Type::Proportional));
quiz_assert(IsApproximatelyEqual(r2, trueR2, precision, reference));
}
// Linear model: expects coefficients {3.4, -7.0} and r2 = 1.0.
QUIZ_CASE(linear_regression) {
  double abscissas[] = {1.0, 8.0, 14.0, 79.0};
  double ordinates[] = {-3.581, 20.296, 40.676, 261.623};
  double expectedCoefficients[] = {3.4, -7.0};
  constexpr int pointCount = sizeof(abscissas) / sizeof(abscissas[0]);
  assert_regression_is(abscissas, ordinates, pointCount, Model::Type::Linear, expectedCoefficients, 1.0);
}
// Linear model on loosely aligned data: a low r2 (0.343) is expected.
QUIZ_CASE(linear_regression2) {
  double abscissas[] = {-5.0, 2.0, 4.0, 5.6, 9.0};
  double ordinates[] = {22.0, 1.0, 13.0, 28.36, 78.0};
  double expectedCoefficients[] = {3.31824, 18.1191};
  const double expectedR2 = 0.343;
  constexpr int pointCount = sizeof(abscissas) / sizeof(abscissas[0]);
  assert_regression_is(abscissas, ordinates, pointCount, Model::Type::Linear, expectedCoefficients, expectedR2);
}
// Proportional model: expects the single coefficient -5.89.
QUIZ_CASE(proportional_regression) {
  double abscissas[] = {7.0, 5.0, 1.0, 9.0, 3.0};
  double ordinates[] = {-41.4851, -29.62186, -6.454245, -53.4976, -18.03325};
  double expectedCoefficients[] = {-5.89};
  const double expectedR2 = 0.9999648161902982;
  constexpr int pointCount = sizeof(abscissas) / sizeof(abscissas[0]);
  assert_regression_is(abscissas, ordinates, pointCount, Model::Type::Proportional, expectedCoefficients, expectedR2);
}
// Proportional model on data it fits only partially (r2 around 0.53).
QUIZ_CASE(proportional_regression2) {
  constexpr int k_pointCount = 4;
  double abscissas[k_pointCount] = {5.0, 2.0, 3.0, 4.0};
  double ordinates[k_pointCount] = {10.0, 6.0, 7.0, 8.0};
  double expectedCoefficients[] = {2.12963963};
  const double expectedR2 = 0.53227513227513223;
  assert_regression_is(abscissas, ordinates, k_pointCount, Model::Type::Proportional, expectedCoefficients, expectedR2);
}
// Proportional model with all-zero ordinates: expects coefficient 0 and r2 = 1.
QUIZ_CASE(proportional_regression3) {
  constexpr int k_pointCount = 4;
  double abscissas[k_pointCount] = {1.0, 2.0, 3.0, 4.0};
  double ordinates[k_pointCount] = {0.0, 0.0, 0.0, 0.0};
  double expectedCoefficients[] = {0.0};
  const double expectedR2 = 1.0;
  assert_regression_is(abscissas, ordinates, k_pointCount, Model::Type::Proportional, expectedCoefficients, expectedR2);
}
/* Y is constant and a proportional regression cannot fit it: the expected
 * coefficient is 0 and the expected R2 is null. */
QUIZ_CASE(proportional_regression4) {
  constexpr int k_pointCount = 3;
  double abscissas[k_pointCount] = {-1.0, 0.0, 1.0};
  double ordinates[k_pointCount] = {1.0, 1.0, 1.0};
  double expectedCoefficients[] = {0.0};
  const double expectedR2 = 0.0;
  assert_regression_is(abscissas, ordinates, k_pointCount, Model::Type::Proportional, expectedCoefficients, expectedR2);
}
/* Here the proportional regression performs poorly compared to a constant
 * regression, so the expected R2 is negative. */
QUIZ_CASE(proportional_regression5) {
  constexpr int k_pointCount = 3;
  double abscissas[k_pointCount] = {-1.0, 0.0, 1.0};
  double ordinates[k_pointCount] = {1.0, 1.01, 1.0};
  double expectedCoefficients[] = {0.0};
  const double expectedR2 = -45300.5;
  assert_regression_is(abscissas, ordinates, k_pointCount, Model::Type::Proportional, expectedCoefficients, expectedR2);
}
// Quadratic model: expects three coefficients and r2 = 1.0.
QUIZ_CASE(quadratic_regression) {
  double abscissas[] = {-34.0, -12.0, 5.0, 86.0, -2.0};
  double ordinates[] = {-8241.389, -1194.734, -59.163, -46245.39, -71.774};
  double expectedCoefficients[] = {-6.50001, 21.3004, -3.15799};
  constexpr int pointCount = sizeof(abscissas) / sizeof(abscissas[0]);
  assert_regression_is(abscissas, ordinates, pointCount, Model::Type::Quadratic, expectedCoefficients, 1.0);
}
// Cubic model: expects four coefficients and r2 = 1.0.
QUIZ_CASE(cubic_regression) {
  double abscissas[] = {-3.0, -2.8, -1.0, 0.0, 12.0};
  double ordinates[] = {691.261, 566.498, 20.203, -12.865, -34293.21};
  double expectedCoefficients[] = {-21.2015, 16.0141, 4.14522, -12.8658};
  constexpr int pointCount = sizeof(abscissas) / sizeof(abscissas[0]);
  assert_regression_is(abscissas, ordinates, pointCount, Model::Type::Cubic, expectedCoefficients, 1.0);
}
// Quartic model: expects five coefficients and r2 = 1.0.
QUIZ_CASE(quartic_regression) {
  double abscissas[] = {1.6, 3.5, 3.5, -2.8, 6.4, 5.3, 2.9, -4.8, -5.7, 3.1};
  double ordinates[] = {-112.667, -1479.824, -1479.805, 1140.276, -9365.505, -5308.355, -816.925, 5554.007, 9277.107, -1009.874};
  double expectedCoefficients[] = {0.59998, -42.9998, 21.5015, 3.09232, -0.456824};
  constexpr int pointCount = sizeof(abscissas) / sizeof(abscissas[0]);
  assert_regression_is(abscissas, ordinates, pointCount, Model::Type::Quartic, expectedCoefficients, 1.0);
}
// Logarithmic model: expects two coefficients and r2 = 0.999994.
QUIZ_CASE(logarithmic_regression) {
  double abscissas[] = {0.2, 0.5, 5, 7};
  double ordinates[] = {-11.952, -9.035, -1.695, -0.584};
  double expectedCoefficients[] = {3.19383, -6.81679};
  const double expectedR2 = 0.999994;
  constexpr int pointCount = sizeof(abscissas) / sizeof(abscissas[0]);
  assert_regression_is(abscissas, ordinates, pointCount, Model::Type::Logarithmic, expectedCoefficients, expectedR2);
}
// Exponential model with negative ordinates: expects coefficients {-3.4, 0.8}.
QUIZ_CASE(exponential_regression) {
  double abscissas[] = {5.5, 5.6, 5.7, 5.8, 5.9, 6.0};
  double ordinates[] = {-276.842, -299.956, -324.933, -352.0299, -381.314, -413.0775};
  double expectedCoefficients[] = {-3.4, 0.8};
  constexpr int pointCount = sizeof(abscissas) / sizeof(abscissas[0]);
  assert_regression_is(abscissas, ordinates, pointCount, Model::Type::Exponential, expectedCoefficients, 1.0);
}
// Exponential model with large positive ordinates: expects {3000, .1}.
QUIZ_CASE(exponential_regression2) {
  double abscissas[] = {0, 1, 2, 3};
  double ordinates[] = {3000, 3315.513, 3664.208, 4049.576};
  double expectedCoefficients[] = {3000, .1};
  constexpr int pointCount = sizeof(abscissas) / sizeof(abscissas[0]);
  assert_regression_is(abscissas, ordinates, pointCount, Model::Type::Exponential, expectedCoefficients, 1.0);
}
// Exponential model on a decaying negative series: expects {-1, -1}.
QUIZ_CASE(exponential_regression3) {
  double abscissas[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
  double ordinates[] = {-1, -.3678794, -.1353353, -.04978707, -.01831564, -.006737947, -.002478752, -.000911882, -.0003354626, -.0001234098, -.00004539993};
  double expectedCoefficients[] = {-1, -1};
  const double expectedR2 = 0.9999999999999992;
  constexpr int pointCount = sizeof(abscissas) / sizeof(abscissas[0]);
  assert_regression_is(abscissas, ordinates, pointCount, Model::Type::Exponential, expectedCoefficients, expectedR2);

  // TODO: This data currently produces a wrong fit
  // double x2[] = {1.0, 2.0, 3.0, 4.0};
  // double y2[] = {2.0, 3.0, 4.0, 1.0};
  // double coefficients2[] = {2.905, -0.0606857};
  // double r22 = 0.838388;
  // assert_regression_is(x2, y2, 4, Model::Type::Exponential, coefficients2, r22);
}
// Power model: expects coefficients {71.8, 2.7} and r2 = 1.0.
QUIZ_CASE(power_regression) {
  double abscissas[] = {1.0, 50.0, 34.0, 67.0, 20.0};
  double ordinates[] = {71.860, 2775514, 979755.1, 6116830, 233832.9};
  double expectedCoefficients[] = {71.8, 2.7};
  constexpr int pointCount = sizeof(abscissas) / sizeof(abscissas[0]);
  assert_regression_is(abscissas, ordinates, pointCount, Model::Type::Power, expectedCoefficients, 1.0);

  // TODO: This data currently produces a wrong fit
  // double x2[] = {1.0, 2.0, 3.0, 4.0};
  // double y2[] = {2.0, 3.0, 4.0, 1.0};
  // double coefficients2[] = {2.54948, -0.0247463};
  // double r22 = 0.833509;
  // assert_regression_is(x2, y2, 4, Model::Type::Power, coefficients2, r22);
}
void assert_trigonomatric_regression_is(double * xi, double * yi, int numberOfPoints, double * trueCoefficients, double trueR2, Poincare::Preferences::AngleUnit trueCoeffcientsUnit) {
// Test the trigonometric regression at all angle units
const Preferences::AngleUnit previousAngleUnit = Preferences::sharedPreferences()->angleUnit();
const Poincare::Preferences::AngleUnit units[3] = {Poincare::Preferences::AngleUnit::Radian, Poincare::Preferences::AngleUnit::Degree, Poincare::Preferences::AngleUnit::Gradian};
for (int i = 0; i < 3; ++i) {
Poincare::Preferences::AngleUnit unit = units[i];
Poincare::Preferences::sharedPreferences()->setAngleUnit(unit);
double unitFactor = Trigonometry::PiInAngleUnit(unit) / Trigonometry::PiInAngleUnit(trueCoeffcientsUnit);
// True coefficients b and c are converted to the tested angle unit
double coefficientsUnit[] = {trueCoefficients[0], trueCoefficients[1] * unitFactor, trueCoefficients[2] * unitFactor, trueCoefficients[3]};
assert_regression_is(xi, yi, numberOfPoints, Model::Type::Trigonometric, coefficientsUnit, trueR2);
}
// Restore previous angleUnit
Poincare::Preferences::sharedPreferences()->setAngleUnit(previousAngleUnit);
}
/* Trigonometric model; the expected coefficients are expressed in radians and
 * the helper re-checks the fit in every angle unit. */
QUIZ_CASE(trigonometric_regression1) {
  double r2 = 0.9994216;
  double x[] = {1, 31, 61, 91, 121, 151, 181, 211, 241, 271, 301, 331, 361};
  double y[] = {9.24, 10.05, 11.33, 12.72, 14.16, 14.98, 15.14, 14.41, 13.24, 11.88, 10.54, 9.48, 9.19};
  double coefficients[] = {2.9723, 0.016780, -1.3067, 12.146};
  constexpr int numberOfPoints = sizeof(x) / sizeof(x[0]);
  /* Array sizes are known at compile time, so they are checked there: unlike a
   * runtime assert, these checks are not compiled out when NDEBUG is defined. */
  static_assert(sizeof(y) == sizeof(x), "x and y must hold the same number of points");
  // assert_trigonomatric_regression_is reads coefficients[0] to coefficients[3]
  static_assert(sizeof(coefficients) / sizeof(coefficients[0]) == 4, "four trigonometric coefficients are expected");
  assert_trigonomatric_regression_is(x, y, numberOfPoints, coefficients, r2, Poincare::Preferences::AngleUnit::Radian);
}
/* Trigonometric model on noisier data (expected r2 of 0.9154); the expected
 * coefficients are expressed in radians and the helper re-checks the fit in
 * every angle unit. */
QUIZ_CASE(trigonometric_regression2) {
  double r2 = 0.9154;
  double x[] = { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48};
  double y[] = {-2, -4, -5, -2, 3, 6, 8, 11, 9, 5, 2, 1, 0, -3, -5, -2, 3, 5, 7, 10, 10, 5, 2, 2, 1};
  double coefficients[] = {6.42, 0.26, -2.16, 2.82};
  constexpr int numberOfPoints = sizeof(x) / sizeof(x[0]);
  /* Array sizes are known at compile time, so they are checked there: unlike a
   * runtime assert, these checks are not compiled out when NDEBUG is defined. */
  static_assert(sizeof(y) == sizeof(x), "x and y must hold the same number of points");
  // assert_trigonomatric_regression_is reads coefficients[0] to coefficients[3]
  static_assert(sizeof(coefficients) / sizeof(coefficients[0]) == 4, "four trigonometric coefficients are expected");
  assert_trigonomatric_regression_is(x, y, numberOfPoints, coefficients, r2, Poincare::Preferences::AngleUnit::Radian);
}
// Logistic model, checked on two data sets.
QUIZ_CASE(logistic_regression) {
  /* First data set: generated without the random error, otherwise it did not
   * pass the test. */
  double firstAbscissas[] = {2.3, 5.6, 1.1, 4.3};
  double firstOrdinates[] = {3.948, 4.694, 2.184, 4.656};
  double firstCoefficients[] = {6, 1.5, 4.7};
  const double firstR2 = 0.9999999917270119;
  constexpr int firstPointCount = sizeof(firstAbscissas) / sizeof(firstAbscissas[0]);
  assert_regression_is(firstAbscissas, firstOrdinates, firstPointCount, Model::Type::Logistic, firstCoefficients, firstR2);

  // Second data set: it used to produce a wrong fit
  double secondAbscissas[] = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
  double secondOrdinates[] = {5.0, 9.0, 40.0, 64.0, 144.0, 200.0, 269.0, 278.0, 290.0, 295.0};
  double secondCoefficients[] = {64.9, 1.0, 297.4};
  const double secondR2 = 0.9984396821656006;
  constexpr int secondPointCount = sizeof(secondAbscissas) / sizeof(secondAbscissas[0]);
  assert_regression_is(secondAbscissas, secondOrdinates, secondPointCount, Model::Type::Logistic, secondCoefficients, secondR2);

  // TODO: This data currently produces a wrong fit
  // double x3[] = {1.0, 3.0, 4.0, 6.0, 8.0};
  // double y3[] = {4.0, 4.0, 0.0, 58.0, 5.0};
  // No source of truth for coefficient, r2 should at least be positive.
  // double coefficients3[] = {-0.1, -0.4, -4};
  // double r23 = 0.75;
  // assert_regression_is(x3, y3, 5, Model::Type::Logistic, coefficients3, r23);
}
// Testing column and regression calculation
/* Stores the `numberOfPoints` values of `xi` in series 0 of a fresh store
 * (no yi values are written) and checks the statistics computed by the store
 * with the arguments (series, 0): mean, sum, sum of squared values, standard
 * deviation and variance. The squared sum, standard deviation and variance
 * must be non-negative, and all five values must be approximately equal to
 * the expected ones. */
void assert_column_calculations_is(double * xi, int numberOfPoints, double trueMean, double trueSum, double trueSquaredSum, double trueStandardDeviation, double trueVariance) {
  constexpr int k_series = 0;
  constexpr double k_precision = 1e-3;

  Regression::Store store;
  setRegressionPoints(&store, k_series, numberOfPoints, xi);

  // Computed metrics
  const double computedMean = store.meanOfColumn(k_series, 0);
  const double computedSum = store.sumOfColumn(k_series, 0);
  const double computedSquaredSum = store.squaredValueSumOfColumn(k_series, 0);
  const double computedStandardDeviation = store.standardDeviationOfColumn(k_series, 0);
  const double computedVariance = store.varianceOfColumn(k_series, 0);

  // These three quantities can never be negative
  quiz_assert(computedSquaredSum >= 0.0);
  quiz_assert(computedStandardDeviation >= 0.0);
  quiz_assert(computedVariance >= 0.0);

  /* When the expected value is 0, the computed value must be negligible
   * against a reference. trueSquaredSum is the value least likely to be null,
   * so it serves as reference for the quantities homogeneous to a square... */
  const double squaredReference = trueSquaredSum;
  quiz_assert(IsApproximatelyEqual(computedVariance, trueVariance, k_precision, squaredReference));
  quiz_assert(IsApproximatelyEqual(computedSquaredSum, trueSquaredSum, k_precision, squaredReference));

  // ... and its square root serves as reference for the other ones.
  const double linearReference = std::sqrt(trueSquaredSum);
  quiz_assert(IsApproximatelyEqual(computedMean, trueMean, k_precision, linearReference));
  quiz_assert(IsApproximatelyEqual(computedSum, trueSum, k_precision, linearReference));
  quiz_assert(IsApproximatelyEqual(computedStandardDeviation, trueStandardDeviation, k_precision, linearReference));
}
// Column statistics on four distinct values.
QUIZ_CASE(column_calculation) {
  double values[] = {2.3, 5.6, 1.1, 4.3};
  const double expectedMean = 3.325;
  const double expectedSum = 13.3;
  const double expectedSquaredSum = 56.35;
  const double expectedStandardDeviation = 1.741228;
  const double expectedVariance = 3.031875;
  constexpr int valueCount = sizeof(values) / sizeof(values[0]);
  assert_column_calculations_is(values, valueCount, expectedMean, expectedSum, expectedSquaredSum, expectedStandardDeviation, expectedVariance);
}
/* Column statistics on three identical values: this data used to produce a
 * negative variance. */
QUIZ_CASE(constant_column_calculation) {
  double values[] = {-996.85840734641, -996.85840734641, -996.85840734641};
  const double expectedMean = -996.85840734641;
  const double expectedSum = -2990.57522203923;
  const double expectedSquaredSum = 2981180.0528916633;
  const double expectedStandardDeviation = 0.0;
  const double expectedVariance = 0.0;
  constexpr int valueCount = sizeof(values) / sizeof(values[0]);
  assert_column_calculations_is(values, valueCount, expectedMean, expectedSum, expectedSquaredSum, expectedStandardDeviation, expectedVariance);
}
/* Stores the points (xi, yi) in series 0 of a fresh store and checks the
 * covariance, the column product sum and the correlation coefficient computed
 * by the store against the expected values. The correlation coefficient is
 * also required to be non-negative. */
void assert_regression_calculations_is(double * xi, double * yi, int numberOfPoints, double trueCovariance, double trueProductSum, double trueR) {
  constexpr int k_series = 0;
  constexpr double k_precision = 1e-3;

  Regression::Store store;
  setRegressionPoints(&store, k_series, numberOfPoints, xi, yi);

  // Computed metrics
  const double computedCovariance = store.covariance(k_series);
  const double computedProductSum = store.columnProductSum(k_series);

  /* The expected product sum and covariance serve as reference for each
   * other: by construction, they often have close values, up to a
   * numberOfPoints factor. */
  const double covarianceReference = trueProductSum / numberOfPoints;
  const double productSumReference = trueCovariance * numberOfPoints;
  quiz_assert(IsApproximatelyEqual(computedCovariance, trueCovariance, k_precision, covarianceReference));
  quiz_assert(IsApproximatelyEqual(computedProductSum, trueProductSum, k_precision, productSumReference));

  /* When trueR = 0, a DBL_EPSILON-based reference ensures that the only
   * accepted errors are due to double approximations. Its square root is used
   * because R is computed from sqrt(V1*V0). */
  const double correlationReference = 100.0 * std::sqrt(DBL_EPSILON);
  const double computedR = store.correlationCoefficient(k_series);
  /* NOTE(review): this rejects any negative correlation coefficient, so the
   * helper cannot be used with negatively correlated data -- confirm that it
   * is intended. */
  quiz_assert(computedR >= 0.0);
  quiz_assert(IsApproximatelyEqual(computedR, trueR, k_precision, correlationReference));
}
// Covariance, product sum and correlation coefficient on five points.
QUIZ_CASE(regression_calculation) {
  double abscissas[] = {1.0, 50.0, 34.0, 67.0, 20.0};
  double ordinates[] = {71.860, 2775514, 979755.1, 6116830.0, 233832.9};
  const double expectedCovariance = 4.7789036e7;
  const double expectedProductSum = 586591713.26;
  const double expectedR = 0.919088;
  constexpr int pointCount = sizeof(abscissas) / sizeof(abscissas[0]);
  assert_regression_calculations_is(abscissas, ordinates, pointCount, expectedCovariance, expectedProductSum, expectedR);
}