mirror of
https://github.com/UpsilonNumworks/Upsilon.git
synced 2026-01-19 08:47:28 +01:00
340 lines
14 KiB
C++
340 lines
14 KiB
C++
#include <quiz.h>
|
|
#include <string.h>
|
|
#include <assert.h>
|
|
#include <apps/shared/global_context.h>
|
|
#include "../model/model.h"
|
|
#include "../regression_context.h"
|
|
#include "../store.h"
|
|
#include <poincare/helpers.h>
|
|
#include <poincare/test/helper.h>
|
|
|
|
using namespace Poincare;
|
|
using namespace Regression;
|
|
|
|
/* The data was generated by choosing X1 and the coefficients of the regression,
 * then filling Y1 with the regression formula + random()/10. */
|
|
|
|
/* Fills the given series of the store with numberOfPoints points.
 * Each xi[i] is written with store->set(value, series, 0, i); when yi is
 * provided, each yi[i] is written with store->set(value, series, 1, i).
 * When yi is nullptr only the xi values are stored. */
void setRegressionPoints(Regression::Store * store, int series, int numberOfPoints, double * xi, double * yi = nullptr) {
  for (int pointIndex = 0; pointIndex < numberOfPoints; pointIndex++) {
    store->set(xi[pointIndex], series, 0, pointIndex);
    if (yi == nullptr) {
      // No second array was given: only the first one is filled.
      continue;
    }
    store->set(yi[pointIndex], series, 1, pointIndex);
  }
}
|
|
|
|
void assert_regression_is(double * xi, double * yi, int numberOfPoints, Model::Type modelType, double * trueCoefficients, double trueR2) {
|
|
int series = 0;
|
|
Regression::Store store;
|
|
|
|
setRegressionPoints(&store, series, numberOfPoints, xi, yi);
|
|
store.setSeriesRegressionType(series, modelType);
|
|
Shared::GlobalContext globalContext;
|
|
RegressionContext context(&store, &globalContext);
|
|
|
|
double precision = 1e-2;
|
|
// When trueCoefficients = 0, a DBL_EPSILON reference ensures that the only accepted errors are due to double approximations
|
|
double reference = 1e6 * DBL_EPSILON;
|
|
|
|
// Compute and compare the coefficients
|
|
double * coefficients = store.coefficientsForSeries(series, &context);
|
|
int numberOfCoefs = store.modelForSeries(series)->numberOfCoefficients();
|
|
for (int i = 0; i < numberOfCoefs; i++) {
|
|
quiz_assert(IsApproximatelyEqual(coefficients[i], trueCoefficients[i], precision, reference));
|
|
}
|
|
|
|
// Compute and check r2 value and sign
|
|
double r2 = store.determinationCoefficientForSeries(series, &globalContext);
|
|
quiz_assert(r2 <= 1.0 && (r2 >= 0.0 || modelType == Model::Type::Proportional));
|
|
quiz_assert(IsApproximatelyEqual(r2, trueR2, precision, reference));
|
|
}
|
|
|
|
// Linear model on four points, expecting coefficients {3.4, -7.0} and r2 = 1.
QUIZ_CASE(linear_regression) {
  constexpr int pointCount = 4;
  double abscissas[pointCount] = {1.0, 8.0, 14.0, 79.0};
  double ordinates[pointCount] = {-3.581, 20.296, 40.676, 261.623};
  double expectedCoefficients[] = {3.4, -7.0};
  assert_regression_is(abscissas, ordinates, pointCount, Model::Type::Linear, expectedCoefficients, 1.0);
}
|
|
|
|
// Linear model on five scattered points: a low r2 (0.343) is expected.
QUIZ_CASE(linear_regression2) {
  constexpr int pointCount = 5;
  double abscissas[pointCount] = {-5.0, 2.0, 4.0, 5.6, 9.0};
  double ordinates[pointCount] = {22.0, 1.0, 13.0, 28.36, 78.0};
  double expectedCoefficients[] = {3.31824, 18.1191};
  double expectedR2 = 0.343;
  assert_regression_is(abscissas, ordinates, pointCount, Model::Type::Linear, expectedCoefficients, expectedR2);
}
|
|
|
|
// Proportional model on five points, expecting the single coefficient -5.89.
QUIZ_CASE(proportional_regression) {
  constexpr int pointCount = 5;
  double abscissas[pointCount] = {7.0, 5.0, 1.0, 9.0, 3.0};
  double ordinates[pointCount] = {-41.4851, -29.62186, -6.454245, -53.4976, -18.03325};
  double expectedCoefficients[] = {-5.89};
  double expectedR2 = 0.9999648161902982;
  assert_regression_is(abscissas, ordinates, pointCount, Model::Type::Proportional, expectedCoefficients, expectedR2);
}
|
|
|
|
// Proportional model on four points that it fits only moderately (r2 ~ 0.53).
QUIZ_CASE(proportional_regression2) {
  double abscissas[] = {5.0, 2.0, 3.0, 4.0};
  double ordinates[] = {10.0, 6.0, 7.0, 8.0};
  constexpr int pointCount = 4;
  double expectedCoefficients[] = {2.12963963};
  double expectedR2 = 0.53227513227513223;
  assert_regression_is(abscissas, ordinates, pointCount, Model::Type::Proportional, expectedCoefficients, expectedR2);
}
|
|
|
|
// Proportional model on all-zero Y values: coefficient 0 and r2 = 1 expected.
QUIZ_CASE(proportional_regression3) {
  double abscissas[] = {1.0, 2.0, 3.0, 4.0};
  double ordinates[] = {0.0, 0.0, 0.0, 0.0};
  constexpr int pointCount = 4;
  double expectedCoefficients[] = {0.0};
  assert_regression_is(abscissas, ordinates, pointCount, Model::Type::Proportional, expectedCoefficients, 1.0);
}
|
|
|
|
// Proportional model on a constant, non-zero Y.
QUIZ_CASE(proportional_regression4) {
  double abscissas[] = {-1.0, 0.0, 1.0};
  double ordinates[] = {1.0, 1.0, 1.0};
  constexpr int pointCount = 3;
  double expectedCoefficients[] = {0.0};
  // Y is constant, and proportional regression cannot fit it: R2 is null.
  double expectedR2 = 0.0;
  assert_regression_is(abscissas, ordinates, pointCount, Model::Type::Proportional, expectedCoefficients, expectedR2);
}
|
|
|
|
// Proportional model on an almost constant Y.
QUIZ_CASE(proportional_regression5) {
  double abscissas[] = {-1.0, 0.0, 1.0};
  double ordinates[] = {1.0, 1.01, 1.0};
  constexpr int pointCount = 3;
  double expectedCoefficients[] = {0.0};
  /* In this case, proportional regression performed poorly compared to a
   * constant regression: R2 is negative. */
  double expectedR2 = -45300.5;
  assert_regression_is(abscissas, ordinates, pointCount, Model::Type::Proportional, expectedCoefficients, expectedR2);
}
|
|
|
|
// Quadratic model on five points, three expected coefficients, r2 = 1.
QUIZ_CASE(quadratic_regression) {
  constexpr int pointCount = 5;
  double abscissas[pointCount] = {-34.0, -12.0, 5.0, 86.0, -2.0};
  double ordinates[pointCount] = {-8241.389, -1194.734, -59.163, -46245.39, -71.774};
  double expectedCoefficients[] = {-6.50001, 21.3004, -3.15799};
  assert_regression_is(abscissas, ordinates, pointCount, Model::Type::Quadratic, expectedCoefficients, 1.0);
}
|
|
|
|
// Cubic model on five points, four expected coefficients, r2 = 1.
QUIZ_CASE(cubic_regression) {
  constexpr int pointCount = 5;
  double abscissas[pointCount] = {-3.0, -2.8, -1.0, 0.0, 12.0};
  double ordinates[pointCount] = {691.261, 566.498, 20.203, -12.865, -34293.21};
  double expectedCoefficients[] = {-21.2015, 16.0141, 4.14522, -12.8658};
  assert_regression_is(abscissas, ordinates, pointCount, Model::Type::Cubic, expectedCoefficients, 1.0);
}
|
|
|
|
// Quartic model on ten points, five expected coefficients, r2 = 1.
QUIZ_CASE(quartic_regression) {
  constexpr int pointCount = 10;
  double abscissas[pointCount] = {1.6, 3.5, 3.5, -2.8, 6.4, 5.3, 2.9, -4.8, -5.7, 3.1};
  double ordinates[pointCount] = {-112.667, -1479.824, -1479.805, 1140.276, -9365.505, -5308.355, -816.925, 5554.007, 9277.107, -1009.874};
  double expectedCoefficients[] = {0.59998, -42.9998, 21.5015, 3.09232, -0.456824};
  assert_regression_is(abscissas, ordinates, pointCount, Model::Type::Quartic, expectedCoefficients, 1.0);
}
|
|
|
|
// Logarithmic model on four points with strictly positive abscissas.
QUIZ_CASE(logarithmic_regression) {
  constexpr int pointCount = 4;
  double abscissas[pointCount] = {0.2, 0.5, 5.0, 7.0};
  double ordinates[pointCount] = {-11.952, -9.035, -1.695, -0.584};
  double expectedCoefficients[] = {3.19383, -6.81679};
  double expectedR2 = 0.999994;
  assert_regression_is(abscissas, ordinates, pointCount, Model::Type::Logarithmic, expectedCoefficients, expectedR2);
}
|
|
|
|
// Exponential model on six points with negative Y values.
QUIZ_CASE(exponential_regression) {
  constexpr int pointCount = 6;
  double abscissas[pointCount] = {5.5, 5.6, 5.7, 5.8, 5.9, 6.0};
  double ordinates[pointCount] = {-276.842, -299.956, -324.933, -352.0299, -381.314, -413.0775};
  double expectedCoefficients[] = {-3.4, 0.8};
  assert_regression_is(abscissas, ordinates, pointCount, Model::Type::Exponential, expectedCoefficients, 1.0);
}
|
|
|
|
// Exponential model on four points with large positive Y values.
QUIZ_CASE(exponential_regression2) {
  constexpr int pointCount = 4;
  double abscissas[pointCount] = {0.0, 1.0, 2.0, 3.0};
  double ordinates[pointCount] = {3000.0, 3315.513, 3664.208, 4049.576};
  double expectedCoefficients[] = {3000.0, 0.1};
  assert_regression_is(abscissas, ordinates, pointCount, Model::Type::Exponential, expectedCoefficients, 1.0);
}
|
|
|
|
// Exponential model on eleven points whose Y values shrink towards 0.
QUIZ_CASE(exponential_regression3) {
  constexpr int pointCount = 11;
  double abscissas[pointCount] = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0};
  double ordinates[pointCount] = {-1.0, -0.3678794, -0.1353353, -0.04978707, -0.01831564, -0.006737947, -0.002478752, -0.000911882, -0.0003354626, -0.0001234098, -0.00004539993};
  double expectedCoefficients[] = {-1.0, -1.0};
  double expectedR2 = 0.9999999999999992;
  assert_regression_is(abscissas, ordinates, pointCount, Model::Type::Exponential, expectedCoefficients, expectedR2);

  // TODO : This data produce a wrong fit currently
  // double x2[] = {1.0, 2.0, 3.0, 4.0};
  // double y2[] = {2.0, 3.0, 4.0, 1.0};
  // double coefficients2[] = {2.905, -0.0606857};
  // double r22 = 0.838388;
  // assert_regression_is(x2, y2, 4, Model::Type::Exponential, coefficients2, r22);
}
|
|
|
|
// Power model on five points, expecting coefficients {71.8, 2.7} and r2 = 1.
QUIZ_CASE(power_regression) {
  constexpr int pointCount = 5;
  double abscissas[pointCount] = {1.0, 50.0, 34.0, 67.0, 20.0};
  double ordinates[pointCount] = {71.860, 2775514.0, 979755.1, 6116830.0, 233832.9};
  double expectedCoefficients[] = {71.8, 2.7};
  assert_regression_is(abscissas, ordinates, pointCount, Model::Type::Power, expectedCoefficients, 1.0);

  // TODO : This data produce a wrong fit currently
  // double x2[] = {1.0, 2.0, 3.0, 4.0};
  // double y2[] = {2.0, 3.0, 4.0, 1.0};
  // double coefficients2[] = {2.54948, -0.0247463};
  // double r22 = 0.833509;
  // assert_regression_is(x2, y2, 4, Model::Type::Power, coefficients2, r22);
}
|
|
|
|
/* Checks the trigonometric regression a*sin(b*x+c)+d on one data set under
 * the Radian, Degree and Gradian angle units. The reference coefficients
 * {a, b, c, d} below have b and c in radians; they are converted for the two
 * other units. The shared angle unit preference is overridden during the test
 * and set back to its previous value at the end. */
QUIZ_CASE(trigonometric_regression) {
  Preferences::AngleUnit previousAngleUnit = Preferences::sharedPreferences()->angleUnit();
  double r2 = 0.9994216;
  double x[] = {1, 31, 61, 91, 121, 151, 181, 211, 241, 271, 301, 331, 361};
  double y[] = {9.24, 10.05, 11.33, 12.72, 14.16, 14.98, 15.14, 14.41, 13.24, 11.88, 10.54, 9.48, 9.19};
  double coefficients[] = {2.9723, 0.016780, -1.3067, 12.146};
  constexpr int numberOfPoints = sizeof(x) / sizeof(x[0]);
  /* Both array sizes are known at compile time: check them there, so that the
   * check is not dropped when runtime assertions are disabled. */
  static_assert(sizeof(y) == sizeof(x), "x and y must hold the same number of points");

  // TODO : Ensure unicity with trigonometric coefficients.
  Preferences::sharedPreferences()->setAngleUnit(Preferences::AngleUnit::Radian);
  // a*sin(b*x+c)+d = -a*sin(b*x+c+π)+d
  double coefficientsRad[] = {-coefficients[0], coefficients[1], coefficients[2] + M_PI, coefficients[3]};
  assert_regression_is(x, y, numberOfPoints, Model::Type::Trigonometric, coefficientsRad, r2);

  Preferences::sharedPreferences()->setAngleUnit(Preferences::AngleUnit::Degree);
  double radToDeg = 180.0 / M_PI;
  // a*sin(b*x+c)+d = a*sin(b*x+c+2π)+d
  double coefficientsDeg[] = {coefficients[0], coefficients[1] * radToDeg, (coefficients[2] - 2.0 * M_PI) * radToDeg, coefficients[3]};
  assert_regression_is(x, y, numberOfPoints, Model::Type::Trigonometric, coefficientsDeg, r2);

  Preferences::sharedPreferences()->setAngleUnit(Preferences::AngleUnit::Gradian);
  double radToGrad = 200.0 / M_PI;
  // a*sin(b*x+c)+d = a*sin(b*x+c+2π)+d
  double coefficientsGrad[] = {coefficients[0], coefficients[1] * radToGrad, (coefficients[2] - 2.0 * M_PI) * radToGrad, coefficients[3]};
  assert_regression_is(x, y, numberOfPoints, Model::Type::Trigonometric, coefficientsGrad, r2);

  // Put back the angle unit that was set before the test
  Preferences::sharedPreferences()->setAngleUnit(previousAngleUnit);
}
|
|
|
|
|
|
// Logistic model checked on two data sets.
QUIZ_CASE(logistic_regression) {
  /* This data was generated without the random error, otherwise it did not
   * pass the test. */
  constexpr int firstPointCount = 4;
  double firstAbscissas[firstPointCount] = {2.3, 5.6, 1.1, 4.3};
  double firstOrdinates[firstPointCount] = {3.948, 4.694, 2.184, 4.656};
  double firstExpectedCoefficients[] = {6.0, 1.5, 4.7};
  double firstExpectedR2 = 0.9999999917270119;
  assert_regression_is(firstAbscissas, firstOrdinates, firstPointCount, Model::Type::Logistic, firstExpectedCoefficients, firstExpectedR2);

  // This data produced a wrong fit before
  constexpr int secondPointCount = 10;
  double secondAbscissas[secondPointCount] = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
  double secondOrdinates[secondPointCount] = {5.0, 9.0, 40.0, 64.0, 144.0, 200.0, 269.0, 278.0, 290.0, 295.0};
  double secondExpectedCoefficients[] = {64.9, 1.0, 297.4};
  double secondExpectedR2 = 0.9984396821656006;
  assert_regression_is(secondAbscissas, secondOrdinates, secondPointCount, Model::Type::Logistic, secondExpectedCoefficients, secondExpectedR2);

  // TODO : This data produce a wrong fit currently
  // double x3[] = {1.0, 3.0, 4.0, 6.0, 8.0};
  // double y3[] = {4.0, 4.0, 0.0, 58.0, 5.0};
  // No source of truth for coefficient, r2 should at least be positive.
  // double coefficients3[] = {-0.1, -0.4, -4};
  // double r23 = 0.75;
  // assert_regression_is(x3, y3, 5, Model::Type::Logistic, coefficients3, r23);
}
|
|
|
|
// Testing column and regression calculation
|
|
|
|
/* Stores the values xi in series 0 of a fresh store (no yi values are given)
 * and checks the statistics the store computes for column 0 of that series:
 * mean, sum, sum of squared values, standard deviation and variance.
 * The sum of squares, standard deviation and variance must be non-negative,
 * and every metric must approximately equal its expected "true" value. */
void assert_column_calculations_is(double * xi, int numberOfPoints, double trueMean, double trueSum, double trueSquaredSum, double trueStandardDeviation, double trueVariance) {
  int seriesIndex = 0;
  Regression::Store store;
  setRegressionPoints(&store, seriesIndex, numberOfPoints, xi);

  // Metrics computed by the store
  double computedMean = store.meanOfColumn(seriesIndex, 0);
  double computedSum = store.sumOfColumn(seriesIndex, 0);
  double computedSquaredSum = store.squaredValueSumOfColumn(seriesIndex, 0);
  double computedStandardDeviation = store.standardDeviationOfColumn(seriesIndex, 0);
  double computedVariance = store.varianceOfColumn(seriesIndex, 0);

  // These three quantities can never be negative
  quiz_assert(computedSquaredSum >= 0.0);
  quiz_assert(computedStandardDeviation >= 0.0);
  quiz_assert(computedVariance >= 0.0);

  double tolerance = 1e-3;

  /* When an expected value is 0, the computed one must be negligible against
   * a reference. trueSquaredSum is the least likely value to be null, so it
   * is the reference for the squared quantities. */
  quiz_assert(IsApproximatelyEqual(computedVariance, trueVariance, tolerance, trueSquaredSum));
  quiz_assert(IsApproximatelyEqual(computedSquaredSum, trueSquaredSum, tolerance, trueSquaredSum));

  // The remaining metrics are compared against the square root of that reference
  double rootReference = std::sqrt(trueSquaredSum);
  quiz_assert(IsApproximatelyEqual(computedMean, trueMean, tolerance, rootReference));
  quiz_assert(IsApproximatelyEqual(computedSum, trueSum, tolerance, rootReference));
  quiz_assert(IsApproximatelyEqual(computedStandardDeviation, trueStandardDeviation, tolerance, rootReference));
}
|
|
|
|
// Column statistics on four distinct values.
QUIZ_CASE(column_calculation) {
  constexpr int valueCount = 4;
  double values[valueCount] = {2.3, 5.6, 1.1, 4.3};
  double expectedMean = 3.325;
  double expectedSum = 13.3;
  double expectedSquaredSum = 56.35;
  double expectedStandardDeviation = 1.741228;
  double expectedVariance = 3.031875;
  assert_column_calculations_is(values, valueCount, expectedMean, expectedSum, expectedSquaredSum, expectedStandardDeviation, expectedVariance);
}
|
|
|
|
// Column statistics on three identical values.
QUIZ_CASE(constant_column_calculation) {
  // This data produced a negative variance before
  constexpr int valueCount = 3;
  double values[valueCount] = {-996.85840734641, -996.85840734641, -996.85840734641};
  double expectedMean = -996.85840734641;
  double expectedSum = -2990.57522203923;
  double expectedSquaredSum = 2981180.0528916633;
  // All values are equal: no dispersion is expected
  double expectedStandardDeviation = 0.0;
  double expectedVariance = 0.0;
  assert_column_calculations_is(values, valueCount, expectedMean, expectedSum, expectedSquaredSum, expectedStandardDeviation, expectedVariance);
}
|
|
|
|
/* Stores the points (xi, yi) in series 0 of a fresh store and checks that the
 * covariance, the column product sum and the correlation coefficient computed
 * by the store approximately equal trueCovariance, trueProductSum and trueR.
 * NOTE(review): the correlation coefficient is also required to be
 * non-negative, so this helper cannot be used as-is with data expected to
 * have a negative correlation - confirm this restriction is intended. */
void assert_regression_calculations_is(double * xi, double * yi, int numberOfPoints, double trueCovariance, double trueProductSum, double trueR) {
  int seriesIndex = 0;
  Regression::Store store;
  setRegressionPoints(&store, seriesIndex, numberOfPoints, xi, yi);

  double tolerance = 1e-3;

  // Metrics computed by the store
  double computedCovariance = store.covariance(seriesIndex);
  double computedProductSum = store.columnProductSum(seriesIndex);

  /* trueProductSum and trueCovariance are using each other as reference:
   * by construction, they often have a close value with a numberOfPoints
   * factor. */
  quiz_assert(IsApproximatelyEqual(computedCovariance, trueCovariance, tolerance, trueProductSum / numberOfPoints));
  quiz_assert(IsApproximatelyEqual(computedProductSum, trueProductSum, tolerance, trueCovariance * numberOfPoints));

  /* When trueR = 0, a DBL_EPSILON-based reference ensures that the only
   * accepted errors are due to double approximations. sqrt is used because
   * the R is computed from sqrt(V1*V0). */
  double correlationReference = 100.0 * std::sqrt(DBL_EPSILON);

  double computedR = store.correlationCoefficient(seriesIndex);
  quiz_assert(computedR >= 0.0);
  quiz_assert(IsApproximatelyEqual(computedR, trueR, tolerance, correlationReference));
}
|
|
|
|
// Covariance, product sum and correlation coefficient on five points.
QUIZ_CASE(regression_calculation) {
  constexpr int pointCount = 5;
  double abscissas[pointCount] = {1.0, 50.0, 34.0, 67.0, 20.0};
  double ordinates[pointCount] = {71.860, 2775514.0, 979755.1, 6116830.0, 233832.9};
  double expectedCovariance = 4.7789036e7;
  double expectedProductSum = 586591713.26;
  double expectedR = 0.919088;
  assert_regression_calculations_is(abscissas, ordinates, pointCount, expectedCovariance, expectedProductSum, expectedR);
}
|