[quiz] Improve test possibilities for regression and statistics

Change-Id: I4414fad24e10dcbd56cd9aff1e35e00ba66dda2c
This commit is contained in:
Hugo Saint-Vignes
2020-05-26 16:30:25 +02:00
committed by Émilie Feral
parent d16e49fc5f
commit 29b0e86225
4 changed files with 173 additions and 80 deletions

View File

@@ -5,6 +5,7 @@
#include "../model/model.h"
#include "../regression_context.h"
#include "../store.h"
#include <poincare/helpers.h>
using namespace Poincare;
using namespace Regression;
@@ -12,21 +13,6 @@ using namespace Regression;
/* The data was generated by choosing X1 and the coefficients of the regression,
* then filling Y1 with the regression formula + random()/10. */
/* Relative deviation of observedValue from expectedValue, as a non-negative
 * ratio. expectedValue must not be zero (checked by assert). */
double relativeError(double observedValue, double expectedValue) {
  assert(expectedValue != 0.0);
  const double deviation = observedValue - expectedValue;
  const double ratio = deviation / expectedValue;
  return std::fabs(ratio);
}
/* Assert that observedValue matches expectedValue: exactly when the expected
 * value is zero, otherwise within a 1% relative error. */
void assert_value_is(double observedValue, double expectedValue) {
  if (expectedValue == 0.0) {
    // relativeError cannot take a zero expected value, so require an exact match
    quiz_assert(observedValue == expectedValue);
    return;
  }
  const double tolerance = 0.01;
  quiz_assert(relativeError(observedValue, expectedValue) < tolerance);
}
void setRegressionPoints(Regression::Store * store, int series, int numberOfPoints, double * xi, double * yi = nullptr) {
for (int i = 0; i < numberOfPoints; i++) {
store->set(xi[i], series, 0, i);
@@ -45,16 +31,21 @@ void assert_regression_is(double * xi, double * yi, int numberOfPoints, Model::T
Shared::GlobalContext globalContext;
RegressionContext context(&store, &globalContext);
double precision = 1e-2;
// When trueCoefficients = 0, a DBL_EPSILON reference ensures that the only accepted errors are due to double approximations
double reference = 100.0 * DBL_EPSILON;
// Compute and compare the coefficients
double * coefficients = store.coefficientsForSeries(series, &context);
int numberOfCoefs = store.modelForSeries(series)->numberOfCoefficients();
for (int i = 0; i < numberOfCoefs; i++) {
assert_value_is(coefficients[i], trueCoefficients[i]);
quiz_assert(Helpers::IsApproximatelyEqual(coefficients[i], trueCoefficients[i], precision, reference));
}
// Compute and compare r2
// Compute and check r2 value and sign
double r2 = store.determinationCoefficientForSeries(series, &globalContext);
assert_value_is(r2, trueR2);
quiz_assert(r2 >= 0.0);
quiz_assert(Helpers::IsApproximatelyEqual(r2, trueR2, precision, reference));
}
QUIZ_CASE(linear_regression) {
@@ -188,11 +179,26 @@ void assert_column_calculations_is(double * xi, int numberOfPoints, double trueM
double squaredSum = store.squaredValueSumOfColumn(series,0);
double standardDeviation = store.standardDeviationOfColumn(series,0);
double variance = store.varianceOfColumn(series,0);
assert_value_is(mean, trueMean);
assert_value_is(sum, trueSum);
assert_value_is(squaredSum, trueSquaredSum);
assert_value_is(standardDeviation, trueStandardDeviation);
assert_value_is(variance, trueVariance);
// Check that squaredSum, standardDeviation and variance are positive
quiz_assert(squaredSum >= 0.0);
quiz_assert(standardDeviation >= 0.0);
quiz_assert(variance >= 0.0);
double precision = 1e-3;
// When the expected value is 0, the observed value must be negligible against the reference.
// trueSquaredSum is used as the reference because it is the expected value least likely to be zero
double reference = trueSquaredSum;
quiz_assert(Helpers::IsApproximatelyEqual(variance, trueVariance, precision, reference));
quiz_assert(Helpers::IsApproximatelyEqual(squaredSum, trueSquaredSum, precision, reference));
// adapt the reference
reference = std::sqrt(trueSquaredSum);
quiz_assert(Helpers::IsApproximatelyEqual(mean, trueMean, precision, reference));
quiz_assert(Helpers::IsApproximatelyEqual(sum, trueSum, precision, reference));
quiz_assert(Helpers::IsApproximatelyEqual(standardDeviation, trueStandardDeviation, precision, reference));
}
QUIZ_CASE(column_calculation) {
@@ -207,12 +213,12 @@ QUIZ_CASE(column_calculation) {
QUIZ_CASE(constant_column_calculation) {
// This data produced a negative variance before
double x[] = {-996.8584, -996.8584, -996.8584};
double mean = -996.8584;
double sum = -2990.5752;
double squaredSum = 2.98118000895168e6;
double standardDeviation = 0;
double variance = 0;
double x[] = {-996.85840734641, -996.85840734641, -996.85840734641};
double mean = -996.85840734641;
double sum = -2990.57522203923;
double squaredSum = 2981180.0528916633;
double standardDeviation = 0.0;
double variance = 0.0;
assert_column_calculations_is(x, 3, mean, sum, squaredSum, standardDeviation, variance);
}
@@ -222,18 +228,29 @@ void assert_regression_calculations_is(double * xi, double * yi, int numberOfPoi
setRegressionPoints(&store, series, numberOfPoints, xi, yi);
double precision = 1e-3;
// Compute and compare the regression calculations metrics
double covariance = store.covariance(series);
double productSum = store.columnProductSum(series);
// trueProductSum and trueCovariance each serve as the other's reference.
// By construction, they often differ by roughly a factor of numberOfPoints
quiz_assert(Helpers::IsApproximatelyEqual(covariance, trueCovariance, precision, trueProductSum / numberOfPoints));
quiz_assert(Helpers::IsApproximatelyEqual(productSum, trueProductSum, precision, trueCovariance * numberOfPoints));
// When trueR = 0, a DBL_EPSILON reference ensures that the only accepted errors are due to double approximations
// sqrt is used because the R is computed from sqrt(V1*V0)
double reference = 100.0 * std::sqrt(DBL_EPSILON);
double r = store.correlationCoefficient(series);
assert_value_is(covariance, trueCovariance);
assert_value_is(productSum, trueProductSum);
assert_value_is(r, trueR);
quiz_assert(r >= 0.0);
quiz_assert(Helpers::IsApproximatelyEqual(r, trueR, precision, reference));
}
QUIZ_CASE(regression_calculation) {
double x[] = {1.0, 50.0, 34.0, 67.0, 20.0};
double y[] = {71.860, 2775514, 979755.1, 6116830, 233832.9};
double y[] = {71.860, 2775514, 979755.1, 6116830.0, 233832.9};
double covariance = 4.7789036e7;
double productSum = 586591713.26;
double r = 0.919088;

View File

@@ -3,41 +3,74 @@
#include <math.h>
#include <cmath>
#include "../store.h"
#include <poincare/helpers.h>
using namespace Poincare;
namespace Statistics {
void assert_value_approximately_equal_to(double d1, double d2) {
assert((std::isnan(d1) && std::isnan(d2))
|| (std::isinf(d1) && std::isinf(d2) && d1*d2 > 0 /*same sign*/)
|| fabs(d1-d2) < 0.001);
/* Assert that d1 and d2 match: both NaN, both infinite with the same sign, or
 * approximately equal according to Helpers::IsApproximatelyEqual with the
 * given precision and reference. */
void assert_value_approximately_equal_to(double d1, double d2, double precision, double reference) {
  const bool bothUndefined = std::isnan(d1) && std::isnan(d2);
  // A strictly positive product of two infinities means they share the same sign
  const bool sameInfinity = std::isinf(d1) && std::isinf(d2) && d1 * d2 > 0.0;
  quiz_assert(bothUndefined
      || sameInfinity
      || Helpers::IsApproximatelyEqual(d1, d2, precision, reference));
}
void assert_data_statictics_equal_to(double n[], double v[], int numberOfData, double sumOfOccurrences, double maxValue, double minValue, double range, double mean, double variance, double standardDeviation, double sampleStandardDeviation, double firstQuartile, double thirdQuartile, double quartileRange, double median, double sum, double squaredValueSum) {
void assert_data_statictics_equal_to(double v[], double n[], int numberOfData, double trueSumOfOccurrences, double trueMaxValue, double trueMinValue, double trueRange, double trueMean, double trueVariance, double trueStandardDeviation, double trueSampleStandardDeviation, double trueFirstQuartile, double trueThirdQuartile, double trueQuartileRange, double trueMedian, double trueSum, double trueSquaredValueSum) {
Store store;
int seriesIndex = 0;
// Set the data in the store
for (int i = 0; i < numberOfData; i++) {
store.set(n[i], seriesIndex, 0, i);
store.set(v[i], seriesIndex, 1, i);
store.set(v[i], seriesIndex, 0, i);
store.set(n[i], seriesIndex, 1, i);
}
double precision = 1e-3;
// Compare the statistics
assert_value_approximately_equal_to(standardDeviation * standardDeviation, variance);
assert_value_approximately_equal_to(store.sumOfOccurrences(seriesIndex), sumOfOccurrences);
assert_value_approximately_equal_to(store.maxValue(seriesIndex), maxValue);
assert_value_approximately_equal_to(store.minValue(seriesIndex), minValue);
assert_value_approximately_equal_to(store.range(seriesIndex), range);
assert_value_approximately_equal_to(store.mean(seriesIndex), mean);
assert_value_approximately_equal_to(store.variance(seriesIndex), variance);
assert_value_approximately_equal_to(store.standardDeviation(seriesIndex), standardDeviation);
assert_value_approximately_equal_to(store.sampleStandardDeviation(seriesIndex), sampleStandardDeviation);
assert_value_approximately_equal_to(store.firstQuartile(seriesIndex), firstQuartile);
assert_value_approximately_equal_to(store.thirdQuartile(seriesIndex), thirdQuartile);
assert_value_approximately_equal_to(store.quartileRange(seriesIndex), quartileRange);
assert_value_approximately_equal_to(store.median(seriesIndex), median);
assert_value_approximately_equal_to(store.sum(seriesIndex), sum);
assert_value_approximately_equal_to(store.squaredValueSum(seriesIndex), squaredValueSum);
double sumOfOccurrences = store.sumOfOccurrences(seriesIndex);
double maxValue = store.maxValue(seriesIndex);
double minValue = store.minValue(seriesIndex);
double range = store.range(seriesIndex);
double mean = store.mean(seriesIndex);
double variance = store.variance(seriesIndex);
double standardDeviation = store.standardDeviation(seriesIndex);
double sampleStandardDeviation = store.sampleStandardDeviation(seriesIndex);
double firstQuartile = store.firstQuartile(seriesIndex);
double thirdQuartile = store.thirdQuartile(seriesIndex);
double quartileRange = store.quartileRange(seriesIndex);
double median = store.median(seriesIndex);
double sum = store.sum(seriesIndex);
double squaredValueSum = store.squaredValueSum(seriesIndex);
// Check the positive statistics
quiz_assert(range >= 0.0);
quiz_assert(variance >= 0.0);
quiz_assert(standardDeviation >= 0.0);
quiz_assert(sampleStandardDeviation >= 0.0);
quiz_assert(quartileRange >= 0.0);
quiz_assert(squaredValueSum >= 0.0);
double reference = trueSquaredValueSum;
assert_value_approximately_equal_to(variance, trueVariance, precision, reference);
assert_value_approximately_equal_to(squaredValueSum, trueSquaredValueSum, precision, reference);
reference = std::sqrt(trueSquaredValueSum);
assert_value_approximately_equal_to(trueStandardDeviation * trueStandardDeviation, trueVariance, precision, reference);
assert_value_approximately_equal_to(sumOfOccurrences, trueSumOfOccurrences, precision, reference);
assert_value_approximately_equal_to(mean, trueMean, precision, reference);
assert_value_approximately_equal_to(standardDeviation, trueStandardDeviation, precision, reference);
assert_value_approximately_equal_to(sampleStandardDeviation, trueSampleStandardDeviation, precision, reference);
assert_value_approximately_equal_to(firstQuartile, trueFirstQuartile, precision, reference);
assert_value_approximately_equal_to(thirdQuartile, trueThirdQuartile, precision, reference);
assert_value_approximately_equal_to(median, trueMedian, precision, reference);
assert_value_approximately_equal_to(sum, trueSum, precision, reference);
// Perfect match
assert_value_approximately_equal_to(maxValue, trueMaxValue, 0.0, 0.0);
assert_value_approximately_equal_to(minValue, trueMinValue, 0.0, 0.0);
assert_value_approximately_equal_to(range, trueRange, 0.0, 0.0);
assert_value_approximately_equal_to(quartileRange, trueQuartileRange, 0.0, 0.0);
}
QUIZ_CASE(data_statistics) {
@@ -46,11 +79,11 @@ QUIZ_CASE(data_statistics) {
* 1 1 1 1 */
constexpr int listLength1 = 4;
double n1[listLength1] = {1.0, 2.0, 3.0, 4.0};
double v1[listLength1] = {1.0, 1.0, 1.0, 1.0};
double v1[listLength1] = {1.0, 2.0, 3.0, 4.0};
double n1[listLength1] = {1.0, 1.0, 1.0, 1.0};
assert_data_statictics_equal_to(
n1,
v1,
n1,
listLength1,
/* sumOfOccurrences */ 4.0,
/* maxValue */ 4.0,
@@ -72,11 +105,11 @@ QUIZ_CASE(data_statistics) {
* 1 1 1 1 1 1 1 1 1 1 1 */
constexpr int listLength2 = 11;
double n2[listLength2] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0};
double v2[listLength2] = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
double v2[listLength2] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0};
double n2[listLength2] = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
assert_data_statictics_equal_to(
n2,
v2,
n2,
listLength2,
/* sumOfOccurrences */ 11.0,
/* maxValue */ 11.0,
@@ -96,12 +129,12 @@ QUIZ_CASE(data_statistics) {
/* 1 2 3 4 5 6 7 8 9 10 11 12
* 1 1 1 1 1 1 1 1 1 1 1 1 */
constexpr int listLength3 = 13;
double n3[listLength3] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0};
double v3[listLength3] = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
constexpr int listLength3 = 12;
double v3[listLength3] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0};
double n3[listLength3] = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
assert_data_statictics_equal_to(
n3,
v3,
n3,
listLength3,
/* sumOfOccurrences */ 12.0,
/* maxValue */ 12.0,
@@ -122,11 +155,11 @@ QUIZ_CASE(data_statistics) {
* 0.2 0.05 0.3 0.0001 0.4499 */
constexpr int listLength4 = 5;
double n4[listLength4] = {1.0, 2.0, 3.0, 5.0, 10.0};
double v4[listLength4] = {0.2, 0.05, 0.3, 0.0001, 0.4499};
double v4[listLength4] = {1.0, 2.0, 3.0, 5.0, 10.0};
double n4[listLength4] = {0.2, 0.05, 0.3, 0.0001, 0.4499};
assert_data_statictics_equal_to(
n4,
v4,
n4,
listLength4,
/* sumOfOccurrences */ 1.0,
/* maxValue */ 10.0,
@@ -147,11 +180,11 @@ QUIZ_CASE(data_statistics) {
* 0.4 0.00005 0.9 0.4 0.5 */
constexpr int listLength5 = 5;
double n5[listLength5] = {1.0, -2.0, 3.0, 5.0, 10.0};
double v5[listLength5] = {0.4, 0.00005, 0.9, 0.4, 0.5};
double v5[listLength5] = {1.0, -2.0, 3.0, 5.0, 10.0};
double n5[listLength5] = {0.4, 0.00005, 0.9, 0.4, 0.5};
assert_data_statictics_equal_to(
n5,
v5,
n5,
listLength5,
/* sumOfOccurrences */ 2.2,
/* maxValue */ 10.0,
@@ -172,11 +205,11 @@ QUIZ_CASE(data_statistics) {
* 4 5 3 1 9 */
constexpr int listLength6 = 6;
double n6[listLength6] = {-7.0, -10.0, 1.0, 2.0, 5.0, -2.0};
double v6[listLength6] = {4.0, 5.0, 3.0, 0.5, 1.0, 9.0};
double v6[listLength6] = {-7.0, -10.0, 1.0, 2.0, 5.0, -2.0};
double n6[listLength6] = {4.0, 5.0, 3.0, 0.5, 1.0, 9.0};
assert_data_statictics_equal_to(
n6,
v6,
n6,
listLength6,
/* sumOfOccurrences */ 22.5,
/* maxValue */ 5.0,
@@ -197,11 +230,11 @@ QUIZ_CASE(data_statistics) {
* 1 1 1 0 0 0 1 */
constexpr int listLength7 = 7;
double n7[listLength7] = {1.0, 1.0, 1.0, 10.0, 3.0, -1.0, 3.0};
double v7[listLength7] = {1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0};
double v7[listLength7] = {1.0, 1.0, 1.0, 10.0, 3.0, -1.0, 3.0};
double n7[listLength7] = {1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0};
assert_data_statictics_equal_to(
n7,
v7,
n7,
listLength7,
/* sumOfOccurrences */ 4.0,
/* maxValue */ 3.0,
@@ -222,11 +255,11 @@ QUIZ_CASE(data_statistics) {
* 0 1 0 1 */
constexpr int listLength8 = 4;
double n8[listLength8] = {1.0, 2.0, 3.0, 4.0};
double v8[listLength8] = {0.0, 1.0, 0.0, 1.0};
double v8[listLength8] = {1.0, 2.0, 3.0, 4.0};
double n8[listLength8] = {0.0, 1.0, 0.0, 1.0};
assert_data_statictics_equal_to(
n8,
v8,
n8,
listLength8,
/* sumOfOccurrences */ 2.0,
/* maxValue */ 4.0,
@@ -242,6 +275,31 @@ QUIZ_CASE(data_statistics) {
/* median */ 3.0,
/* sum */ 6.0,
/* squaredValueSum */ 20.0);
/* -996.85840734641
* 9 */
constexpr int listLength9 = 1;
double v9[listLength9] = {-996.85840734641};
double n9[listLength9] = {9};
assert_data_statictics_equal_to(
v9,
n9,
listLength9,
/* sumOfOccurrences */ 9.0,
/* maxValue */ -996.85840734641,
/* minValue */ -996.85840734641,
/* range */ 0.0,
/* mean */ -996.85840734641,
/* variance */ 0.0,
/* standardDeviation */ 0.0,
/* sampleStandardDeviation */ 0.0,
/* firstQuartile */ -996.85840734641,
/* thirdQuartile */ -996.85840734641,
/* quartileRange */ 0.0,
/* median */ -996.85840734641,
/* sum */ -8971.72566611769,
/* squaredValueSum */ 8943540.158675);
}
}

View File

@@ -11,6 +11,7 @@ namespace Helpers {
size_t AlignedSize(size_t realSize, size_t alignment);
size_t Gcd(size_t a, size_t b);
bool Rotate(uint32_t * dst, uint32_t * src, size_t len);
/* Return true if observedValue is approximately equal to expectedValue.
 * - For a finite non-zero expectedValue, the relative error
 *   |observedValue - expectedValue| / |expectedValue| must not exceed precision.
 * - For a zero expectedValue and a non-zero reference,
 *   |observedValue / reference| must not exceed precision.
 * - For a zero expectedValue and a zero reference, observedValue must be
 *   exactly equal to expectedValue. */
bool IsApproximatelyEqual(double observedValue, double expectedValue, double precision, double reference);
}

View File

@@ -1,5 +1,6 @@
#include <poincare/helpers.h>
#include <assert.h>
#include <cmath>
namespace Poincare {
@@ -97,5 +98,21 @@ bool Rotate(uint32_t * dst, uint32_t * src, size_t len) {
return true;
}
/* Return true if observedValue and expectedValue are approximately equal,
 * according to the precision and reference parameters:
 * - Infinite expectedValue: observedValue must be the same infinity. The
 *   relative error would otherwise be NaN and the values could never match.
 * - Other non-zero expectedValue: the relative error
 *   |observedValue - expectedValue| / |expectedValue| must not exceed precision.
 * - Zero expectedValue, non-zero reference: |observedValue / reference| must
 *   not exceed precision.
 * - Zero expectedValue, zero reference: the values must match exactly.
 * A NaN observedValue or expectedValue never compares as approximately equal. */
bool IsApproximatelyEqual(double observedValue, double expectedValue, double precision, double reference) {
  if (std::isinf(expectedValue)) {
    // (inf - inf) / inf is NaN, so infinities are compared exactly
    return observedValue == expectedValue;
  }
  if (expectedValue != 0.0) {
    double relativeError = std::fabs((observedValue - expectedValue) / expectedValue);
    // The relative error must be smaller than the precision
    return relativeError <= precision;
  }
  if (reference != 0.0) {
    double referenceRatio = std::fabs(observedValue / reference);
    // The observedValue must be negligible against the reference
    return referenceRatio <= precision;
  }
  // The observedValue must exactly match the expectedValue
  return observedValue == expectedValue;
}
}
}