From 29b0e86225eb0ce84d9016f6163e7fb869e87975 Mon Sep 17 00:00:00 2001 From: Hugo Saint-Vignes Date: Tue, 26 May 2020 16:30:25 +0200 Subject: [PATCH] [quiz] Improve test possibilities for regression and statistics Change-Id: I4414fad24e10dcbd56cd9aff1e35e00ba66dda2c --- apps/regression/test/model.cpp | 83 +++++++++------ apps/statistics/test/store.cpp | 152 +++++++++++++++++++--------- poincare/include/poincare/helpers.h | 1 + poincare/src/helpers.cpp | 17 ++++ 4 files changed, 173 insertions(+), 80 deletions(-) diff --git a/apps/regression/test/model.cpp b/apps/regression/test/model.cpp index 068435507..16b216114 100644 --- a/apps/regression/test/model.cpp +++ b/apps/regression/test/model.cpp @@ -5,6 +5,7 @@ #include "../model/model.h" #include "../regression_context.h" #include "../store.h" +#include using namespace Poincare; using namespace Regression; @@ -12,21 +13,6 @@ using namespace Regression; /* The data was generated by choosing X1 and the coefficients of the regression, * then filling Y1 with the regression formula + random()/10. 
*/ -double relativeError(double observedValue, double expectedValue) { - assert(expectedValue != 0.0); - return std::fabs((observedValue - expectedValue) / expectedValue); -} - -void assert_value_is(double observedValue, double expectedValue) { - if (expectedValue != 0.0) { - double precision = 0.01; - quiz_assert(relativeError(observedValue, expectedValue) < precision); - } else { - // The expected value can't be null for relativeError, the exact value is then expected - quiz_assert(observedValue == expectedValue); - } -} - void setRegressionPoints(Regression::Store * store, int series, int numberOfPoints, double * xi, double * yi = nullptr) { for (int i = 0; i < numberOfPoints; i++) { store->set(xi[i], series, 0, i); @@ -45,16 +31,21 @@ void assert_regression_is(double * xi, double * yi, int numberOfPoints, Model::T Shared::GlobalContext globalContext; RegressionContext context(&store, &globalContext); + double precision = 1e-2; + // When trueCoefficients = 0, a DBL_EPSILON reference ensures that the only accepted errors are due to double approximations + double reference = 100.0 * DBL_EPSILON; + // Compute and compare the coefficients double * coefficients = store.coefficientsForSeries(series, &context); int numberOfCoefs = store.modelForSeries(series)->numberOfCoefficients(); for (int i = 0; i < numberOfCoefs; i++) { - assert_value_is(coefficients[i], trueCoefficients[i]); + quiz_assert(Helpers::IsApproximatelyEqual(coefficients[i], trueCoefficients[i], precision, reference)); } - // Compute and compare r2 + // Compute and check r2 value and sign double r2 = store.determinationCoefficientForSeries(series, &globalContext); - assert_value_is(r2, trueR2); + quiz_assert(r2 >= 0.0); + quiz_assert(Helpers::IsApproximatelyEqual(r2, trueR2, precision, reference)); } QUIZ_CASE(linear_regression) { @@ -188,11 +179,26 @@ void assert_column_calculations_is(double * xi, int numberOfPoints, double trueM double squaredSum = store.squaredValueSumOfColumn(series,0); double 
standardDeviation = store.standardDeviationOfColumn(series,0); double variance = store.varianceOfColumn(series,0); - assert_value_is(mean, trueMean); - assert_value_is(sum, trueSum); - assert_value_is(squaredSum, trueSquaredSum); - assert_value_is(standardDeviation, trueStandardDeviation); - assert_value_is(variance, trueVariance); + + // Check that squaredSum, standardDeviation and variance are positive + quiz_assert(squaredSum >= 0.0); + quiz_assert(standardDeviation >= 0.0); + quiz_assert(variance >= 0.0); + + double precision = 1e-3; + // When the expected value is 0, the expected coefficient must be negligible against reference. + // The least likely value to be null is trueSquaredSum + double reference = trueSquaredSum; + + quiz_assert(Helpers::IsApproximatelyEqual(variance, trueVariance, precision, reference)); + quiz_assert(Helpers::IsApproximatelyEqual(squaredSum, trueSquaredSum, precision, reference)); + + // adapt the reference + reference = std::sqrt(trueSquaredSum); + + quiz_assert(Helpers::IsApproximatelyEqual(mean, trueMean, precision, reference)); + quiz_assert(Helpers::IsApproximatelyEqual(sum, trueSum, precision, reference)); + quiz_assert(Helpers::IsApproximatelyEqual(standardDeviation, trueStandardDeviation, precision, reference)); } QUIZ_CASE(column_calculation) { @@ -207,12 +213,12 @@ QUIZ_CASE(column_calculation) { QUIZ_CASE(constant_column_calculation) { // This data produced a negative variance before - double x[] = {-996.8584, -996.8584, -996.8584}; - double mean = -996.8584; - double sum = -2990.5752; - double squaredSum = 2.98118000895168e6; - double standardDeviation = 0; - double variance = 0; + double x[] = {-996.85840734641, -996.85840734641, -996.85840734641}; + double mean = -996.85840734641; + double sum = -2990.57522203923; + double squaredSum = 2981180.0528916633; + double standardDeviation = 0.0; + double variance = 0.0; assert_column_calculations_is(x, 3, mean, sum, squaredSum, standardDeviation, variance); } @@ -222,18 
+228,29 @@ void assert_regression_calculations_is(double * xi, double * yi, int numberOfPoi setRegressionPoints(&store, series, numberOfPoints, xi, yi); + double precision = 1e-3; + // Compute and compare the regression calculations metrics double covariance = store.covariance(series); double productSum = store.columnProductSum(series); + + // trueProductSum and trueCovariance are using each other as reference + // By construction, they often have a close value with a numberOfPoints factor + quiz_assert(Helpers::IsApproximatelyEqual(covariance, trueCovariance, precision, trueProductSum / numberOfPoints)); + quiz_assert(Helpers::IsApproximatelyEqual(productSum, trueProductSum, precision, trueCovariance * numberOfPoints)); + + // When trueR = 0, a DBL_EPSILON reference ensures that the only accepted errors are due to double approximations + // sqrt is used because the R is computed from sqrt(V1*V0) + double reference = 100.0 * std::sqrt(DBL_EPSILON); + double r = store.correlationCoefficient(series); - assert_value_is(covariance, trueCovariance); - assert_value_is(productSum, trueProductSum); - assert_value_is(r, trueR); + quiz_assert(r >= 0.0); + quiz_assert(Helpers::IsApproximatelyEqual(r, trueR, precision, reference)); } QUIZ_CASE(regression_calculation) { double x[] = {1.0, 50.0, 34.0, 67.0, 20.0}; - double y[] = {71.860, 2775514, 979755.1, 6116830, 233832.9}; + double y[] = {71.860, 2775514, 979755.1, 6116830.0, 233832.9}; double covariance = 4.7789036e7; double productSum = 586591713.26; double r = 0.919088; diff --git a/apps/statistics/test/store.cpp b/apps/statistics/test/store.cpp index e5a08264d..1126dd8d9 100644 --- a/apps/statistics/test/store.cpp +++ b/apps/statistics/test/store.cpp @@ -3,41 +3,74 @@ #include #include #include "../store.h" +#include + +using namespace Poincare; namespace Statistics { -void assert_value_approximately_equal_to(double d1, double d2) { - assert((std::isnan(d1) && std::isnan(d2)) - || (std::isinf(d1) && std::isinf(d2) && 
d1*d2 > 0 /*same sign*/) - || fabs(d1-d2) < 0.001); +void assert_value_approximately_equal_to(double d1, double d2, double precision, double reference) { + quiz_assert((std::isnan(d1) && std::isnan(d2)) + || (std::isinf(d1) && std::isinf(d2) && d1 * d2 > 0.0 /*same sign*/) + || Helpers::IsApproximatelyEqual(d1, d2, precision, reference)); } -void assert_data_statictics_equal_to(double n[], double v[], int numberOfData, double sumOfOccurrences, double maxValue, double minValue, double range, double mean, double variance, double standardDeviation, double sampleStandardDeviation, double firstQuartile, double thirdQuartile, double quartileRange, double median, double sum, double squaredValueSum) { +void assert_data_statictics_equal_to(double v[], double n[], int numberOfData, double trueSumOfOccurrences, double trueMaxValue, double trueMinValue, double trueRange, double trueMean, double trueVariance, double trueStandardDeviation, double trueSampleStandardDeviation, double trueFirstQuartile, double trueThirdQuartile, double trueQuartileRange, double trueMedian, double trueSum, double trueSquaredValueSum) { Store store; int seriesIndex = 0; // Set the data in the store for (int i = 0; i < numberOfData; i++) { - store.set(n[i], seriesIndex, 0, i); - store.set(v[i], seriesIndex, 1, i); + store.set(v[i], seriesIndex, 0, i); + store.set(n[i], seriesIndex, 1, i); } + double precision = 1e-3; + // Compare the statistics - assert_value_approximately_equal_to(standardDeviation * standardDeviation, variance); - assert_value_approximately_equal_to(store.sumOfOccurrences(seriesIndex), sumOfOccurrences); - assert_value_approximately_equal_to(store.maxValue(seriesIndex), maxValue); - assert_value_approximately_equal_to(store.minValue(seriesIndex), minValue); - assert_value_approximately_equal_to(store.range(seriesIndex), range); - assert_value_approximately_equal_to(store.mean(seriesIndex), mean); - assert_value_approximately_equal_to(store.variance(seriesIndex), variance); - 
assert_value_approximately_equal_to(store.standardDeviation(seriesIndex), standardDeviation); - assert_value_approximately_equal_to(store.sampleStandardDeviation(seriesIndex), sampleStandardDeviation); - assert_value_approximately_equal_to(store.firstQuartile(seriesIndex), firstQuartile); - assert_value_approximately_equal_to(store.thirdQuartile(seriesIndex), thirdQuartile); - assert_value_approximately_equal_to(store.quartileRange(seriesIndex), quartileRange); - assert_value_approximately_equal_to(store.median(seriesIndex), median); - assert_value_approximately_equal_to(store.sum(seriesIndex), sum); - assert_value_approximately_equal_to(store.squaredValueSum(seriesIndex), squaredValueSum); + double sumOfOccurrences = store.sumOfOccurrences(seriesIndex); + double maxValue = store.maxValue(seriesIndex); + double minValue = store.minValue(seriesIndex); + double range = store.range(seriesIndex); + double mean = store.mean(seriesIndex); + double variance = store.variance(seriesIndex); + double standardDeviation = store.standardDeviation(seriesIndex); + double sampleStandardDeviation = store.sampleStandardDeviation(seriesIndex); + double firstQuartile = store.firstQuartile(seriesIndex); + double thirdQuartile = store.thirdQuartile(seriesIndex); + double quartileRange = store.quartileRange(seriesIndex); + double median = store.median(seriesIndex); + double sum = store.sum(seriesIndex); + double squaredValueSum = store.squaredValueSum(seriesIndex); + + // Check the positive statistics + quiz_assert(range >= 0.0); + quiz_assert(variance >= 0.0); + quiz_assert(standardDeviation >= 0.0); + quiz_assert(sampleStandardDeviation >= 0.0); + quiz_assert(quartileRange >= 0.0); + quiz_assert(squaredValueSum >= 0.0); + + double reference = trueSquaredValueSum; + assert_value_approximately_equal_to(variance, trueVariance, precision, reference); + assert_value_approximately_equal_to(squaredValueSum, trueSquaredValueSum, precision, reference); + + reference = 
std::sqrt(trueSquaredValueSum); + assert_value_approximately_equal_to(trueStandardDeviation * trueStandardDeviation, trueVariance, precision, reference); + assert_value_approximately_equal_to(sumOfOccurrences, trueSumOfOccurrences, precision, reference); + assert_value_approximately_equal_to(mean, trueMean, precision, reference); + assert_value_approximately_equal_to(standardDeviation, trueStandardDeviation, precision, reference); + assert_value_approximately_equal_to(sampleStandardDeviation, trueSampleStandardDeviation, precision, reference); + assert_value_approximately_equal_to(firstQuartile, trueFirstQuartile, precision, reference); + assert_value_approximately_equal_to(thirdQuartile, trueThirdQuartile, precision, reference); + assert_value_approximately_equal_to(median, trueMedian, precision, reference); + assert_value_approximately_equal_to(sum, trueSum, precision, reference); + + // Perfect match + assert_value_approximately_equal_to(maxValue, trueMaxValue, 0.0, 0.0); + assert_value_approximately_equal_to(minValue, trueMinValue, 0.0, 0.0); + assert_value_approximately_equal_to(range, trueRange, 0.0, 0.0); + assert_value_approximately_equal_to(quartileRange, trueQuartileRange, 0.0, 0.0); } QUIZ_CASE(data_statistics) { @@ -46,11 +79,11 @@ QUIZ_CASE(data_statistics) { * 1 1 1 1 */ constexpr int listLength1 = 4; - double n1[listLength1] = {1.0, 2.0, 3.0, 4.0}; - double v1[listLength1] = {1.0, 1.0, 1.0, 1.0}; + double v1[listLength1] = {1.0, 2.0, 3.0, 4.0}; + double n1[listLength1] = {1.0, 1.0, 1.0, 1.0}; assert_data_statictics_equal_to( - n1, v1, + n1, listLength1, /* sumOfOccurrences */ 4.0, /* maxValue */ 4.0, @@ -72,11 +105,11 @@ QUIZ_CASE(data_statistics) { * 1 1 1 1 1 1 1 1 1 1 1 */ constexpr int listLength2 = 11; - double n2[listLength2] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0}; - double v2[listLength2] = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0}; + double v2[listLength2] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 
10.0, 11.0}; + double n2[listLength2] = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0}; assert_data_statictics_equal_to( - n2, v2, + n2, listLength2, /* sumOfOccurrences */ 11.0, /* maxValue */ 11.0, @@ -96,12 +129,12 @@ QUIZ_CASE(data_statistics) { /* 1 2 3 4 5 6 7 8 9 10 11 12 * 1 1 1 1 1 1 1 1 1 1 1 1 */ - constexpr int listLength3 = 13; - double n3[listLength3] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0}; - double v3[listLength3] = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0}; + constexpr int listLength3 = 12; + double v3[listLength3] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0}; + double n3[listLength3] = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0}; assert_data_statictics_equal_to( - n3, v3, + n3, listLength3, /* sumOfOccurrences */ 12.0, /* maxValue */ 12.0, @@ -122,11 +155,11 @@ QUIZ_CASE(data_statistics) { * 0.2 0.05 0.3 0.0001 0.4499 */ constexpr int listLength4 = 5; - double n4[listLength4] = {1.0, 2.0, 3.0, 5.0, 10.0}; - double v4[listLength4] = {0.2, 0.05, 0.3, 0.0001, 0.4499}; + double v4[listLength4] = {1.0, 2.0, 3.0, 5.0, 10.0}; + double n4[listLength4] = {0.2, 0.05, 0.3, 0.0001, 0.4499}; assert_data_statictics_equal_to( - n4, v4, + n4, listLength4, /* sumOfOccurrences */ 1.0, /* maxValue */ 10.0, @@ -147,11 +180,11 @@ QUIZ_CASE(data_statistics) { * 0.4 0.00005 0.9 0.4 0.5 */ constexpr int listLength5 = 5; - double n5[listLength5] = {1.0, -2.0, 3.0, 5.0, 10.0}; - double v5[listLength5] = {0.4, 0.00005, 0.9, 0.4, 0.5}; + double v5[listLength5] = {1.0, -2.0, 3.0, 5.0, 10.0}; + double n5[listLength5] = {0.4, 0.00005, 0.9, 0.4, 0.5}; assert_data_statictics_equal_to( - n5, v5, + n5, listLength5, /* sumOfOccurrences */ 2.2, /* maxValue */ 10.0, @@ -172,11 +205,11 @@ QUIZ_CASE(data_statistics) { * 4 5 3 1 9 */ constexpr int listLength6 = 6; - double n6[listLength6] = {-7.0, -10.0, 1.0, 2.0, 5.0, -2.0}; - double v6[listLength6] = {4.0, 5.0, 3.0, 0.5, 1.0, 9.0}; + double 
v6[listLength6] = {-7.0, -10.0, 1.0, 2.0, 5.0, -2.0}; + double n6[listLength6] = {4.0, 5.0, 3.0, 0.5, 1.0, 9.0}; assert_data_statictics_equal_to( - n6, v6, + n6, listLength6, /* sumOfOccurrences */ 22.5, /* maxValue */ 5.0, @@ -197,11 +230,11 @@ QUIZ_CASE(data_statistics) { * 1 1 1 0 0 0 1 */ constexpr int listLength7 = 7; - double n7[listLength7] = {1.0, 1.0, 1.0, 10.0, 3.0, -1.0, 3.0}; - double v7[listLength7] = {1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0}; + double v7[listLength7] = {1.0, 1.0, 1.0, 10.0, 3.0, -1.0, 3.0}; + double n7[listLength7] = {1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0}; assert_data_statictics_equal_to( - n7, v7, + n7, listLength7, /* sumOfOccurrences */ 4.0, /* maxValue */ 3.0, @@ -222,11 +255,11 @@ QUIZ_CASE(data_statistics) { * 0 1 0 1 */ constexpr int listLength8 = 4; - double n8[listLength8] = {1.0, 2.0, 3.0, 4.0}; - double v8[listLength8] = {0.0, 1.0, 0.0, 1.0}; + double v8[listLength8] = {1.0, 2.0, 3.0, 4.0}; + double n8[listLength8] = {0.0, 1.0, 0.0, 1.0}; assert_data_statictics_equal_to( - n8, v8, + n8, listLength8, /* sumOfOccurrences */ 2.0, /* maxValue */ 4.0, @@ -242,6 +275,31 @@ QUIZ_CASE(data_statistics) { /* median */ 3.0, /* sum */ 6.0, /* squaredValueSum */ 20.0); + + /* -996.85840734641 + * 9 */ + + constexpr int listLength9 = 1; + double v9[listLength9] = {-996.85840734641}; + double n9[listLength9] = {9}; + assert_data_statictics_equal_to( + v9, + n9, + listLength9, + /* sumOfOccurrences */ 9.0, + /* maxValue */ -996.85840734641, + /* minValue */ -996.85840734641, + /* range */ 0.0, + /* mean */ -996.85840734641, + /* variance */ 0.0, + /* standardDeviation */ 0.0, + /* sampleStandardDeviation */ 0.0, + /* firstQuartile */ -996.85840734641, + /* thirdQuartile */ -996.85840734641, + /* quartileRange */ 0.0, + /* median */ -996.85840734641, + /* sum */ -8971.72566611769, + /* squaredValueSum */ 8943540.158675); } } diff --git a/poincare/include/poincare/helpers.h b/poincare/include/poincare/helpers.h index 517a64d2a..dab8dd8c7 100644 --- 
// Helper added by this patch: declared in poincare/include/poincare/helpers.h
// and defined in poincare/src/helpers.cpp, inside namespace Poincare::Helpers.
#include <cmath>

namespace Poincare {
namespace Helpers {

/* Return true if observedValue and expectedValue are approximately equal,
 * according to the precision and reference parameters.
 *
 * - expectedValue is +/-infinity: observedValue must be the very same
 *   infinity. A relative error cannot be computed there ((inf - inf) / inf is
 *   NaN), so without this case two identical infinities compared unequal.
 * - expectedValue is non-zero: the relative error
 *   |observedValue - expectedValue| / |expectedValue| must be <= precision.
 * - expectedValue is zero and reference is non-zero: observedValue must be
 *   negligible against reference, i.e. |observedValue / reference| <= precision.
 * - expectedValue and reference are both zero: observedValue must be exactly
 *   equal to expectedValue.
 *
 * Any comparison involving NaN yields false. Callers should pass a finite
 * reference: with an infinite reference every finite observedValue is
 * accepted when expectedValue is zero. */
bool IsApproximatelyEqual(double observedValue, double expectedValue, double precision, double reference) {
  if (std::isinf(expectedValue)) {
    // Exact comparison: same sign of infinity required
    return observedValue == expectedValue;
  }
  if (expectedValue != 0.0) {
    // The relative error must be smaller than the precision
    const double relativeError = std::fabs((observedValue - expectedValue) / expectedValue);
    return relativeError <= precision;
  }
  if (reference != 0.0) {
    // The observedValue must be negligible against the reference
    const double referenceRatio = std::fabs(observedValue / reference);
    return referenceRatio <= precision;
  }
  // The observedValue must exactly match the expectedValue
  return observedValue == expectedValue;
}

}
}