diff --git a/apps/regression/calculation_controller.cpp b/apps/regression/calculation_controller.cpp index 1f3d97efa..eb262a2ef 100644 --- a/apps/regression/calculation_controller.cpp +++ b/apps/regression/calculation_controller.cpp @@ -237,7 +237,7 @@ void CalculationController::willDisplayCellAtLocation(HighlightCell * cell, int if ((modelType == Model::Type::Linear && j == numberRows - 2) || j == numberRows - 1) { double calculation; if (j == numberRows - 1) { - calculation = m_store->squaredCorrelationCoefficient(seriesNumber); + calculation = m_store->determinationCoefficientForSeries(seriesNumber, globContext); } else { calculation = m_store->correlationCoefficient(seriesNumber); } diff --git a/apps/regression/graph_controller.cpp b/apps/regression/graph_controller.cpp index b207c3ed9..1bb6d9625 100644 --- a/apps/regression/graph_controller.cpp +++ b/apps/regression/graph_controller.cpp @@ -200,7 +200,7 @@ void GraphController::reloadBannerView() { // Set "r2=..." numberOfChar = 0; legend = " r2="; - double r2 = m_store->squaredCorrelationCoefficient(*m_selectedSeriesIndex); + double r2 = m_store->determinationCoefficientForSeries(*m_selectedSeriesIndex, globalContext()); numberOfChar += strlcpy(buffer, legend, bufferSize); numberOfChar += PoincareHelpers::ConvertFloatToText(r2, buffer + numberOfChar, bufferSize - numberOfChar, Preferences::LargeNumberOfSignificantDigits); m_bannerView.subTextAtIndex(1+index)->setText(buffer); diff --git a/apps/regression/store.cpp b/apps/regression/store.cpp index cfba25903..8b9e438cb 100644 --- a/apps/regression/store.cpp +++ b/apps/regression/store.cpp @@ -149,8 +149,8 @@ void Store::setDefault() { } } float range = maxX - minX; - setXMin(minX - k_displayHorizontalMarginRatio*range); - setXMax(maxX + k_displayHorizontalMarginRatio*range); + setXMin(minX - k_displayHorizontalMarginRatio * range); + setXMax(maxX + k_displayHorizontalMarginRatio * range); } /* Series */ @@ -161,7 +161,7 @@ bool Store::seriesIsEmpty(int series) const { /* Calculations */ -double * Store::coefficientsForSeries(int series, Poincare::Context * globalContext) { +void Store::updateCoefficients(int series, Poincare::Context * globalContext) { assert(series >= 0 && series <= k_numberOfSeries); assert(!seriesIsEmpty(series)); uint32_t storeChecksumSeries = storeChecksumForSeries(series); @@ -179,10 +179,25 @@ double * Store::coefficientsForSeries(int series, Poincare::Context * globalCont seriesModel->fit(this, series, m_regressionCoefficients[series], globalContext); m_regressionChanged[series] = false; m_seriesChecksum[series] = storeChecksumSeries; + /* m_determinationCoefficient must be updated after m_seriesChecksum and m_regressionChanged + * updates to avoid infinite recursive calls as computeDeterminationCoefficient calls + * yValueForXValue which calls coefficientsForSeries which calls updateCoefficients */ + m_determinationCoefficient[series] = computeDeterminationCoefficient(series, globalContext); } +} + +double * Store::coefficientsForSeries(int series, Poincare::Context * globalContext) { + updateCoefficients(series, globalContext); return m_regressionCoefficients[series]; } +double Store::determinationCoefficientForSeries(int series, Poincare::Context * globalContext) { + /* Returns the Determination coefficient (R2). + * It will be updated if the regression has been updated */ + updateCoefficients(series, globalContext); + return m_determinationCoefficient[series]; +} + double Store::doubleCastedNumberOfPairsOfSeries(int series) const { return DoublePairStore::numberOfPairsOfSeries(series); } @@ -212,12 +227,13 @@ float Store::minValueOfColumn(int series, int i) const { double Store::squaredValueSumOfColumn(int series, int i, bool lnOfSeries) const { double result = 0; - for (int k = 0; k < numberOfPairsOfSeries(series); k++) { + const int numberOfPairs = numberOfPairsOfSeries(series); + for (int k = 0; k < numberOfPairs; k++) { + double value = m_data[series][i][k]; if (lnOfSeries) { - result += log(m_data[series][i][k]) * log(m_data[series][i][k]); - } else { - result += m_data[series][i][k]*m_data[series][i][k]; + value = log(value); } + result += value * value; } return result; } @@ -225,22 +241,24 @@ double Store::squaredValueSumOfColumn(int series, int i, bool lnOfSeries) const double Store::columnProductSum(int series, bool lnOfSeries) const { double result = 0; for (int k = 0; k < numberOfPairsOfSeries(series); k++) { + double value0 = m_data[series][0][k]; + double value1 = m_data[series][1][k]; if (lnOfSeries) { - result += log(m_data[series][0][k]) * log(m_data[series][1][k]); - } else { - result += m_data[series][0][k] * m_data[series][1][k]; + value0 = log(value0); + value1 = log(value1); } + result += value0 * value1; } return result; } double Store::meanOfColumn(int series, int i, bool lnOfSeries) const { - return numberOfPairsOfSeries(series) == 0 ? 0 : sumOfColumn(series, i, lnOfSeries)/numberOfPairsOfSeries(series); + return numberOfPairsOfSeries(series) == 0 ? 0 : sumOfColumn(series, i, lnOfSeries) / numberOfPairsOfSeries(series); } double Store::varianceOfColumn(int series, int i, bool lnOfSeries) const { double mean = meanOfColumn(series, i, lnOfSeries); - return squaredValueSumOfColumn(series, i, lnOfSeries)/numberOfPairsOfSeries(series) - mean*mean; + return squaredValueSumOfColumn(series, i, lnOfSeries)/numberOfPairsOfSeries(series) - mean * mean; } double Store::standardDeviationOfColumn(int series, int i, bool lnOfSeries) const { @@ -248,7 +266,9 @@ double Store::standardDeviationOfColumn(int series, int i, bool lnOfSeries) cons } double Store::covariance(int series, bool lnOfSeries) const { - return columnProductSum(series, lnOfSeries)/numberOfPairsOfSeries(series) - meanOfColumn(series, 0, lnOfSeries)*meanOfColumn(series, 1, lnOfSeries); + double mean0 = meanOfColumn(series, 0, lnOfSeries); + double mean1 = meanOfColumn(series, 1, lnOfSeries); + return columnProductSum(series, lnOfSeries)/numberOfPairsOfSeries(series) - mean0 * mean1; } double Store::slope(int series, bool lnOfSeries) const { @@ -268,20 +288,39 @@ double Store::yValueForXValue(int series, double x, Poincare::Context * globalCo double Store::xValueForYValue(int series, double y, Poincare::Context * globalContext) { Model * model = regressionModel((int)m_regressionTypes[series]); double * coefficients = coefficientsForSeries(series, globalContext); - return model->levelSet(coefficients, xMin(), xGridUnit()/10.0, xMax(), y, globalContext); + return model->levelSet(coefficients, xMin(), xGridUnit() / 10.0, xMax(), y, globalContext); } double Store::correlationCoefficient(int series) const { - double sd0 = standardDeviationOfColumn(series, 0); - double sd1 = standardDeviationOfColumn(series, 1); - return (sd0 == 0.0 || sd1 == 0.0) ? 1.0 : covariance(series)/(sd0*sd1); -} - -double Store::squaredCorrelationCoefficient(int series) const { - double cov = covariance(series); + /* Returns the correlation coefficient (R) between the series X and Y. + * In non-linear regressions, its square is different from the determinationCoefficient + * It is usually displayed in linear regressions only to avoid confusion */ double v0 = varianceOfColumn(series, 0); double v1 = varianceOfColumn(series, 1); - return (v0 == 0.0 || v1 == 0.0) ? 1.0 : cov*cov/(v0*v1); + return (v0 == 0.0 || v1 == 0.0) ? 1.0 : covariance(series) / std::sqrt(v0 * v1); +} + +double Store::computeDeterminationCoefficient(int series, Poincare::Context * globalContext) { + /* Computes and returns the determination coefficient (R2) of the regression. + * For regressions, it is equal to the square of the correlation coefficient between + * the series Y and the evaluated values from the series X and the selected model + * Computing the coefficient using the latter equality would require more calls to the evaluated + * values and would be less precise. */ + // Residual sum of squares + double ssr = 0; + // Total sum of squares + double sst = 0; + double mean = meanOfColumn(series, 1); + const int numberOfPairs = numberOfPairsOfSeries(series); + for (int k = 0; k < numberOfPairs; k++) { + // Difference between the observation and the estimated value of the model + double residual = m_data[series][1][k] - yValueForXValue(series, m_data[series][0][k], globalContext); + ssr += residual * residual; + // Difference between the observation and the overall observations mean + double difference = m_data[series][1][k] - mean; + sst += difference * difference; + } + return sst == 0.0 ? 1.0 : 1.0 - ssr / sst; } Model * Store::regressionModel(int index) { diff --git a/apps/regression/store.h b/apps/regression/store.h index 92acd0f8f..9a305f341 100644 --- a/apps/regression/store.h +++ b/apps/regression/store.h @@ -57,7 +57,9 @@ public: bool seriesIsEmpty(int series) const override; // Calculation + void updateCoefficients(int series, Poincare::Context * globalContext); double * coefficientsForSeries(int series, Poincare::Context * globalContext); + double determinationCoefficientForSeries(int series, Poincare::Context * globalContext); // R2 double doubleCastedNumberOfPairsOfSeries(int series) const; double squaredValueSumOfColumn(int series, int i, bool lnOfSeries = false) const; double columnProductSum(int series, bool lnOfSeries = false) const; @@ -69,9 +71,9 @@ public: double yIntercept(int series, bool lnOfSeries = false) const; double yValueForXValue(int series, double x, Poincare::Context * globalContext); double xValueForYValue(int series, double y, Poincare::Context * globalContext); - double correlationCoefficient(int series) const; - double squaredCorrelationCoefficient(int series) const; + double correlationCoefficient(int series) const; // R private: + double computeDeterminationCoefficient(int series, Poincare::Context * globalContext); constexpr static float k_displayHorizontalMarginRatio = 0.05f; void resetMemoization(); float maxValueOfColumn(int series, int i) const; //TODO LEA why float ? @@ -90,6 +92,7 @@ private: TrigonometricModel m_trigonometricModel; LogisticModel m_logisticModel; double m_regressionCoefficients[k_numberOfSeries][Model::k_maxNumberOfCoefficients]; + double m_determinationCoefficient[k_numberOfSeries]; bool m_regressionChanged[k_numberOfSeries]; Poincare::Preferences::AngleUnit m_angleUnit; }; diff --git a/apps/regression/test/model.cpp b/apps/regression/test/model.cpp index 369af466b..b92e1cce5 100644 --- a/apps/regression/test/model.cpp +++ b/apps/regression/test/model.cpp @@ -47,7 +47,7 @@ void assert_regression_is(double * xi, double * yi, int numberOfPoints, Model::T } // Compute and compare r2 - double r2 = store.squaredCorrelationCoefficient(series); + double r2 = store.determinationCoefficientForSeries(series, &globalContext); assert_value_is(r2, trueR2); }