From ef8e05b6af214262f04e97dead2a5e395facea8f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A9lix=20MARQUET?= <72651575+BreizhHardware@users.noreply.github.com> Date: Mon, 29 Sep 2025 16:38:58 +0200 Subject: [PATCH] Obisidian vault auto-backup: 29-09-2025 16:38:58 on . 1 files edited --- ISEN/IA/CIPA4/TP/TP2/tp2_IA.ipynb | 208 +++++++++++++++++++++++++++--- 1 file changed, 188 insertions(+), 20 deletions(-) diff --git a/ISEN/IA/CIPA4/TP/TP2/tp2_IA.ipynb b/ISEN/IA/CIPA4/TP/TP2/tp2_IA.ipynb index 6b4037b..c4d7f5f 100644 --- a/ISEN/IA/CIPA4/TP/TP2/tp2_IA.ipynb +++ b/ISEN/IA/CIPA4/TP/TP2/tp2_IA.ipynb @@ -18,7 +18,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 1, "id": "5a9a29a0", "metadata": {}, "outputs": [ @@ -62,7 +62,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "5be8782b", "metadata": {}, "outputs": [], @@ -81,7 +81,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "70575266", "metadata": {}, "outputs": [ @@ -102,7 +102,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "eaffc443", "metadata": {}, "outputs": [ @@ -171,7 +171,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "d333dc4d", "metadata": {}, "outputs": [ @@ -256,7 +256,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "id": "c0966e7f", "metadata": {}, "outputs": [ @@ -318,7 +318,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "id": "b0560d20", "metadata": {}, "outputs": [ @@ -399,7 +399,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "id": "bff45d57", "metadata": {}, "outputs": [ @@ -449,7 +449,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "id": "e02058de", "metadata": {}, "outputs": [ @@ -1218,7 +1218,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "id": "e73e18a3", "metadata": {}, "outputs": [ @@ -1296,7 +1296,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "id": "acaeacd9", "metadata": {}, "outputs": [ @@ -1322,7 +1322,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "id": "e5949779", "metadata": {}, "outputs": [ @@ -1370,7 +1370,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "id": "5733c7b1", "metadata": {}, "outputs": [ @@ -1447,7 +1447,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "id": "970fce9c", "metadata": {}, "outputs": [ @@ -1583,7 +1583,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "id": "b4a6c352", "metadata": {}, "outputs": [ @@ -1677,7 +1677,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "id": "1ad09da3", "metadata": {}, "outputs": [ @@ -1777,7 +1777,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "id": "0a4ec288", "metadata": {}, "outputs": [ @@ -1941,7 +1941,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": null, "id": "7dc960b0", "metadata": {}, "outputs": [ @@ -2037,7 +2037,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": null, "id": "6f487ebf", "metadata": {}, "outputs": [ @@ -2093,9 +2093,29 @@ "name": "stderr", "output_type": "stream", "text": [ + "/Volumes/SSD/Nextcloud/Documents/ISEN/Cours/Obsidian Vault/ISEN/IA/CIPA4/TP/TP2/.venv/lib/python3.13/site-packages/sklearn/linear_model/_stochastic_gradient.py:726: ConvergenceWarning: Maximum number of iteration reached before convergence. Consider increasing max_iter to improve the fit.\n", + " warnings.warn(\n", "/Volumes/SSD/Nextcloud/Documents/ISEN/Cours/Obsidian Vault/ISEN/IA/CIPA4/TP/TP2/.venv/lib/python3.13/site-packages/sklearn/linear_model/_stochastic_gradient.py:726: ConvergenceWarning: Maximum number of iteration reached before convergence. Consider increasing max_iter to improve the fit.\n", " warnings.warn(\n" ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "c. Évaluation avec données standardisées:\n", + "Fold 1: 0.9013 (90.14%)\n", + "Fold 2: 0.8939 (89.39%)\n", + "Fold 3: 0.9055 (90.55%)\n", + "Moyenne: 0.9002 (90.02%)\n", + "\n", + "=== Comparaison des résultats ===\n", + "Sans standardisation: 0.8721 (87.21%)\n", + "Avec standardisation: 0.9002 (90.02%)\n", + "Amélioration: +0.0281 (+2.81 points de %)\n", + "✅ Les résultats sont MEILLEURS avec la standardisation\n" + ] } ], "source": [ @@ -2114,7 +2134,7 @@ "# c. Évaluer le classifieur SGD avec les données standardisées\n", "scores_scaled = cross_val_score(sgd_multiclass, X_train_scaled, Y_train, cv=3, scoring='accuracy')\n", "\n", - "print(\"\\nc. Évaluation avec données standardisées:\")\n", + "print(\"\\nÉvaluation avec données standardisées:\")\n", "for i, score in enumerate(scores_scaled, 1):\n", " print(f\"Fold {i}: {score:.4f} ({score*100:.2f}%)\")\n", "\n", @@ -2135,11 +2155,159 @@ " print(\"❌ Les résultats ne sont PAS meilleurs avec la standardisation\")" ] }, + { + "cell_type": "markdown", + "id": "ab0c9c23", + "metadata": {}, + "source": [ + "### 2-2- Matrice de confusion" + ] + }, { "cell_type": "code", "execution_count": null, "id": "55bf66d7", "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "=== Matrice de confusion du classifieur SGD multi-classes ===\n", + "\n", + "Prédiction des classes avec cross_val_predict...\n" + ] + }, + { + "ename": "", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[1;31mLe noyau s’est bloqué lors de l’exécution du code dans une cellule active ou une cellule précédente. \n", + "\u001b[1;31mVeuillez vérifier le code dans la ou les cellules pour identifier une cause possible de l’échec. \n", + "\u001b[1;31mCliquez ici pour plus d’informations. \n", + "\u001b[1;31mPour plus d’informations, consultez Jupyter log." + ] + } + ], + "source": [ + "from sklearn.model_selection import cross_val_predict\n", + "from sklearn.metrics import confusion_matrix\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "\n", + "print(\"=== Matrice de confusion du classifieur SGD multi-classes ===\\n\")\n", + "\n", + "# Prédire les classes avec cross_val_predict pour le modèle multi-classes\n", + "print(\"Prédiction des classes avec cross_val_predict...\")\n", + "y_train_pred_multiclass = cross_val_predict(sgd_multiclass, X_train, Y_train, cv=3)\n", + "\n", + "print(f\"Forme des prédictions: {y_train_pred_multiclass.shape}\")\n", + "print(f\"Classes prédites uniques: {np.unique(y_train_pred_multiclass)}\")\n", + "\n", + "# a. Calculer la matrice de confusion normale et normalisée\n", + "cm_multiclass = confusion_matrix(Y_train, y_train_pred_multiclass)\n", + "print(\"\\na. Matrice de confusion (valeurs absolues) 10x10:\")\n", + "print(cm_multiclass)\n", + "\n", + "cm_normalized_multiclass = confusion_matrix(Y_train, y_train_pred_multiclass, normalize='true')\n", + "print(\"\\nMatrice de confusion normalisée:\")\n", + "print(cm_normalized_multiclass)\n", + "\n", + "# Visualisation des matrices de confusion\n", + "fig, axes = plt.subplots(1, 2, figsize=(15, 6))\n", + "\n", + "# Matrice de confusion absolue\n", + "im1 = axes[0].imshow(cm_multiclass, interpolation='nearest', cmap=plt.cm.Blues)\n", + "axes[0].set_title('Matrice de confusion (valeurs absolues)')\n", + "axes[0].set_xlabel('Classe prédite')\n", + "axes[0].set_ylabel('Classe réelle')\n", + "\n", + "# Ajouter les annotations pour les valeurs\n", + "for i in range(10):\n", + " for j in range(10):\n", + " axes[0].text(j, i, format(cm_multiclass[i, j], 'd'), \n", + " ha=\"center\", va=\"center\", \n", + " color=\"white\" if cm_multiclass[i, j] > cm_multiclass.max()/2 else \"black\",\n", + " fontsize=8)\n", + "\n", + "axes[0].set_xticks(range(10))\n", + "axes[0].set_yticks(range(10))\n", + "axes[0].set_xticklabels(range(10))\n", + "axes[0].set_yticklabels(range(10))\n", + "\n", + "# Matrice de confusion normalisée\n", + "im2 = axes[1].imshow(cm_normalized_multiclass, interpolation='nearest', cmap=plt.cm.Blues)\n", + "axes[1].set_title('Matrice de confusion normalisée')\n", + "axes[1].set_xlabel('Classe prédite')\n", + "axes[1].set_ylabel('Classe réelle')\n", + "\n", + "# Ajouter les annotations pour les pourcentages\n", + "for i in range(10):\n", + " for j in range(10):\n", + " axes[1].text(j, i, format(cm_normalized_multiclass[i, j], '.2f'), \n", + " ha=\"center\", va=\"center\", \n", + " color=\"white\" if cm_normalized_multiclass[i, j] > 0.5 else \"black\",\n", + " fontsize=8)\n", + "\n", + "axes[1].set_xticks(range(10))\n", + "axes[1].set_yticks(range(10))\n", + "axes[1].set_xticklabels(range(10))\n", + "axes[1].set_yticklabels(range(10))\n", + "\n", + "plt.tight_layout()\n", + "plt.show()\n", + "\n", + "# b. Interprétation des résultats\n", + "print(\"\\n=== Interprétation des résultats ===\")\n", + "\n", + "# Calcul des métriques globales\n", + "accuracy_total = np.trace(cm_multiclass) / np.sum(cm_multiclass)\n", + "print(f\"\\nAccuracy globale: {accuracy_total:.4f} ({accuracy_total*100:.2f}%)\")\n", + "\n", + "# Analyse par classe (diagonale = bonnes prédictions)\n", + "print(f\"\\nPerformances par classe (rappel):\")\n", + "for i in range(10):\n", + " rappel_classe = cm_normalized_multiclass[i, i]\n", + " nb_instances = cm_multiclass[i, :].sum()\n", + " print(f\" Classe {i}: {rappel_classe:.3f} ({rappel_classe*100:.1f}%) - {nb_instances} instances\")\n", + "\n", + "# Classes les mieux classées\n", + "diagonale = np.diag(cm_normalized_multiclass)\n", + "meilleures_classes = np.argsort(diagonale)[::-1][:3]\n", + "pires_classes = np.argsort(diagonale)[:3]\n", + "\n", + "print(f\"\\nMeilleures classes (rappel élevé):\")\n", + "for classe in meilleures_classes:\n", + " print(f\" Classe {classe}: {diagonale[classe]:.3f} ({diagonale[classe]*100:.1f}%)\")\n", + "\n", + "print(f\"\\nPires classes (rappel faible):\")\n", + "for classe in pires_classes:\n", + " print(f\" Classe {classe}: {diagonale[classe]:.3f} ({diagonale[classe]*100:.1f}%)\")\n", + "\n", + "# Erreurs les plus fréquentes (hors diagonale)\n", + "cm_erreurs = cm_multiclass.copy()\n", + "np.fill_diagonal(cm_erreurs, 0) # Enlever la diagonale\n", + "erreurs_max = np.unravel_index(np.argmax(cm_erreurs), cm_erreurs.shape)\n", + "\n", + "print(f\"\\nErreurs les plus fréquentes:\")\n", + "indices_erreurs = np.argsort(cm_erreurs.ravel())[::-1][:5]\n", + "for idx in indices_erreurs:\n", + " i, j = np.unravel_index(idx, cm_erreurs.shape)\n", + " if cm_erreurs[i, j] > 0:\n", + " print(f\" Classe {i} prédite comme {j}: {cm_erreurs[i, j]} fois\")\n", + "\n", + "print(f\"\\n=== Conclusion ===\")\n", + "print(\"La matrice 10x10 montre les performances du classifieur sur les 10 chiffres.\")\n", + "print(\"La diagonale représente les bonnes classifications.\")\n", + "print(\"Les valeurs hors diagonale montrent les confusions entre classes.\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "16a1368f", + "metadata": {}, "outputs": [], "source": [] }