From ef8e05b6af214262f04e97dead2a5e395facea8f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?F=C3=A9lix=20MARQUET?=
<72651575+BreizhHardware@users.noreply.github.com>
Date: Mon, 29 Sep 2025 16:38:58 +0200
Subject: [PATCH] Obsidian vault auto-backup: 29-09-2025 16:38:58 on . 1 files
edited
---
ISEN/IA/CIPA4/TP/TP2/tp2_IA.ipynb | 208 +++++++++++++++++++++++++++---
1 file changed, 188 insertions(+), 20 deletions(-)
diff --git a/ISEN/IA/CIPA4/TP/TP2/tp2_IA.ipynb b/ISEN/IA/CIPA4/TP/TP2/tp2_IA.ipynb
index 6b4037b..c4d7f5f 100644
--- a/ISEN/IA/CIPA4/TP/TP2/tp2_IA.ipynb
+++ b/ISEN/IA/CIPA4/TP/TP2/tp2_IA.ipynb
@@ -18,7 +18,7 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 1,
"id": "5a9a29a0",
"metadata": {},
"outputs": [
@@ -62,7 +62,7 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": null,
"id": "5be8782b",
"metadata": {},
"outputs": [],
@@ -81,7 +81,7 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": null,
"id": "70575266",
"metadata": {},
"outputs": [
@@ -102,7 +102,7 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": null,
"id": "eaffc443",
"metadata": {},
"outputs": [
@@ -171,7 +171,7 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": null,
"id": "d333dc4d",
"metadata": {},
"outputs": [
@@ -256,7 +256,7 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": null,
"id": "c0966e7f",
"metadata": {},
"outputs": [
@@ -318,7 +318,7 @@
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": null,
"id": "b0560d20",
"metadata": {},
"outputs": [
@@ -399,7 +399,7 @@
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": null,
"id": "bff45d57",
"metadata": {},
"outputs": [
@@ -449,7 +449,7 @@
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": null,
"id": "e02058de",
"metadata": {},
"outputs": [
@@ -1218,7 +1218,7 @@
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": null,
"id": "e73e18a3",
"metadata": {},
"outputs": [
@@ -1296,7 +1296,7 @@
},
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": null,
"id": "acaeacd9",
"metadata": {},
"outputs": [
@@ -1322,7 +1322,7 @@
},
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": null,
"id": "e5949779",
"metadata": {},
"outputs": [
@@ -1370,7 +1370,7 @@
},
{
"cell_type": "code",
- "execution_count": 15,
+ "execution_count": null,
"id": "5733c7b1",
"metadata": {},
"outputs": [
@@ -1447,7 +1447,7 @@
},
{
"cell_type": "code",
- "execution_count": 16,
+ "execution_count": null,
"id": "970fce9c",
"metadata": {},
"outputs": [
@@ -1583,7 +1583,7 @@
},
{
"cell_type": "code",
- "execution_count": 17,
+ "execution_count": null,
"id": "b4a6c352",
"metadata": {},
"outputs": [
@@ -1677,7 +1677,7 @@
},
{
"cell_type": "code",
- "execution_count": 18,
+ "execution_count": null,
"id": "1ad09da3",
"metadata": {},
"outputs": [
@@ -1777,7 +1777,7 @@
},
{
"cell_type": "code",
- "execution_count": 19,
+ "execution_count": null,
"id": "0a4ec288",
"metadata": {},
"outputs": [
@@ -1941,7 +1941,7 @@
},
{
"cell_type": "code",
- "execution_count": 20,
+ "execution_count": null,
"id": "7dc960b0",
"metadata": {},
"outputs": [
@@ -2037,7 +2037,7 @@
},
{
"cell_type": "code",
- "execution_count": 22,
+ "execution_count": null,
"id": "6f487ebf",
"metadata": {},
"outputs": [
@@ -2093,9 +2093,29 @@
"name": "stderr",
"output_type": "stream",
"text": [
+ "/Volumes/SSD/Nextcloud/Documents/ISEN/Cours/Obsidian Vault/ISEN/IA/CIPA4/TP/TP2/.venv/lib/python3.13/site-packages/sklearn/linear_model/_stochastic_gradient.py:726: ConvergenceWarning: Maximum number of iteration reached before convergence. Consider increasing max_iter to improve the fit.\n",
+ " warnings.warn(\n",
"/Volumes/SSD/Nextcloud/Documents/ISEN/Cours/Obsidian Vault/ISEN/IA/CIPA4/TP/TP2/.venv/lib/python3.13/site-packages/sklearn/linear_model/_stochastic_gradient.py:726: ConvergenceWarning: Maximum number of iteration reached before convergence. Consider increasing max_iter to improve the fit.\n",
" warnings.warn(\n"
]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "c. Évaluation avec données standardisées:\n",
+ "Fold 1: 0.9013 (90.14%)\n",
+ "Fold 2: 0.8939 (89.39%)\n",
+ "Fold 3: 0.9055 (90.55%)\n",
+ "Moyenne: 0.9002 (90.02%)\n",
+ "\n",
+ "=== Comparaison des résultats ===\n",
+ "Sans standardisation: 0.8721 (87.21%)\n",
+ "Avec standardisation: 0.9002 (90.02%)\n",
+ "Amélioration: +0.0281 (+2.81 points de %)\n",
+ "✅ Les résultats sont MEILLEURS avec la standardisation\n"
+ ]
}
],
"source": [
@@ -2114,7 +2134,7 @@
"# c. Évaluer le classifieur SGD avec les données standardisées\n",
"scores_scaled = cross_val_score(sgd_multiclass, X_train_scaled, Y_train, cv=3, scoring='accuracy')\n",
"\n",
- "print(\"\\nc. Évaluation avec données standardisées:\")\n",
+ "print(\"\\nÉvaluation avec données standardisées:\")\n",
"for i, score in enumerate(scores_scaled, 1):\n",
" print(f\"Fold {i}: {score:.4f} ({score*100:.2f}%)\")\n",
"\n",
@@ -2135,11 +2155,159 @@
" print(\"❌ Les résultats ne sont PAS meilleurs avec la standardisation\")"
]
},
+ {
+ "cell_type": "markdown",
+ "id": "ab0c9c23",
+ "metadata": {},
+ "source": [
+ "### 2-2- Matrice de confusion"
+ ]
+ },
{
"cell_type": "code",
"execution_count": null,
"id": "55bf66d7",
"metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "=== Matrice de confusion du classifieur SGD multi-classes ===\n",
+ "\n",
+ "Prédiction des classes avec cross_val_predict...\n"
+ ]
+ },
+ {
+ "ename": "",
+ "evalue": "",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[1;31mLe noyau s’est bloqué lors de l’exécution du code dans une cellule active ou une cellule précédente. \n",
+ "\u001b[1;31mVeuillez vérifier le code dans la ou les cellules pour identifier une cause possible de l’échec. \n",
+ "\u001b[1;31mCliquez ici pour plus d’informations. \n",
+ "\u001b[1;31mPour plus d’informations, consultez Jupyter log."
+ ]
+ }
+ ],
+ "source": [
+ "# Section 2-2: confusion matrix of the multi-class SGD classifier.\n",
+ "# Relies on sgd_multiclass, X_train and Y_train, which this cell does not define (earlier cells).\n",
+ "\n",
+ "from sklearn.model_selection import cross_val_predict\n",
+ "from sklearn.metrics import confusion_matrix\n",
+ "import matplotlib.pyplot as plt\n",
+ "import numpy as np\n",
+ "\n",
+ "print(\"=== Matrice de confusion du classifieur SGD multi-classes ===\\n\")\n",
+ "\n",
+ "# Out-of-fold predictions: every training sample is predicted by a model fitted on the other folds.\n",
+ "# NOTE(review): this uses the unscaled X_train although the previous cell reports a better score\n",
+ "# with X_train_scaled - confirm which input the exercise expects.\n",
+ "print(\"Prédiction des classes avec cross_val_predict...\")\n",
+ "y_train_pred_multiclass = cross_val_predict(sgd_multiclass, X_train, Y_train, cv=3)\n",
+ "\n",
+ "# Quick sanity check on the predictions before building the matrices\n",
+ "print(f\"Forme des prédictions: {y_train_pred_multiclass.shape}\")\n",
+ "print(f\"Classes prédites uniques: {np.unique(y_train_pred_multiclass)}\")\n",
+ "\n",
+ "# a. Confusion matrix as raw counts (rows = true class, columns = predicted class)\n",
+ "cm_multiclass = confusion_matrix(Y_train, y_train_pred_multiclass)\n",
+ "n_classes = cm_multiclass.shape[0]  # square matrix; replaces the hard-coded 10 in the loops below\n",
+ "print(\"\\na. Matrice de confusion (valeurs absolues) 10x10:\")\n",
+ "print(cm_multiclass)\n",
+ "\n",
+ "# normalize='true' divides each row by the number of samples of that true class\n",
+ "cm_normalized_multiclass = confusion_matrix(Y_train, y_train_pred_multiclass, normalize='true')\n",
+ "print(\"\\nMatrice de confusion normalisée:\")\n",
+ "print(cm_normalized_multiclass)\n",
+ "\n",
+ "# Visualisation: the two matrices side by side as annotated heat maps\n",
+ "fig, axes = plt.subplots(1, 2, figsize=(15, 6))\n",
+ "\n",
+ "# Per panel: matrix, title, cell number format, value above which the annotation turns white\n",
+ "panels = [\n",
+ "    (cm_multiclass, 'Matrice de confusion (valeurs absolues)', 'd', cm_multiclass.max()/2),\n",
+ "    (cm_normalized_multiclass, 'Matrice de confusion normalisée', '.2f', 0.5),\n",
+ "]\n",
+ "\n",
+ "for ax, (matrix, title, cell_format, white_above) in zip(axes, panels):\n",
+ "    ax.imshow(matrix, interpolation='nearest', cmap=plt.cm.Blues)\n",
+ "    ax.set_title(title)\n",
+ "    ax.set_xlabel('Classe prédite')\n",
+ "    ax.set_ylabel('Classe réelle')\n",
+ "\n",
+ "    # Write each cell's value on top of the heat map\n",
+ "    for i in range(n_classes):\n",
+ "        for j in range(n_classes):\n",
+ "            ax.text(j, i, format(matrix[i, j], cell_format),\n",
+ "                    ha=\"center\", va=\"center\",\n",
+ "                    color=\"white\" if matrix[i, j] > white_above else \"black\",\n",
+ "                    fontsize=8)\n",
+ "\n",
+ "    # One tick per class on both axes\n",
+ "    ax.set_xticks(range(n_classes))\n",
+ "    ax.set_yticks(range(n_classes))\n",
+ "    ax.set_xticklabels(range(n_classes))\n",
+ "    ax.set_yticklabels(range(n_classes))\n",
+ "\n",
+ "plt.tight_layout()\n",
+ "plt.show()\n",
+ "\n",
+ "# b. Interpretation of the results\n",
+ "print(\"\\n=== Interprétation des résultats ===\")\n",
+ "\n",
+ "# Global accuracy: correct predictions (diagonal) over all samples\n",
+ "accuracy_total = np.trace(cm_multiclass) / np.sum(cm_multiclass)\n",
+ "print(f\"\\nAccuracy globale: {accuracy_total:.4f} ({accuracy_total*100:.2f}%)\")\n",
+ "\n",
+ "# Per-class recall is the diagonal of the row-normalised matrix\n",
+ "diagonale = np.diag(cm_normalized_multiclass)\n",
+ "print(\"\\nPerformances par classe (rappel):\")\n",
+ "for i in range(n_classes):\n",
+ "    rappel_classe = diagonale[i]\n",
+ "    nb_instances = cm_multiclass[i, :].sum()\n",
+ "    print(f\" Classe {i}: {rappel_classe:.3f} ({rappel_classe*100:.1f}%) - {nb_instances} instances\")\n",
+ "\n",
+ "# Rank the classes by recall: three best and three worst\n",
+ "# (argsort is ascending, so the reversed order lists the highest recalls first)\n",
+ "ordre_rappel = np.argsort(diagonale)\n",
+ "meilleures_classes = ordre_rappel[::-1][:3]\n",
+ "pires_classes = ordre_rappel[:3]\n",
+ "\n",
+ "print(\"\\nMeilleures classes (rappel élevé):\")\n",
+ "for classe in meilleures_classes:\n",
+ "    print(f\" Classe {classe}: {diagonale[classe]:.3f} ({diagonale[classe]*100:.1f}%)\")\n",
+ "\n",
+ "print(\"\\nPires classes (rappel faible):\")\n",
+ "for classe in pires_classes:\n",
+ "    print(f\" Classe {classe}: {diagonale[classe]:.3f} ({diagonale[classe]*100:.1f}%)\")\n",
+ "\n",
+ "# Most frequent errors: zero the diagonal so only misclassifications remain\n",
+ "cm_erreurs = cm_multiclass.copy()\n",
+ "np.fill_diagonal(cm_erreurs, 0)\n",
+ "\n",
+ "print(\"\\nErreurs les plus fréquentes:\")\n",
+ "# Flattened indices of the five largest off-diagonal counts, largest first\n",
+ "indices_erreurs = np.argsort(cm_erreurs.ravel())[::-1][:5]\n",
+ "for idx in indices_erreurs:\n",
+ "    i, j = np.unravel_index(idx, cm_erreurs.shape)\n",
+ "    # Skip zero counts so class pairs that were never confused are not listed\n",
+ "    if cm_erreurs[i, j] > 0:\n",
+ "        print(f\" Classe {i} prédite comme {j}: {cm_erreurs[i, j]} fois\")\n",
+ "\n",
+ "# Reading guide for the matrices printed and plotted above\n",
+ "print(\"\\n=== Conclusion ===\")\n",
+ "print(\"La matrice 10x10 montre les performances du classifieur sur les 10 chiffres.\")\n",
+ "print(\"La diagonale représente les bonnes classifications.\")\n",
+ "print(\"Les valeurs hors diagonale montrent les confusions entre classes.\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "16a1368f",
+ "metadata": {},
"outputs": [],
"source": []
}