Supprimer activite2/classification_microregions_corse.ipynb

This commit is contained in:
lprik 2025-10-23 13:46:58 +00:00
parent 99d6f3af53
commit 83a9ef30c0

View file

@ -1,620 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Classification des Micro-régions de Corse par K-NN\n",
"\n",
"Ce notebook implémente un système de classification des micro-régions corses basé sur l'algorithme des k plus proches voisins (k-NN). Cliquez sur la carte pour identifier la micro-région correspondante."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Install the required libraries; %pip (unlike !pip) installs into the running kernel's environment\n",
"%pip install folium pandas numpy scikit-learn geopy --quiet"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import folium\n",
"from folium import plugins\n",
"from sklearn.neighbors import KNeighborsClassifier\n",
"from geopy.geocoders import Nominatim\n",
"from geopy.extra.rate_limiter import RateLimiter\n",
"import time\n",
"from IPython.display import display, HTML\n",
"import json"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 1. Chargement et préparation des données"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Load the commune / project-territory table (semicolon-separated, UTF-8)\n",
"df = pd.read_csv(\n",
"    'communes-par-territoire-de-projet-de-la-collectivite-territoriale-de-corse0.csv',\n",
"    sep=';',\n",
"    encoding='utf-8',\n",
")\n",
"\n",
"# Overview: number of rows, first rows, then the distinct micro-region labels\n",
"print(f\"Nombre de communes: {len(df)}\")\n",
"print(\"\\nPremières lignes:\")\n",
"display(df.head())\n",
"print(\"\\nTerritoires de projet (micro-régions):\")\n",
"print(df['Territoire de projet'].unique())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 2. Géocodage des communes\n",
"\n",
"Si le fichier ne contient pas déjà les coordonnées GPS, nous les récupérons via géocodage."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Look up the GPS coordinates of a commune through the Nominatim geocoder\n",
"def geocode_commune(commune, departement, code_postal):\n",
"    \"\"\"\n",
"    Geocode a Corsican commune and return its GPS coordinates.\n",
"\n",
"    Two queries are tried in order: \"<commune>, Corse, France\", then\n",
"    \"<commune>, <code_postal>, France\" as a fallback.\n",
"\n",
"    Parameters:\n",
"        commune: name of the commune.\n",
"        departement: accepted for call compatibility; not used in the queries.\n",
"        code_postal: postal code used by the fallback query.\n",
"\n",
"    Returns:\n",
"        (latitude, longitude) of the first match, or (None, None) when both\n",
"        queries return nothing or the geocoder raises.\n",
"    \"\"\"\n",
"    geolocator = Nominatim(user_agent=\"corse_knn_classifier\")\n",
"\n",
"    queries = (\n",
"        f\"{commune}, Corse, France\",\n",
"        f\"{commune}, {code_postal}, France\",\n",
"    )\n",
"\n",
"    try:\n",
"        for attempt, query in enumerate(queries):\n",
"            if attempt:\n",
"                # Nominatim's usage policy caps clients at 1 request per second:\n",
"                # pause before sending the fallback query.\n",
"                time.sleep(1)\n",
"\n",
"            location = geolocator.geocode(query, timeout=10)\n",
"            if location:\n",
"                return location.latitude, location.longitude\n",
"\n",
"    except Exception as e:\n",
"        # Best effort: report the failure and fall through to (None, None)\n",
"        print(f\"Erreur pour {commune}: {e}\")\n",
"\n",
"    return None, None\n",
"\n",
"# Geocoding is slow (at least 1.5 s per commune), so its result is saved to\n",
"# GPS_CACHE_FILE and reloaded on later runs. Delete that file to force a new\n",
"# geocoding pass (for example after the source CSV has changed).\n",
"GPS_CACHE_FILE = 'communes_corse_avec_gps.csv'\n",
"GPS_COLUMNS = ['Latitude', 'Longitude']\n",
"\n",
"if all(col in df.columns for col in GPS_COLUMNS):\n",
"    print(\"Les coordonnées GPS sont déjà présentes dans le fichier.\")\n",
"else:\n",
"    try:\n",
"        df_cached = pd.read_csv(GPS_CACHE_FILE, sep=';', encoding='utf-8')\n",
"    except FileNotFoundError:\n",
"        df_cached = None\n",
"\n",
"    # Only trust the saved file when it has the GPS columns and the same number\n",
"    # of rows as the table loaded above.\n",
"    cache_usable = (\n",
"        df_cached is not None\n",
"        and all(col in df_cached.columns for col in GPS_COLUMNS)\n",
"        and len(df_cached) == len(df)\n",
"    )\n",
"\n",
"    if cache_usable:\n",
"        df = df_cached\n",
"        print(f\"Coordonnées GPS rechargées depuis '{GPS_CACHE_FILE}'.\")\n",
"    else:\n",
"        print(\"Géocodage des communes en cours... (cela peut prendre quelques minutes)\")\n",
"\n",
"        latitudes = []\n",
"        longitudes = []\n",
"\n",
"        # enumerate() drives the progress counter so that it does not depend on\n",
"        # the DataFrame index being 0..n-1\n",
"        for n_done, (row_label, row) in enumerate(df.iterrows(), start=1):\n",
"            lat, lon = geocode_commune(row['Commune'], row['Département'], row['Code Postal'])\n",
"            latitudes.append(lat)\n",
"            longitudes.append(lon)\n",
"\n",
"            # Progress report every 10 communes\n",
"            if n_done % 10 == 0:\n",
"                print(f\"Progression: {n_done}/{len(df)} communes géocodées\")\n",
"\n",
"            # Pause between communes to respect the geocoding API limits\n",
"            time.sleep(1.5)\n",
"\n",
"        df['Latitude'] = latitudes\n",
"        df['Longitude'] = longitudes\n",
"\n",
"        # Save the table with coordinates so later runs can skip the geocoding\n",
"        df.to_csv(GPS_CACHE_FILE, sep=';', index=False, encoding='utf-8')\n",
"        print(f\"\\nGéocodage terminé et sauvegardé dans '{GPS_CACHE_FILE}'\")\n",
"\n",
"# Keep only the communes that have coordinates\n",
"df_clean = df.dropna(subset=GPS_COLUMNS).copy()\n",
"print(f\"\\nCommunes avec coordonnées GPS: {len(df_clean)}/{len(df)}\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 3. Entraînement du modèle k-NN"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Features are the commune coordinates, labels are the micro-region names\n",
"X = df_clean[['Latitude', 'Longitude']].to_numpy()\n",
"y = df_clean['Territoire de projet'].to_numpy()\n",
"\n",
"# The haversine metric expects the coordinates in radians\n",
"X_rad = np.deg2rad(X)\n",
"\n",
"# Number of neighbours taking part in the vote (adjustable)\n",
"k = 5\n",
"\n",
"# Distance-weighted k-NN with the haversine metric, fitted on every geocoded commune\n",
"knn = KNeighborsClassifier(\n",
"    n_neighbors=k,\n",
"    weights='distance',\n",
"    metric='haversine',\n",
")\n",
"knn.fit(X_rad, y)\n",
"\n",
"print(f\"Modèle k-NN entraîné avec k={k} voisins\")\n",
"print(f\"Nombre de micro-régions: {len(np.unique(y))}\")\n",
"print(f\"Micro-régions: {sorted(np.unique(y))}\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 4. Création de la carte interactive avec Folium"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# One colour per micro-region. Every entry must be a valid CSS colour: the\n",
"# values are used as CircleMarker stroke/fill colours and in inline HTML styles.\n",
"# 'lightred' and 'darkpurple' are folium.Icon names, not CSS colours, so they\n",
"# are replaced here by 'lightcoral' and 'indigo'.\n",
"microregions = sorted(df_clean['Territoire de projet'].unique())\n",
"colors = ['red', 'blue', 'green', 'purple', 'orange', 'darkred',\n",
"          'lightcoral', 'beige', 'darkblue', 'darkgreen', 'cadetblue',\n",
"          'indigo', 'pink', 'lightblue', 'lightgreen', 'gray', 'black', 'lightgray']\n",
"\n",
"# Colours are reused cyclically if there are more regions than palette entries\n",
"color_map = {region: colors[i % len(colors)] for i, region in enumerate(microregions)}\n",
"\n",
"print(\"Carte des couleurs par micro-région:\")\n",
"for region, color in color_map.items():\n",
"    print(f\" {region}: {color}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Centre the map on the mean position of the geocoded communes\n",
"center_lat = df_clean['Latitude'].mean()\n",
"center_lon = df_clean['Longitude'].mean()\n",
"\n",
"# Base map\n",
"m = folium.Map(location=[center_lat, center_lon], zoom_start=9, tiles='OpenStreetMap')\n",
"\n",
"# One small circle per commune, coloured by its micro-region\n",
"for idx, row in df_clean.iterrows():\n",
"    marker_color = color_map[row['Territoire de projet']]\n",
"    folium.CircleMarker(\n",
"        location=[row['Latitude'], row['Longitude']],\n",
"        radius=3,\n",
"        popup=f\"<b>{row['Commune']}</b><br>{row['Territoire de projet']}\",\n",
"        tooltip=row['Commune'],\n",
"        color=marker_color,\n",
"        fill=True,\n",
"        fillColor=marker_color,\n",
"        fillOpacity=0.7\n",
"    ).add_to(m)\n",
"\n",
"# Legend: fixed-position box with one entry per micro-region\n",
"legend_html = '''\n",
"<div style=\"position: fixed; \n",
" top: 10px; right: 10px; width: 250px; height: auto; \n",
" background-color: white; border:2px solid grey; z-index:9999; \n",
" font-size:12px; padding: 10px\">\n",
"<p style=\"margin-bottom: 5px;\"><b>Micro-régions de Corse</b></p>\n",
"'''\n",
"legend_html += ''.join(\n",
"    f'<p style=\"margin: 3px 0;\"><i class=\"fa fa-circle\" style=\"color:{color}\"></i> {region}</p>'\n",
"    for region, color in sorted(color_map.items())\n",
")\n",
"legend_html += '</div>'\n",
"\n",
"m.get_root().html.add_child(folium.Element(legend_html))\n",
"\n",
"# No click handling on this map: Folium has no native real-time Python-side\n",
"# interactivity, so the prediction on click is done in JavaScript on a second map.\n",
"\n",
"print(\"Carte de base créée avec les communes colorées par micro-région\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 5. Carte interactive avec prédiction au clic\n",
"\n",
"Cette version utilise JavaScript pour permettre de cliquer sur la carte et afficher la micro-région prédite."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Second map, later extended with the JavaScript click handler\n",
"m_interactive = folium.Map(location=[center_lat, center_lon], zoom_start=9, tiles='OpenStreetMap')\n",
"\n",
"# Commune markers, coloured by micro-region\n",
"for idx, row in df_clean.iterrows():\n",
"    marker_color = color_map[row['Territoire de projet']]\n",
"    folium.CircleMarker(\n",
"        location=[row['Latitude'], row['Longitude']],\n",
"        radius=3,\n",
"        popup=f\"<b>{row['Commune']}</b><br>{row['Territoire de projet']}\",\n",
"        tooltip=row['Commune'],\n",
"        color=marker_color,\n",
"        fill=True,\n",
"        fillColor=marker_color,\n",
"        fillOpacity=0.7\n",
"    ).add_to(m_interactive)\n",
"\n",
"# Commune records (coordinates, name, micro-region) used by the browser-side k-NN\n",
"communes_data = df_clean[['Latitude', 'Longitude', 'Commune', 'Territoire de projet']].to_dict('records')\n",
"\n",
"# folium exposes each map as a global JavaScript variable named after get_name().\n",
"# The click handler looks the map up through that name, because Leaflet does not\n",
"# store the map object on its container element.\n",
"map_js_name = m_interactive.get_name()\n",
"\n",
"# JavaScript performing the k-NN prediction when the map is clicked\n",
"click_js = f\"\"\"\n",
"<script>\n",
"// Données des communes\n",
"var communesData = {json.dumps(communes_data)};\n",
"\n",
"// Carte des couleurs\n",
"var colorMap = {json.dumps(color_map)};\n",
"\n",
"// Name of the global variable folium generates for this map\n",
"var knnMapName = {json.dumps(map_js_name)};\n",
"\n",
"// Fonction pour calculer la distance haversine\n",
"function haversineDistance(lat1, lon1, lat2, lon2) {{\n",
"    const R = 6371; // Rayon de la Terre en km\n",
"    const dLat = (lat2 - lat1) * Math.PI / 180;\n",
"    const dLon = (lon2 - lon1) * Math.PI / 180;\n",
"    const a = Math.sin(dLat/2) * Math.sin(dLat/2) +\n",
"              Math.cos(lat1 * Math.PI / 180) * Math.cos(lat2 * Math.PI / 180) *\n",
"              Math.sin(dLon/2) * Math.sin(dLon/2);\n",
"    const c = 2 * Math.atan2(Math.sqrt(a), Math.sqrt(1-a));\n",
"    return R * c;\n",
"}}\n",
"\n",
"// Fonction k-NN\n",
"function predictRegion(lat, lon, k) {{\n",
"    // Calculer les distances\n",
"    // (each entry keeps the commune coordinates so the neighbour lines can be\n",
"    // drawn without a second lookup by name)\n",
"    var distances = communesData.map(function(commune) {{\n",
"        return {{\n",
"            distance: haversineDistance(lat, lon, commune.Latitude, commune.Longitude),\n",
"            region: commune['Territoire de projet'],\n",
"            commune: commune.Commune,\n",
"            lat: commune.Latitude,\n",
"            lon: commune.Longitude\n",
"        }};\n",
"    }});\n",
"\n",
"    // Trier par distance\n",
"    distances.sort((a, b) => a.distance - b.distance);\n",
"\n",
"    // Prendre les k plus proches\n",
"    var kNearest = distances.slice(0, k);\n",
"\n",
"    // Vote pondéré par l'inverse de la distance\n",
"    var votes = {{}};\n",
"    kNearest.forEach(function(neighbor) {{\n",
"        var weight = 1 / (neighbor.distance + 0.001); // +0.001 pour éviter division par 0\n",
"        if (votes[neighbor.region]) {{\n",
"            votes[neighbor.region] += weight;\n",
"        }} else {{\n",
"            votes[neighbor.region] = weight;\n",
"        }}\n",
"    }});\n",
"\n",
"    // Trouver la région gagnante\n",
"    var maxVote = 0;\n",
"    var predictedRegion = '';\n",
"    for (var region in votes) {{\n",
"        if (votes[region] > maxVote) {{\n",
"            maxVote = votes[region];\n",
"            predictedRegion = region;\n",
"        }}\n",
"    }}\n",
"\n",
"    return {{\n",
"        region: predictedRegion,\n",
"        neighbors: kNearest\n",
"    }};\n",
"}}\n",
"\n",
"// Variable pour stocker le marqueur de prédiction\n",
"var predictionMarker = null;\n",
"var neighborLines = [];\n",
"\n",
"// The map variable may not exist yet when this script runs, so poll for it a\n",
"// bounded number of times instead of relying on a single fixed delay.\n",
"function attachKnnClickHandler(attemptsLeft) {{\n",
"    var leafletMap = window[knnMapName];\n",
"\n",
"    if (!leafletMap) {{\n",
"        if (attemptsLeft > 0) {{\n",
"            setTimeout(function() {{ attachKnnClickHandler(attemptsLeft - 1); }}, 200);\n",
"        }} else {{\n",
"            console.error('Carte Leaflet introuvable: gestionnaire de clic k-NN non activé');\n",
"        }}\n",
"        return;\n",
"    }}\n",
"\n",
"    leafletMap.on('click', function(e) {{\n",
"        var lat = e.latlng.lat;\n",
"        var lon = e.latlng.lng;\n",
"\n",
"        // Prédiction avec k={k}\n",
"        var result = predictRegion(lat, lon, {k});\n",
"\n",
"        // Supprimer l'ancien marqueur et lignes\n",
"        if (predictionMarker) {{\n",
"            leafletMap.removeLayer(predictionMarker);\n",
"        }}\n",
"        neighborLines.forEach(function(line) {{\n",
"            leafletMap.removeLayer(line);\n",
"        }});\n",
"        neighborLines = [];\n",
"\n",
"        // Créer le popup avec informations détaillées\n",
"        var popupContent = '<div style=\"min-width: 200px;\">' +\n",
"            '<h4 style=\"margin: 5px 0;\">Prédiction k-NN</h4>' +\n",
"            '<p style=\"margin: 5px 0;\"><b>Micro-région:</b> ' + result.region + '</p>' +\n",
"            '<p style=\"margin: 5px 0;\"><b>Coordonnées:</b><br>' +\n",
"            'Lat: ' + lat.toFixed(5) + '<br>Lon: ' + lon.toFixed(5) + '</p>' +\n",
"            '<p style=\"margin: 5px 0;\"><b>{k} plus proches communes:</b></p>' +\n",
"            '<ul style=\"margin: 5px 0; padding-left: 20px; font-size: 11px;\">';\n",
"\n",
"        result.neighbors.forEach(function(neighbor) {{\n",
"            popupContent += '<li>' + neighbor.commune +\n",
"                ' (' + neighbor.distance.toFixed(2) + ' km)</li>';\n",
"        }});\n",
"\n",
"        popupContent += '</ul></div>';\n",
"\n",
"        // Ajouter le nouveau marqueur\n",
"        predictionMarker = L.marker([lat, lon], {{\n",
"            icon: L.divIcon({{\n",
"                className: 'prediction-marker',\n",
"                html: '<div style=\"background-color: ' + colorMap[result.region] +\n",
"                      '; width: 20px; height: 20px; border-radius: 50%; ' +\n",
"                      'border: 3px solid white; box-shadow: 0 0 10px rgba(0,0,0,0.5);\"></div>',\n",
"                iconSize: [20, 20]\n",
"            }})\n",
"        }}).addTo(leafletMap);\n",
"\n",
"        predictionMarker.bindPopup(popupContent).openPopup();\n",
"\n",
"        // Ajouter des lignes vers les k plus proches voisins\n",
"        result.neighbors.forEach(function(neighbor) {{\n",
"            var line = L.polyline(\n",
"                [[lat, lon], [neighbor.lat, neighbor.lon]],\n",
"                {{\n",
"                    color: 'gray',\n",
"                    weight: 1,\n",
"                    opacity: 0.5,\n",
"                    dashArray: '5, 5'\n",
"                }}\n",
"            ).addTo(leafletMap);\n",
"            neighborLines.push(line);\n",
"        }});\n",
"    }});\n",
"\n",
"    console.log('Gestionnaire de clic k-NN activé');\n",
"}}\n",
"\n",
"// Up to 50 attempts, 200 ms apart\n",
"attachKnnClickHandler(50);\n",
"</script>\n",
"\"\"\"\n",
"\n",
"m_interactive.get_root().html.add_child(folium.Element(click_js))\n",
"\n",
"# Instruction box shown in the bottom-left corner of the map\n",
"instructions_html = '''\n",
"<div style=\"position: fixed; \n",
" bottom: 10px; left: 10px; width: 300px; \n",
" background-color: white; border:2px solid grey; z-index:9999; \n",
" font-size:13px; padding: 10px\">\n",
"<p style=\"margin: 0;\"><b>🖱️ Instructions:</b></p>\n",
"<p style=\"margin: 5px 0;\">Cliquez n'importe où sur la carte pour prédire la micro-région à partir de l'algorithme k-NN.</p>\n",
"<p style=\"margin: 5px 0;\">Les lignes pointillées montrent les k plus proches communes utilisées pour la prédiction.</p>\n",
"</div>\n",
"'''\n",
"\n",
"# Attach the legend first, then the instructions\n",
"for overlay_html in (legend_html, instructions_html):\n",
"    m_interactive.get_root().html.add_child(folium.Element(overlay_html))\n",
"\n",
"print(\"Carte interactive créée avec succès!\")\n",
"print(f\"\\nCliquez sur n'importe quel point de la carte pour prédire sa micro-région avec k={k} voisins.\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Affichage de la carte interactive\n",
"m_interactive"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 6. Sauvegarde de la carte"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Write the interactive map to an HTML file\n",
"m_interactive.save('carte_corse_knn_interactive.html')\n",
"print(\n",
"    \"Carte sauvegardée dans 'carte_corse_knn_interactive.html'\",\n",
"    \"Vous pouvez ouvrir ce fichier dans un navigateur pour une utilisation autonome.\",\n",
"    sep=\"\\n\",\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 7. Test de la prédiction (optionnel)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Predict the micro-region for given coordinates and list the neighbours used\n",
"def predict_region(lat, lon, k_value=5):\n",
"    \"\"\"\n",
"    Predict the micro-region for the given coordinates and print the details.\n",
"\n",
"    Parameters:\n",
"        lat: latitude in degrees.\n",
"        lon: longitude in degrees.\n",
"        k_value: number of neighbours to use. When it differs from the fitted\n",
"            model's n_neighbors, a classifier with the same settings but\n",
"            k_value neighbours is fitted on the notebook's X_rad / y.\n",
"\n",
"    Returns:\n",
"        The predicted micro-region label.\n",
"    \"\"\"\n",
"    # The model works on coordinates in radians\n",
"    coords_rad = np.radians([[lat, lon]])\n",
"\n",
"    # Honour k_value: `knn` always votes with its own n_neighbors, so a\n",
"    # different k needs its own classifier (same parameters otherwise).\n",
"    model = knn\n",
"    if k_value != knn.n_neighbors:\n",
"        params = {**knn.get_params(), 'n_neighbors': k_value}\n",
"        model = KNeighborsClassifier(**params).fit(X_rad, y)\n",
"\n",
"    prediction = model.predict(coords_rad)[0]\n",
"\n",
"    # Neighbours that took part in the vote\n",
"    distances, indices = model.kneighbors(coords_rad)\n",
"\n",
"    # Haversine distances are returned in radians; scale by the Earth radius (km)\n",
"    distances_km = distances[0] * 6371\n",
"\n",
"    print(f\"\\n📍 Coordonnées: {lat:.5f}, {lon:.5f}\")\n",
"    print(f\"🎯 Micro-région prédite: {prediction}\")\n",
"    print(f\"\\n{k_value} plus proches communes:\")\n",
"\n",
"    for i, idx in enumerate(indices[0]):\n",
"        # Positions returned by kneighbors index the rows of df_clean\n",
"        commune_info = df_clean.iloc[idx]\n",
"        print(f\" {i+1}. {commune_info['Commune']} ({commune_info['Territoire de projet']}) - {distances_km[i]:.2f} km\")\n",
"\n",
"    return prediction\n",
"\n",
"# Example predictions\n",
"print(\"=\" * 60, \"TESTS DE PRÉDICTION\", \"=\" * 60, sep=\"\\n\")\n",
"\n",
"# Example 1: approximate centre of Corsica\n",
"predict_region(42.15, 9.15, k)\n",
"\n",
"# Example 2: northern Corsica (Balagne)\n",
"predict_region(42.55, 8.85, k)\n",
"\n",
"# Example 3: southern Corsica (kept as the last expression, so its result is displayed)\n",
"predict_region(41.65, 9.15, k)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 8. Analyse de performance (optionnel)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Cross-validated score of the k-NN classifier for several values of k\n",
"from sklearn.model_selection import cross_val_score\n",
"\n",
"# Candidate neighbourhood sizes and their mean cross-validation scores\n",
"k_values = [3, 5, 7, 9, 11]\n",
"scores = []\n",
"\n",
"print(\"Évaluation de la précision pour différentes valeurs de k:\\n\")\n",
"\n",
"for k_val in k_values:\n",
"    # Same settings as the main model, only the number of neighbours changes\n",
"    knn_temp = KNeighborsClassifier(\n",
"        n_neighbors=k_val,\n",
"        weights='distance',\n",
"        metric='haversine',\n",
"    )\n",
"    cv_scores = cross_val_score(knn_temp, X_rad, y, cv=5)\n",
"    mean_score = cv_scores.mean()\n",
"    scores.append(mean_score)\n",
"    print(f\"k={k_val:2d}: Précision moyenne = {mean_score:.3f} (+/- {cv_scores.std():.3f})\")\n",
"\n",
"# Report the k with the highest mean score (first one in case of a tie)\n",
"print(f\"\\n✨ Meilleure valeur de k: {k_values[scores.index(max(scores))]} (précision: {max(scores):.3f})\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Conclusion\n",
"\n",
"Ce notebook implémente un classificateur k-NN pour les micro-régions de Corse avec:\n",
"- ✅ Chargement et géocodage des communes corses\n",
"- ✅ Entraînement d'un modèle k-NN avec distance haversine\n",
"- ✅ Carte interactive Folium avec prédiction au clic\n",
"- ✅ Visualisation des k plus proches voisins\n",
"- ✅ Légende et instructions pour l'utilisateur\n",
"\n",
"**Utilisation:**\n",
"1. Cliquez n'importe où sur la carte\n",
"2. Un marqueur coloré apparaît avec la micro-région prédite\n",
"3. Des lignes pointillées montrent les k communes les plus proches\n",
"4. Un popup détaille la prédiction et les voisins\n",
"\n",
"La carte HTML peut être ouverte dans n'importe quel navigateur pour une utilisation autonome!"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.0"
}
},
"nbformat": 4,
"nbformat_minor": 4
}