{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# 🗺️ k-NN Corse : Version Interactive avec Clic sur Carte\n", "\n", "## 🎮 Mode d'emploi\n", "1. Exécutez toutes les cellules\n", "2. **Cliquez sur la carte** pour choisir un point\n", "3. Ajustez k avec le curseur\n", "4. Observez la classification en temps réel !" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 📦 Installation" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Installation des bibliothèques\n", "import sys\n", "!{sys.executable} -m pip install ipyleaflet ipywidgets pandas numpy -q" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import math\n", "from collections import Counter\n", "from ipyleaflet import Map, Marker, CircleMarker, Polyline, AwesomeIcon, LayerGroup\n", "from ipywidgets import HTML, VBox, HBox, IntSlider, Output, Label\n", "from IPython.display import display, clear_output" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 📊 Chargement des données" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Charger les données\n", "df = pd.read_csv('villages_corse.csv', sep='\\t', encoding='utf-8')\n", "\n", "def parse_coordinates(point_geo_str):\n", " try:\n", " parts = str(point_geo_str).split(',')\n", " lat = float(parts[0].strip())\n", " lon = float(parts[1].strip())\n", " return lat, lon\n", " except:\n", " return None, None\n", "\n", "df[['latitude', 'longitude']] = df['Point_Geo'].apply(\n", " lambda x: pd.Series(parse_coordinates(x))\n", ")\n", "\n", "df = df.dropna(subset=['latitude', 'longitude'])\n", "df['dept_simple'] = df['Code Département'].apply(lambda x: '2A' if str(x) == '2A' else '2B')\n", "\n", "print(f\"✅ {len(df)} villages chargés\")\n", "print(f\" - Corse du Sud (2A) : {len(df[df['dept_simple']=='2A'])}\")\n", "print(f\" - Haute-Corse (2B) : {len(df[df['dept_simple']=='2B'])}\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 🧮 Fonctions k-NN" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def haversine_distance(lat1, lon1, lat2, lon2):\n", " \"\"\"Calcule la distance en km entre deux points GPS.\"\"\"\n", " R = 6371\n", " lat1_rad = math.radians(lat1)\n", " lat2_rad = math.radians(lat2)\n", " delta_lat = math.radians(lat2 - lat1)\n", " delta_lon = math.radians(lon2 - lon1)\n", " \n", " a = math.sin(delta_lat/2)**2 + math.cos(lat1_rad) * math.cos(lat2_rad) * math.sin(delta_lon/2)**2\n", " c = 2 * math.asin(math.sqrt(a))\n", " \n", " return R * c\n", "\n", "def knn_classify(test_lat, test_lon, df, k=5):\n", " \"\"\"Classifie un point avec k-NN.\"\"\"\n", " distances = []\n", " for idx, row in df.iterrows():\n", " dist = haversine_distance(test_lat, test_lon, row['latitude'], row['longitude'])\n", " distances.append({\n", " 'village': row['Nom français'],\n", " 'nom_corse': row['Nom corse'],\n", " 'departement': row['dept_simple'],\n", " 'latitude': row['latitude'],\n", " 'longitude': row['longitude'],\n", " 'distance': dist\n", " })\n", " \n", " dist_df = pd.DataFrame(distances).sort_values('distance')\n", " neighbors = dist_df.head(k)\n", " votes = Counter(neighbors['departement'])\n", " prediction = votes.most_common(1)[0][0]\n", " \n", " return prediction, neighbors, votes" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 🗺️ Carte Interactive\n", "\n", "**Instructions :**\n", "- 🖱️ **Cliquez sur la carte** pour placer un point\n", "- 🎚️ **Ajustez k** avec le curseur\n", "- 👁️ La classification se met à jour automatiquement" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Créer la carte\n", "m = Map(center=(42.15, 9.05), zoom=9, scroll_wheel_zoom=True)\n", "\n", "# Couches pour les éléments dynamiques\n", "test_point_layer = LayerGroup()\n", "neighbors_layer = LayerGroup()\n", "lines_layer = LayerGroup()\n", "\n", "m.add_layer(test_point_layer)\n", "m.add_layer(neighbors_layer)\n", "m.add_layer(lines_layer)\n", "\n", "# Afficher quelques villages de référence\n", "sample_villages = df.sample(n=min(50, len(df)), random_state=42)\n", "for idx, row in sample_villages.iterrows():\n", " color = 'red' if row['dept_simple'] == '2A' else 'blue'\n", " circle = CircleMarker(\n", " location=(row['latitude'], row['longitude']),\n", " radius=3,\n", " color=color,\n", " fill_color=color,\n", " fill_opacity=0.4,\n", " weight=1\n", " )\n", " m.add_layer(circle)\n", "\n", "# Widget pour k\n", "k_slider = IntSlider(\n", " value=5,\n", " min=1,\n", " max=20,\n", " step=1,\n", " description='k:',\n", " continuous_update=False\n", ")\n", "\n", "# Zone de résultats\n", "result_output = Output()\n", "info_html = HTML(value=\"
👆 Cliquez sur la carte pour classifier un point
\")\n", "\n", "# Variable globale pour stocker les coordonnées\n", "current_coords = {'lat': None, 'lon': None}\n", "\n", "def update_classification(lat, lon, k):\n", " \"\"\"Met à jour la classification et la visualisation.\"\"\"\n", " # Effacer les couches précédentes\n", " test_point_layer.clear_layers()\n", " neighbors_layer.clear_layers()\n", " lines_layer.clear_layers()\n", " \n", " # Classification\n", " prediction, neighbors, votes = knn_classify(lat, lon, df, k=k)\n", " \n", " # Couleur selon prédiction\n", " color = 'red' if prediction == '2A' else 'blue'\n", " dept_name = 'Corse du Sud (2A)' if prediction == '2A' else 'Haute-Corse (2B)'\n", " \n", " # Marqueur du point test\n", " icon = AwesomeIcon(\n", " name='star',\n", " marker_color='darkred' if prediction == '2A' else 'darkblue',\n", " icon_color='white'\n", " )\n", " test_marker = Marker(location=(lat, lon), icon=icon, draggable=False)\n", " test_point_layer.add_layer(test_marker)\n", " \n", " # Afficher les k plus proches voisins\n", " for idx, neighbor in neighbors.iterrows():\n", " n_color = 'red' if neighbor['departement'] == '2A' else 'blue'\n", " \n", " # Marqueur du voisin\n", " n_marker = CircleMarker(\n", " location=(neighbor['latitude'], neighbor['longitude']),\n", " radius=8,\n", " color=n_color,\n", " fill_color=n_color,\n", " fill_opacity=0.7,\n", " weight=2\n", " )\n", " neighbors_layer.add_layer(n_marker)\n", " \n", " # Ligne vers le voisin\n", " line = Polyline(\n", " locations=[\n", " (lat, lon),\n", " (neighbor['latitude'], neighbor['longitude'])\n", " ],\n", " color=n_color,\n", " weight=2,\n", " opacity=0.5\n", " )\n", " lines_layer.add_layer(line)\n", " \n", " # Afficher les résultats\n", " with result_output:\n", " clear_output(wait=True)\n", " print(f\"📍 Coordonnées : ({lat:.4f}, {lon:.4f})\")\n", " print(f\"🔢 k = {k}\")\n", " print(f\"\\n🎯 Prédiction : {dept_name}\")\n", " print(f\"📊 Votes : 2A={votes.get('2A', 0)}, 2B={votes.get('2B', 0)}\")\n", " print(f\"\\n🏘️ Les {k} plus proches villages :\")\n", " print(neighbors[['village', 'nom_corse', 'departement', 'distance']].to_string(index=False))\n", " \n", " # Mettre à jour l'info\n", " info_html.value = f\"