{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Classification k-NN Corse - Version ipyleaflet\n",
"\n",
"Cette version utilise **ipyleaflet** avec des interactions Python natives (pas de JavaScript inject\u00e9).\n",
"\n",
"**Avantage :** Fonctionne parfaitement dans Jupyter sans probl\u00e8me d'iframe ou de JavaScript."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Install the notebook's dependencies.\n",
"# %pip (rather than !pip) installs into the environment of the running kernel,\n",
"# so the packages are importable from the cells below.\n",
"%pip install ipyleaflet ipywidgets pandas numpy scikit-learn --quiet"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"from ipyleaflet import Map, CircleMarker, Marker, Polyline, LayerGroup, WidgetControl, AwesomeIcon\n",
"from ipywidgets import HTML, VBox, HBox, Label, IntSlider, Output\n",
"from sklearn.neighbors import KNeighborsClassifier\n",
"from IPython.display import display\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 1. Chargement des donn\u00e9es"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Read the two semicolon-separated, UTF-8 encoded CSV files\n",
"csv_kwargs = {'sep': ';', 'encoding': 'utf-8'}\n",
"\n",
"df_coords = pd.read_csv(\n",
"    'communes-de-corse-en-corse-et-francais.csv',\n",
"    **csv_kwargs\n",
")\n",
"df_territoires = pd.read_csv(\n",
"    'communes-par-territoire-de-projet-de-la-collectivite-territoriale-de-corse0.csv',\n",
"    **csv_kwargs\n",
")\n",
"\n",
"# Report how many rows each file contributed\n",
"print(f\"\u2705 {len(df_coords)} communes avec coordonn\u00e9es\")\n",
"print(f\"\u2705 {len(df_territoires)} communes avec territoires\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Coordinate extraction\n",
"def extract_coordinates(point_geo_str):\n",
"    \"\"\"Parse a 'lat,lon' string into a (latitude, longitude) pair of floats.\n",
"\n",
"    Returns (None, None) when the value is missing, when it does not contain\n",
"    exactly two comma-separated parts, or when a part is not a valid number.\n",
"    \"\"\"\n",
"    if pd.isna(point_geo_str):\n",
"        return None, None\n",
"    coords = str(point_geo_str).strip().split(',')\n",
"    if len(coords) != 2:\n",
"        return None, None\n",
"    try:\n",
"        return float(coords[0].strip()), float(coords[1].strip())\n",
"    except ValueError:\n",
"        # Non-numeric component: report it as missing instead of aborting.\n",
"        # Only ValueError is caught so that interrupts and real bugs still surface.\n",
"        return None, None\n",
"\n",
"# First component goes to 'Latitude', second to 'Longitude'\n",
"df_coords[['Latitude', 'Longitude']] = df_coords['Point_Geo'].apply(\n",
"    lambda x: pd.Series(extract_coordinates(x))\n",
")\n",
"\n",
"print(f\"\u2705 {df_coords['Latitude'].notna().sum()} coordonn\u00e9es extraites\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Merge of coordinates and territories on the normalized commune name\n",
"def normalize(name):\n",
"    \"\"\"Return the commune name upper-cased and stripped; '' for a missing value.\"\"\"\n",
"    return str(name).upper().strip() if not pd.isna(name) else ''\n",
"\n",
"df_coords['Commune_norm'] = df_coords['Nom fran\u00e7ais'].apply(normalize)\n",
"df_territoires['Commune_norm'] = df_territoires['Commune'].apply(normalize)\n",
"\n",
"# Rows without a name all normalize to '' and would otherwise be joined to\n",
"# each other by the merge, so they are left out on both sides.\n",
"coords_named = df_coords[df_coords['Commune_norm'] != '']\n",
"territoires_named = df_territoires.loc[\n",
"    df_territoires['Commune_norm'] != '',\n",
"    ['Commune_norm', 'Territoire de projet']\n",
"]\n",
"\n",
"df = pd.merge(coords_named, territoires_named, on='Commune_norm', how='inner')\n",
"df['Commune'] = df['Nom fran\u00e7ais']\n",
"# Keep only communes that have both coordinates and a territory\n",
"df_clean = df.dropna(subset=['Latitude', 'Longitude', 'Territoire de projet']).copy()\n",
"\n",
"# Number of communes per territory, in alphabetical order of territory\n",
"territory_counts = df_clean['Territoire de projet'].value_counts().sort_index()\n",
"\n",
"print(f\"\u2705 {len(df_clean)} communes fusionn\u00e9es\")\n",
"print(f\"\\nMicro-r\u00e9gions: {len(territory_counts)}\")\n",
"for region, count in territory_counts.items():\n",
"    print(f\" \u2022 {region}: {count} communes\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 2. Entra\u00eenement k-NN"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# k-NN model\n",
"# Target: project territory; features: (latitude, longitude) of each commune\n",
"y = df_clean['Territoire de projet'].values\n",
"X = df_clean[['Latitude', 'Longitude']].values\n",
"\n",
"# The coordinates are converted to radians before being used with the\n",
"# haversine metric\n",
"X_rad = np.radians(X)\n",
"\n",
"knn = KNeighborsClassifier(\n",
"    metric='haversine',\n",
"    weights='distance',\n",
"    n_neighbors=5\n",
")\n",
"knn.fit(X_rad, y)\n",
"\n",
"# Training summary\n",
"print(f\"\u2705 Mod\u00e8le k-NN entra\u00een\u00e9\")\n",
"print(f\"\u2705 {len(df_clean)} communes\")\n",
"print(f\"\u2705 {len(np.unique(y))} micro-r\u00e9gions\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 3. Configuration des couleurs"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# One colour per micro-region\n",
"colors = [\n",
"    'red', 'blue', 'green', 'purple', 'orange', 'darkred',\n",
"    'lightcoral', 'beige', 'darkblue', 'darkgreen', 'cadetblue',\n",
"    'darkviolet', 'pink', 'lightblue', 'lightgreen', 'gray'\n",
"]\n",
"microregions = sorted(df_clean['Territoire de projet'].unique())\n",
"\n",
"# Regions are paired with colours in alphabetical order; the palette wraps\n",
"# around when there are more regions than colours.\n",
"color_map = dict(zip(\n",
"    microregions,\n",
"    (colors[i % len(colors)] for i in range(len(microregions)))\n",
"))\n",
"\n",
"print(\"\u2705 Couleurs configur\u00e9es:\")\n",
"for region in sorted(color_map):\n",
"    color = color_map[region]\n",
"    print(f\" \u2022 {region}: {color}\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 4. Cr\u00e9ation de la carte interactive"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# ipyleaflet map, centred on the mean position of the communes\n",
"center_lat = df_clean['Latitude'].mean()\n",
"center_lon = df_clean['Longitude'].mean()\n",
"\n",
"m = Map(\n",
"    center=(center_lat, center_lon),\n",
"    zoom=9,\n",
"    scroll_wheel_zoom=True\n",
")\n",
"\n",
"# One small circle marker per commune, coloured by its territory\n",
"commune_layer = LayerGroup(name='Communes')\n",
"\n",
"for idx, row in df_clean.iterrows():\n",
"    # Same colour for the outline and the fill, looked up once per commune\n",
"    territory_color = color_map[row['Territoire de projet']]\n",
"    marker = CircleMarker(\n",
"        location=(row['Latitude'], row['Longitude']),\n",
"        radius=3,\n",
"        color=territory_color,\n",
"        fill_color=territory_color,\n",
"        fill_opacity=0.7,\n",
"        weight=1\n",
"    )\n",
"    # Popup: commune name, then its territory on a second line.\n",
"    # The separator is an explicit <br> tag so the string literal stays on one line.\n",
"    marker.popup = HTML(f\"{row['Commune']}<br>{row['Territoire de projet']}\")\n",
"    commune_layer.add_layer(marker)\n",
"\n",
"m.add_layer(commune_layer)\n",
"\n",
"print(f\"\u2705 {len(df_clean)} communes ajout\u00e9es \u00e0 la carte\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 5. Interface interactive avec widgets"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Widgets\n",
"k_slider = IntSlider(\n",
" value=5,\n",
" min=1,\n",
" max=15,\n",
" step=1,\n",
" description='k voisins:',\n",
" continuous_update=False\n",
")\n",
"\n",
"info_html = HTML(\n",
" value=\"
\n",
" Coordonn\u00e9es:
\n",
" Lat: {lat:.5f}\u00b0
\n",
" Lon: {lon:.5f}\u00b0\n",
"
{k_value} plus proches communes:
\n", "