diff --git a/activite1/knn_corse_interactive_v2.ipynb b/activite1/knn_corse_interactive_v2.ipynb
deleted file mode 100644
index a5ac185..0000000
--- a/activite1/knn_corse_interactive_v2.ipynb
+++ /dev/null
@@ -1,744 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# 🗺️ Classification k-NN : Haute-Corse ou Corse-du-Sud ?\n",
- "\n",
- "## Objectif\n",
- "Utiliser l'algorithme des **k plus proches voisins (k-NN)** pour déterminer si un point de la carte de Corse se situe en **Haute-Corse (2B)** ou en **Corse-du-Sud (2A)**, en se basant sur les villages les plus proches.\n",
- "\n",
- "## Principe\n",
- "1. On charge les données des villages corses avec leurs coordonnées GPS et leur département\n",
- "2. On choisit un point sur la carte\n",
- "3. On calcule les distances entre ce point et tous les villages\n",
- "4. On identifie les k villages les plus proches\n",
- "5. On vote : le département majoritaire parmi ces k villages devient la prédiction"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## 📦 Installation et imports"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Installation des bibliothèques nécessaires (si besoin)\n",
- "import sys\n",
- "!{sys.executable} -m pip install folium pandas numpy -q"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "import pandas as pd\n",
- "import numpy as np\n",
- "import folium\n",
- "from folium.plugins import MarkerCluster\n",
- "import math\n",
- "import json\n",
- "from collections import Counter"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## 📊 Chargement des données"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Charger le fichier CSV (colonnes séparées par des tabulations)\n",
- "# Remplacez 'villages_corse.csv' par le chemin de votre fichier\n",
- "df = pd.read_csv('villages_corse.csv', sep='\\t', encoding='utf-8')\n",
- "\n",
- "# Afficher les premières lignes\n",
- "print(f\"Nombre de villages : {len(df)}\")\n",
- "print(f\"\\nColonnes : {list(df.columns)}\")\n",
- "df.head()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## 🔧 Préparation des données"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "def parse_coordinates(point_geo_str):\n",
- " \"\"\"\n",
- " Parse la colonne Point_Geo pour extraire latitude et longitude.\n",
- " Format attendu : \"latitude, longitude\"\n",
- " Exemple : \"41.984099158, 8.798384636\"\n",
- " \"\"\"\n",
- " try:\n",
- " # Séparer par la virgule\n",
- " parts = str(point_geo_str).split(',')\n",
- " lat = float(parts[0].strip())\n",
- " lon = float(parts[1].strip())\n",
- " return lat, lon\n",
- " except Exception as e:\n",
- " print(f\"Erreur parsing: {point_geo_str} - {e}\")\n",
- " return None, None\n",
- "\n",
- "# Extraire les coordonnées\n",
- "df[['latitude', 'longitude']] = df['Point_Geo'].apply(\n",
- " lambda x: pd.Series(parse_coordinates(x))\n",
- ")\n",
- "\n",
- "# Supprimer les lignes sans coordonnées valides\n",
- "df = df.dropna(subset=['latitude', 'longitude'])\n",
- "\n",
- "# Simplifier les noms de départements\n",
- "df['dept_simple'] = df['Code Département'].apply(lambda x: '2A' if str(x) == '2A' else '2B')\n",
- "\n",
- "print(f\"Villages avec coordonnées valides : {len(df)}\")\n",
- "print(f\"\\nRépartition par département :\")\n",
- "print(df['dept_simple'].value_counts())\n",
- "\n",
- "df[['Nom français', 'dept_simple', 'latitude', 'longitude']].head(10)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## 📏 Fonction de calcul de distance\n",
- "\n",
- "Nous utilisons la **formule de Haversine** pour calculer la distance entre deux points GPS sur la surface de la Terre."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "def haversine_distance(lat1, lon1, lat2, lon2):\n",
- " \"\"\"\n",
- " Calcule la distance en kilomètres entre deux points GPS.\n",
- " Formule de Haversine.\n",
- " \"\"\"\n",
- " R = 6371 # Rayon de la Terre en km\n",
- " \n",
- " # Conversion en radians\n",
- " lat1_rad = math.radians(lat1)\n",
- " lat2_rad = math.radians(lat2)\n",
- " delta_lat = math.radians(lat2 - lat1)\n",
- " delta_lon = math.radians(lon2 - lon1)\n",
- " \n",
- " # Formule de Haversine\n",
- " a = math.sin(delta_lat/2)**2 + math.cos(lat1_rad) * math.cos(lat2_rad) * math.sin(delta_lon/2)**2\n",
- " c = 2 * math.asin(math.sqrt(a))\n",
- " \n",
- " return R * c\n",
- "\n",
- "# Test de la fonction\n",
- "# Distance entre Ajaccio (41.9267, 8.7369) et Bastia (42.7028, 9.4500)\n",
- "dist_test = haversine_distance(41.9267, 8.7369, 42.7028, 9.4500)\n",
- "print(f\"Distance Ajaccio-Bastia : {dist_test:.1f} km\")\n",
- "\n",
- "# Test avec Afa et Alando (vos exemples)\n",
- "afa = df[df['Nom français'] == 'Afa'].iloc[0]\n",
- "alando = df[df['Nom français'] == 'Alando'].iloc[0]\n",
- "dist_afa_alando = haversine_distance(afa['latitude'], afa['longitude'], \n",
- " alando['latitude'], alando['longitude'])\n",
- "print(f\"Distance Afa-Alando : {dist_afa_alando:.1f} km\")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## 🎯 Algorithme k-NN"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "def knn_classify(test_lat, test_lon, df, k=5):\n",
- " \"\"\"\n",
- " Classifie un point (test_lat, test_lon) en utilisant k-NN.\n",
- " \n",
- " Retourne :\n",
- " - prediction : le département prédit ('2A' ou '2B')\n",
- " - neighbors : DataFrame des k plus proches voisins\n",
- " - votes : dictionnaire des votes\n",
- " \"\"\"\n",
- " # Calculer les distances pour tous les villages\n",
- " distances = []\n",
- " for idx, row in df.iterrows():\n",
- " dist = haversine_distance(test_lat, test_lon, row['latitude'], row['longitude'])\n",
- " distances.append({\n",
- " 'village': row['Nom français'],\n",
- " 'nom_corse': row['Nom corse'],\n",
- " 'departement': row['dept_simple'],\n",
- " 'latitude': row['latitude'],\n",
- " 'longitude': row['longitude'],\n",
- " 'distance': dist\n",
- " })\n",
- " \n",
- " # Créer un DataFrame et trier par distance\n",
- " dist_df = pd.DataFrame(distances)\n",
- " dist_df = dist_df.sort_values('distance')\n",
- " \n",
- " # Sélectionner les k plus proches\n",
- " neighbors = dist_df.head(k)\n",
- " \n",
- " # Voter\n",
- " votes = Counter(neighbors['departement'])\n",
- " prediction = votes.most_common(1)[0][0]\n",
- " \n",
- " return prediction, neighbors, votes\n",
- "\n",
- "# Test de l'algorithme avec un point au centre de la Corse\n",
- "test_lat, test_lon = 42.15, 9.05\n",
- "k = 5\n",
- "\n",
- "prediction, neighbors, votes = knn_classify(test_lat, test_lon, df, k=k)\n",
- "\n",
- "print(f\"\\n🎯 Point de test : ({test_lat}, {test_lon})\")\n",
- "print(f\"\\nAvec k={k} :\")\n",
- "print(f\"Prédiction : {'Corse du Sud (2A)' if prediction == '2A' else 'Haute-Corse (2B)'}\")\n",
- "print(f\"Votes : {dict(votes)}\")\n",
- "print(f\"\\nLes {k} plus proches voisins :\")\n",
- "print(neighbors[['village', 'nom_corse', 'departement', 'distance']])"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## 🗺️ Visualisation avec Folium"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "def create_map(test_lat=None, test_lon=None, k=5, show_all_villages=False, show_boundaries=False):\n",
- " \"\"\"\n",
- " Crée une carte interactive avec Folium.\n",
- " \n",
- " Paramètres:\n",
- " - test_lat, test_lon: coordonnées du point à tester\n",
- " - k: nombre de voisins\n",
- " - show_all_villages: afficher tous les villages\n",
- " - show_boundaries: afficher les frontières des communes (peut être lent)\n",
- " \"\"\"\n",
- " # Centre de la Corse\n",
- " center_lat = 42.15\n",
- " center_lon = 9.05\n",
- " \n",
- " # Créer la carte\n",
- " m = folium.Map(\n",
- " location=[center_lat, center_lon],\n",
- " zoom_start=9,\n",
- " tiles='OpenStreetMap'\n",
- " )\n",
- " \n",
- " # Afficher les frontières des communes (optionnel)\n",
- " if show_boundaries:\n",
- " print(\"Affichage des frontières des communes...\")\n",
- " for idx, row in df.iterrows():\n",
- " try:\n",
- " zone_geo = json.loads(row['Zone_geo'])\n",
- " color = 'red' if row['dept_simple'] == '2A' else 'blue'\n",
- " \n",
- " folium.GeoJson(\n",
- " zone_geo,\n",
- " style_function=lambda x, color=color: {\n",
- " 'fillColor': color,\n",
- " 'color': color,\n",
- " 'weight': 1,\n",
- " 'fillOpacity': 0.1\n",
- " },\n",
- " tooltip=row['Nom français']\n",
- " ).add_to(m)\n",
- " except:\n",
- " pass\n",
- " \n",
- " # Afficher tous les villages (optionnel)\n",
- " if show_all_villages:\n",
- " marker_cluster = MarkerCluster().add_to(m)\n",
- " \n",
- " for idx, row in df.iterrows():\n",
- " color = 'red' if row['dept_simple'] == '2A' else 'blue'\n",
- " folium.CircleMarker(\n",
- " location=[row['latitude'], row['longitude']],\n",
- " radius=3,\n",
- " color=color,\n",
- " fill=True,\n",
- " fillColor=color,\n",
- " fillOpacity=0.4,\n",
- " popup=f\"{row['Nom français']}<br>{row['Nom corse']}<br>({row['dept_simple']})\"\n",
- " ).add_to(marker_cluster)\n",
- " \n",
- " # Si un point de test est fourni\n",
- " if test_lat is not None and test_lon is not None:\n",
- " # Classification\n",
- " prediction, neighbors, votes = knn_classify(test_lat, test_lon, df, k=k)\n",
- " \n",
- " # Marqueur pour le point de test\n",
- " color = 'darkred' if prediction == '2A' else 'darkblue'\n",
- " dept_name = 'Corse du Sud (2A)' if prediction == '2A' else 'Haute-Corse (2B)'\n",
- " \n",
- " folium.Marker(\n",
- " location=[test_lat, test_lon],\n",
- " popup=f\"Point à classifier<br>Prédiction : {dept_name}<br>Votes : {dict(votes)}\",\n",
- " icon=folium.Icon(color=color, icon='star', prefix='fa')\n",
- " ).add_to(m)\n",
- " \n",
- " # Afficher les k plus proches voisins\n",
- " for idx, neighbor in neighbors.iterrows():\n",
- " # Marqueur pour chaque voisin\n",
- " color = 'red' if neighbor['departement'] == '2A' else 'blue'\n",
- " folium.Marker(\n",
- " location=[neighbor['latitude'], neighbor['longitude']],\n",
- " popup=f\"{neighbor['village']}<br>{neighbor['nom_corse']}<br>{neighbor['departement']}<br>Distance: {neighbor['distance']:.2f} km\",\n",
- " icon=folium.Icon(color=color, icon='info-sign')\n",
- " ).add_to(m)\n",
- " \n",
- " # Ligne entre le point test et le voisin\n",
- " folium.PolyLine(\n",
- " locations=[\n",
- " [test_lat, test_lon],\n",
- " [neighbor['latitude'], neighbor['longitude']]\n",
- " ],\n",
- " color=color,\n",
- " weight=2,\n",
- " opacity=0.5,\n",
- " tooltip=f\"{neighbor['distance']:.2f} km\"\n",
- " ).add_to(m)\n",
- " \n",
- " # Légende\n",
- " legend_html = '''\n",
- "
Légende
\n", - "Corse du Sud (2A)
\n", - "Haute-Corse (2B)
\n", - "Point à classifier
\n", - "