From 0bb5ae732fbab1d7d4d5c71de17a869dc521b82c Mon Sep 17 00:00:00 2001
From: JOLIMAITRE Matthieu
Date: Mon, 1 Apr 2024 17:56:35 +0200
Subject: [PATCH] ia/tp3: add TP 3 setup script and initial K-means implementation

---
 ia/tp3/.gitignore       |  2 ++
 ia/tp3/README.md        |  8 +++++
 ia/tp3/main.py          | 84 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 ia/tp3/requirements.txt |  3 ++
 ia/tp3/setup.sh         | 16 ++++++++++
 5 files changed, 113 insertions(+)
 create mode 100644 ia/tp3/.gitignore
 create mode 100644 ia/tp3/README.md
 create mode 100755 ia/tp3/main.py
 create mode 100644 ia/tp3/requirements.txt
 create mode 100755 ia/tp3/setup.sh

diff --git a/ia/tp3/.gitignore b/ia/tp3/.gitignore
new file mode 100644
index 0000000..45a20f3
--- /dev/null
+++ b/ia/tp3/.gitignore
@@ -0,0 +1,2 @@
+/bin
+/venv
diff --git a/ia/tp3/README.md b/ia/tp3/README.md
new file mode 100644
index 0000000..052e514
--- /dev/null
+++ b/ia/tp3/README.md
@@ -0,0 +1,8 @@
+# SSIE / IA - TP 3
+
+## Setup
+
+The project can be set up with the `./setup.sh` script. This will:
+
+- Download the training dataset.
+- Install the dependencies in a virtual environment.
diff --git a/ia/tp3/main.py b/ia/tp3/main.py
new file mode 100755
index 0000000..3803fd3
--- /dev/null
+++ b/ia/tp3/main.py
@@ -0,0 +1,84 @@
+#!/usr/bin/env python3
+
+from dataclasses import dataclass
+
+import numpy
+import pandas
+import random
+import math
+
+
+def main():
+    dataset = load_dataset("bin/iris/iris.data")
+    # Note: iris.data is ordered by species, so the un-shuffled test part below is entirely Iris-virginica.
+    (dataset_training, dataset_test) = split_dataset(dataset, 0.8)
+    model = KVMeans(3, 4)
+    model.learn(dataset_training[["a", "b", "c", "d"]])
+    print("learned centroids:", [mean.position for mean in model.means])
+
+
+def load_dataset(path: str):
+    """Load the Iris CSV file into a dataframe with named columns."""
+    return pandas.read_csv(path, names=["a", "b", "c", "d", "category"])
+
+
+def split_dataset(dataset: pandas.DataFrame, ratio: float):
+    """Split the dataset into a first part of proportion `ratio` and the rest."""
+    total_len = len(dataset)
+    start_len = math.floor(total_len * ratio)
+    (start, rest) = numpy.array_split(dataset, [start_len])
+    return (start, rest)
+
+
+@dataclass
+class Centroid:
+    dimension: int
+    position: list[float]
+
+    @staticmethod
+    def new_random(dimension: int, rng: random.Random):
+        return Centroid(dimension, [rng.random() for _ in range(dimension)])
+
+
+class KVMeans:
+    def __init__(self, mean_count: int, dimension: int):
+        rng = random.Random(0)
+        self.means = [Centroid.new_random(dimension, rng) for _ in range(mean_count)]
+        self.dimension = dimension
+        self.mean_count = mean_count
+
+    def learn(self, dataset: pandas.DataFrame, iterations: int = 10):
+        """Fit the centroids to the dataset with Lloyd's algorithm."""
+        assert dataset.columns.size == self.dimension
+        points = [list(line) for (_, line) in dataset.iterrows()]
+        # Start the centroids on points sampled from the dataset instead of
+        # unit-cube noise, so no centroid starts far away from the data.
+        rng = random.Random(0)
+        for (mean, point) in zip(self.means, rng.sample(points, self.mean_count)):
+            mean.position = list(point)
+        for _ in range(iterations):
+            # Assign every point to its closest centroid.
+            clusters = [[] for _ in range(self.mean_count)]
+            for point in points:
+                clusters[self.predict(point)].append(point)
+            # Move each centroid to the mean of the points assigned to it.
+            for (index, assigned) in enumerate(clusters):
+                if len(assigned) > 0:
+                    self.means[index].position = [sum(axis) / len(assigned) for axis in zip(*assigned)]
+
+    def predict(self, point: list[float]) -> int:
+        """Return the index of the centroid closest to the given point."""
+        distances = [distance_dim(point, mean.position) for mean in self.means]
+        return distances.index(min(distances))
+
+
+def distance_dim(a: list[float], b: list[float]):
+    """Euclidean distance between two points of the same dimension."""
+    acc = 0
+    for (a_, b_) in zip(a, b):
+        acc += math.pow(a_ - b_, 2)
+    return math.sqrt(acc)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/ia/tp3/requirements.txt b/ia/tp3/requirements.txt
new file mode 100644
index 0000000..9def011
--- /dev/null
+++ b/ia/tp3/requirements.txt
@@ -0,0 +1,3 @@
+numpy
+ucimlrepo
+pandas
diff --git a/ia/tp3/setup.sh b/ia/tp3/setup.sh
new file mode 100755
index 0000000..523c276
--- /dev/null
+++ b/ia/tp3/setup.sh
@@ -0,0 +1,16 @@
+#!/bin/sh
+
+if ! [ -d venv ]
+then python3 -m venv venv
+fi
+
+if ! [ -d bin/iris ]
+then
+    mkdir -p bin/iris
+    wget -O bin/iris/iris.zip https://archive.ics.uci.edu/static/public/53/iris.zip
+    ( cd bin/iris && unzip iris.zip )
+    rm bin/iris/iris.zip
+fi
+
+. venv/bin/activate
+python3 -m pip install --requirement=requirements.txt
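A quick way to check what this patch produces is to compare the three learned clusters with the three species in the `category` column. The snippet below is only an illustrative sketch, not part of the patch: it assumes it is run from `ia/tp3` after `./setup.sh`, with the virtual environment active, and it reuses `load_dataset` and `KVMeans` as defined in `main.py` above; the `groups` bookkeeping around `collections.Counter` is an addition made here for the check.

# Illustrative sketch (assumptions: run from ia/tp3, venv active, dataset
# fetched by ./setup.sh; reuses the helpers defined in main.py).
from collections import Counter

from main import KVMeans, load_dataset

# Cluster the four feature columns of the whole Iris file.
dataset = load_dataset("bin/iris/iris.data")
model = KVMeans(3, 4)
model.learn(dataset[["a", "b", "c", "d"]])

# Count, for every predicted cluster, how many rows of each species fall in it.
groups: dict[int, Counter] = {}
for (_, row) in dataset.iterrows():
    cluster = model.predict([row["a"], row["b"], row["c"], row["d"]])
    groups.setdefault(cluster, Counter())[row["category"]] += 1

for (cluster, counts) in sorted(groups.items()):
    print(f"cluster {cluster}: {dict(counts)}")

With the Iris data, one cluster typically comes out almost entirely Iris-setosa, while the other two split Iris-versicolor and Iris-virginica with some overlap.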