commit 0bb5ae732f (parent 656356fb3b)
5 changed files with 96 additions and 0 deletions
ia/tp3/.gitignore (vendored, new file, 2 lines)
@@ -0,0 +1,2 @@
/bin
/venv
ia/tp3/README.md (new file, 8 lines)
@@ -0,0 +1,8 @@
# SSIE / IA - TP 3

## Setup

The project can be set up with the `./setup.sh` script. This will:

- Download the training set.
- Install the dependencies in a virtual environment.
ia/tp3/main.py (new executable file, 67 lines)
@@ -0,0 +1,67 @@
#!/usr/bin/env python3

from dataclasses import dataclass

import numpy
import pandas
import random
import math


def main():
    dataset = load_dataset("bin/iris/iris.data")
    # print("dataset", dataset)
    (dataset_training, dataset_test) = split_dataset(dataset, 0.8)
    # print("dataset_training", dataset_training, "dataset_test", dataset_test)
    centroid = KVMeans(3, 4)
    centroid.learn(dataset_training[["a", "b", "c", "d"]])



def load_dataset(path: str):
    data = pandas.read_csv(path, names=["a", "b", "c", "d", "category"])
    return data


def split_dataset(dataset: pandas.DataFrame, ratio: float):
    total_len = len(dataset)
    start_len = math.floor(total_len * ratio)
    (start, rest) = numpy.array_split(dataset, [start_len])
    return (start, rest)


@dataclass
class Centroid:
    dimension: int
    position: list[float]

    @staticmethod
    def new_random(dimension: int, rng: random.Random):
        return Centroid(dimension, [rng.random() for _ in range(dimension)])


class KVMeans:

    def __init__(self, mean_count: int, dimension: int):
        rng = random.Random(0)
        self.means = [Centroid.new_random(dimension, rng) for _ in range(mean_count)]
        self.dimension = dimension
        self.mean_count = mean_count

    def learn(self, dataset: pandas.DataFrame):
        assert dataset.columns.size == self.dimension
        for (_, line) in dataset.iterrows():
            for absc in range(self.dimension):
                pass

        # print([3 for _ in series])
        # print(series)
        # dataset.map(lambda e: print(e))

    def predict(self):
        pass

def distance_dim(a: list[float], b: list[float]):
    acc = 0
    for (a_, b_) in zip(a, b): acc += math.pow(a_ - b_, 2)
    return math.sqrt(acc)

if __name__ == "__main__": main()
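In this commit, KVMeans.learn and KVMeans.predict are still stubs. The sketch below shows one plausible direction: a plain Lloyd's-style assignment/update loop in the spirit of the Centroid and distance_dim helpers above. The names lloyd_step and distance, and the fixed iteration count, are illustrative assumptions, not part of the commit.

# Sketch only (not part of the commit): a standalone Lloyd's-style k-means step
# using the same Euclidean distance as distance_dim in main.py.
import math
import random


def distance(a: list[float], b: list[float]) -> float:
    # Euclidean distance, as in distance_dim above.
    return math.sqrt(sum((x - y) ** 2 for (x, y) in zip(a, b)))


def lloyd_step(points: list[list[float]], means: list[list[float]]) -> list[list[float]]:
    # Assignment step: group every point under its nearest mean.
    clusters: list[list[list[float]]] = [[] for _ in means]
    for p in points:
        nearest = min(range(len(means)), key=lambda k: distance(p, means[k]))
        clusters[nearest].append(p)
    # Update step: move each mean to the centroid of its cluster,
    # keeping the old position when a cluster ends up empty.
    updated = []
    for (mean, cluster) in zip(means, clusters):
        if cluster:
            updated.append([sum(p[i] for p in cluster) / len(cluster) for i in range(len(mean))])
        else:
            updated.append(mean)
    return updated


if __name__ == "__main__":
    rng = random.Random(0)
    points = [[rng.random() for _ in range(4)] for _ in range(30)]  # stand-in for the iris rows
    means = [[rng.random() for _ in range(4)] for _ in range(3)]    # 3 clusters in 4 dimensions, as in KVMeans(3, 4)
    for _ in range(10):  # fixed number of iterations; a convergence check could replace this
        means = lloyd_step(points, means)
    print(means)

A separate caveat: the UCI iris.data file is grouped by species, so the unshuffled 80/20 cut in split_dataset puts all of two species and only part of the third into the training set. Shuffling first, for example with dataset.sample(frac=1, random_state=0) before numpy.array_split, would give a more representative split.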
ia/tp3/requirements.txt (new file, 3 lines)
@@ -0,0 +1,3 @@
numpy
ucimlrepo
pandas
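requirements.txt pulls in ucimlrepo although main.py only reads the CSV downloaded by setup.sh. As a minimal sketch of the alternative that dependency suggests (assuming the package's fetch_ucirepo entry point; id 53 is taken from the archive.ics.uci.edu URL in setup.sh below), the same data could be fetched without wget:

# Sketch only (not part of the commit): fetching Iris through ucimlrepo.
# fetch_ucirepo and the attribute layout below are assumptions about that package's API.
from ucimlrepo import fetch_ucirepo

iris = fetch_ucirepo(id=53)        # 53 = Iris, the same id as in the setup.sh download URL
features = iris.data.features      # pandas DataFrame with the four measurements
targets = iris.data.targets        # pandas DataFrame with the species labels
print(features.head())
print(targets.head())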
ia/tp3/setup.sh (new executable file, 16 lines)
@@ -0,0 +1,16 @@
#!/bin/sh

if ! [ -d venv ]
then python3 -m venv venv
fi

if ! [ -d bin/iris ]
then
    mkdir -p bin/iris
    wget -O bin/iris/iris.zip https://archive.ics.uci.edu/static/public/53/iris.zip
    ( cd bin/iris && unzip iris.zip )
    rm bin/iris/iris.zip
fi

. venv/bin/activate
python3 -m pip install --requirement=requirements.txt