This commit is contained in:
JOLIMAITRE Matthieu 2024-04-01 17:56:35 +02:00
parent 656356fb3b
commit 0bb5ae732f
5 changed files with 96 additions and 0 deletions

67
ia/tp3/main.py Executable file
View file

@ -0,0 +1,67 @@
#!/bin/env -S python3
from dataclasses import dataclass
import numpy
import pandas
import random
import math
def main():
    """Load the iris file, split it, and feed the training part to the model.

    Reads ``bin/iris/iris.data``, keeps the leading 80 % of the rows as the
    training part, builds a ``KVMeans`` with 3 means in 4 dimensions and calls
    ``learn`` on the four feature columns. The trailing part of the split is
    not used yet.
    """
    full_set = load_dataset("bin/iris/iris.data")
    training_set, _held_out = split_dataset(full_set, 0.8)

    model = KVMeans(3, 4)
    feature_columns = ["a", "b", "c", "d"]
    model.learn(training_set[feature_columns])
def load_dataset(path: str):
    """Read a header-less CSV file into a DataFrame.

    Args:
        path: location of the CSV file, handed straight to
            ``pandas.read_csv``.

    Returns:
        A DataFrame whose five columns are named ``a``, ``b``, ``c``, ``d``
        and ``category``, in that order.
    """
    column_names = ["a", "b", "c", "d", "category"]
    return pandas.read_csv(path, names=column_names)
def split_dataset(dataset: pandas.DataFrame, ratio: float):
    """Split ``dataset`` by row position into a leading and a trailing part.

    Rows keep their existing order; nothing is shuffled, so a file that is
    sorted by class yields parts with different class mixes.

    Args:
        dataset: frame to split.
        ratio: fraction of the rows assigned to the first part. The cut is
            made after ``floor(len(dataset) * ratio)`` rows.

    Returns:
        A ``(start, rest)`` tuple: the rows before the cut and the rows from
        the cut onwards, each keeping its original index labels.
    """
    start_len = math.floor(len(dataset) * ratio)
    # Positional slicing replaces numpy.array_split here: on a DataFrame that
    # numpy helper goes through DataFrame.swapaxes, which pandas has
    # deprecated. The resulting two parts are the same.
    return (dataset.iloc[:start_len], dataset.iloc[start_len:])
@dataclass
class Centroid:
    """A point described by its dimension count and its coordinates.

    Nothing here enforces that ``len(position)`` equals ``dimension``.
    """

    dimension: int  # number of coordinates the centroid is meant to have
    position: list[float]  # the coordinates themselves

    @staticmethod
    def new_random(dimension: int, rng: random.Random):
        """Build a centroid with ``dimension`` coordinates drawn from ``rng``.

        Each coordinate is one ``rng.random()`` call, made in coordinate
        order, so a seeded generator gives reproducible centroids.
        """
        coordinates = []
        for _ in range(dimension):
            coordinates.append(rng.random())
        return Centroid(dimension=dimension, position=coordinates)
class KVMeans:
    """Holder for ``mean_count`` centroids of a fixed dimension.

    Training and prediction are placeholders for now: ``learn`` only checks
    the width of its input and walks the rows without updating anything, and
    ``predict`` does nothing.
    """

    def __init__(self, mean_count: int, dimension: int):
        """Create ``mean_count`` randomly placed centroids.

        The generator is always seeded with 0, so every instance built with
        the same arguments starts from the same centroid positions.
        """
        seeded_rng = random.Random(0)
        centroids = []
        for _ in range(mean_count):
            centroids.append(Centroid.new_random(dimension, seeded_rng))
        self.means = centroids
        self.dimension = dimension
        self.mean_count = mean_count

    def learn(self, dataset: pandas.DataFrame):
        """Check the input width and iterate over the rows (no update yet).

        Raises:
            AssertionError: if ``dataset`` does not have exactly
                ``self.dimension`` columns.
        """
        column_count = dataset.columns.size
        assert column_count == self.dimension
        for _label, _row in dataset.iterrows():
            for _axis in range(self.dimension):
                # Placeholder: the per-coordinate work is not written yet.
                continue

    def predict(self):
        """Placeholder; currently returns ``None``."""
        return None
def distance_dim(a: list[float], b: list[float]):
    """Return the Euclidean distance between the points ``a`` and ``b``.

    Args:
        a: coordinates of the first point.
        b: coordinates of the second point; must have as many coordinates
            as ``a``.

    Returns:
        The distance as a float (``0.0`` for two empty points).

    Raises:
        ValueError: if the two points do not have the same number of
            coordinates.
    """
    # math.dist rejects points of different dimension; the earlier zip-based
    # loop silently dropped the surplus coordinates and returned a distance
    # for the truncated points instead.
    return math.dist(a, b)
# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()