67 lines
1.8 KiB
Python
Executable file
67 lines
1.8 KiB
Python
Executable file
#!/bin/env -S python3
|
|
|
|
from dataclasses import dataclass
|
|
|
|
import numpy
|
|
import pandas
|
|
import random
|
|
import math
|
|
|
|
def main():
    """Run the script: load the data, split it, and feed the training part to the model.

    Reads ``bin/iris/iris.data`` (relative to the current working directory),
    keeps the leading 80% of the rows for training, builds a ``KVMeans`` with
    3 means over 4 dimensions, and passes the four feature columns of the
    training rows to ``KVMeans.learn``. The remaining rows are produced by the
    split but not used yet.
    """
    feature_columns = ["a", "b", "c", "d"]

    samples = load_dataset("bin/iris/iris.data")
    training_rows, _held_out_rows = split_dataset(samples, 0.8)

    model = KVMeans(3, 4)
    model.learn(training_rows[feature_columns])
|
|
|
|
|
|
|
|
def load_dataset(path: str):
    """Read a CSV file into a DataFrame with fixed column names.

    Because explicit names are supplied, the first row of the file is read as
    data rather than as a header.

    Args:
        path: Location of the CSV file; handed directly to ``pandas.read_csv``.

    Returns:
        A ``pandas.DataFrame`` whose columns are ``a``, ``b``, ``c``, ``d`` and
        ``category``, in that order.
    """
    column_names = ["a", "b", "c", "d", "category"]
    return pandas.read_csv(path, names=column_names)
|
|
|
|
|
|
def split_dataset(dataset: pandas.DataFrame, ratio: float):
    """Split *dataset* into a leading part and the remaining rows.

    The split is purely positional and keeps the original row order; nothing
    is shuffled. NOTE(review): if the input file is sorted by category, the
    trailing part will not be representative -- confirm whether the caller
    should shuffle first.

    Args:
        dataset: Frame to split.
        ratio: Fraction of the rows that goes into the leading part. The
            leading part receives ``floor(len(dataset) * ratio)`` rows.

    Returns:
        A ``(start, rest)`` tuple of DataFrames. Row labels are preserved, so
        ``rest`` keeps the index values it had in *dataset*.
    """
    start_len = math.floor(len(dataset) * ratio)

    # Slice positionally with ``iloc`` rather than ``numpy.array_split``: on a
    # DataFrame the latter goes through ``DataFrame.swapaxes``, which pandas
    # has deprecated.
    start = dataset.iloc[:start_len]
    rest = dataset.iloc[start_len:]
    return (start, rest)
|
|
|
|
|
|
@dataclass
class Centroid:
    """A point described by its dimension count and its coordinates."""

    # Number of coordinates the centroid is meant to have.
    dimension: int
    # Coordinate values; ``new_random`` fills in exactly ``dimension`` of them.
    position: list[float]

    @classmethod
    def new_random(cls, dimension: int, rng: random.Random) -> "Centroid":
        """Build a centroid whose coordinates are drawn from *rng*.

        Implemented as a classmethod so that subclasses get instances of
        their own type instead of a plain ``Centroid``.

        Args:
            dimension: Number of coordinates to generate.
            rng: Random source; ``rng.random()`` is called once per
                coordinate, so each value lies in ``[0.0, 1.0)``.

        Returns:
            A new instance with ``dimension`` random coordinates.
        """
        return cls(dimension, [rng.random() for _ in range(dimension)])
|
|
|
|
class KVMeans:
    """Container for a fixed number of randomly initialised centroids.

    Training (``learn``) and inference (``predict``) are still stubs: neither
    of them changes ``means`` or produces a result yet.
    """

    def __init__(self, mean_count: int, dimension: int):
        """Create ``mean_count`` random centroids of the given dimension.

        Args:
            mean_count: How many centroids to create.
            dimension: Number of coordinates per centroid.
        """
        self.dimension = dimension
        self.mean_count = mean_count

        # The generator is seeded with a constant, so every instance built
        # with the same arguments starts from the same centroids.
        seeded_rng = random.Random(0)
        self.means = []
        for _ in range(mean_count):
            self.means.append(Centroid.new_random(dimension, seeded_rng))

    def learn(self, dataset: pandas.DataFrame):
        """Walk over *dataset*; the centroid update itself is not written yet.

        Args:
            dataset: Frame whose number of columns must equal
                ``self.dimension``.

        Raises:
            AssertionError: If the column count differs from
                ``self.dimension`` (only when assertions are enabled).
        """
        column_total = dataset.columns.size
        assert column_total == self.dimension

        # TODO: placeholder traversal -- visits every row and every axis
        # without touching ``self.means``.
        for _label, _row in dataset.iterrows():
            for _axis in range(self.dimension):
                pass

    def predict(self):
        """Not implemented yet; currently does nothing and returns ``None``."""
        return None
|
|
|
|
def distance_dim(a: list[float], b: list[float]):
    """Return the Euclidean distance between points *a* and *b*.

    Args:
        a: Coordinates of the first point.
        b: Coordinates of the second point; must have as many coordinates
            as *a*.

    Returns:
        The distance as a float (``0.0`` for two empty points).

    Raises:
        ValueError: If *a* and *b* do not have the same number of
            coordinates. Pairing them with ``zip`` would silently ignore the
            surplus coordinates and yield a wrong distance.
    """
    # ``math.dist`` checks the dimensions and avoids the overflow that
    # squaring large coordinates by hand can hit. Converting to tuples lets
    # any iterable of numbers be passed.
    return math.dist(tuple(a), tuple(b))
|
|
|
|
# Run the experiment only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|