surete tp1

surete lustre setup
gpu_tp1
2024-03-10 21:33:20 +01:00 · 2024-03-10 21:32:46 +01:00 · 2024-03-10 21:31:32 +01:00
20 changed files with 500 additions and 0 deletions
--- a/gpu/tp1/.gitignore
+++ b/gpu/tp1/.gitignore
@ -0,0 +1,2 @@
+bin/
+*.zip
--- a/gpu/tp1/c/build.sh
+++ b/gpu/tp1/c/build.sh
@ -0,0 +1,20 @@
+#!/bin/sh
+cd "$(dirname "$(realpath "$0")")"
+set -e
+
+TARGET="ex1.cu ex2.cu ex3.cu ex4.cu"
+
+if [ $# -gt 0 ]
+then TARGET=$1
+fi
+
+rm -fr bin
+mkdir -p bin
+
+for target in $TARGET
+do nvcc src/$target -o bin/${target%.cu}.out
+done
+
+for target in $TARGET
+do ./bin/${target%.cu}.out
+done
--- a/gpu/tp1/c/src/ex1.cu
+++ b/gpu/tp1/c/src/ex1.cu
@ -0,0 +1,39 @@
+#include <iostream>
+
+#define TO_K(X) X / 1000
+#define TO_G(X) X / 1000000000
+#define FMT_3D(X) "(" << (X)[0] << ", " << (X)[1] << ", " << (X)[2] << ")"
+
+int main(int argc, char const *argv[])
+{
+    // step 01
+    int device_count = -1;
+    cudaGetDeviceCount(&device_count);
+
+    std::cout << "device_count = " << device_count << "\n";
+
+    for (auto i = 0; i < device_count; ++i)
+    {
+        std::cout << "device [" << i << "]:\n";
+
+        struct cudaDeviceProp device_prop;
+        cudaGetDeviceProperties(&device_prop, i);
+
+        std::cout << "\t'device_prop.name' : " << device_prop.name << "\n";
+        std::cout << "\t'device_prop.totalGlobalMem' : " << TO_G(device_prop.totalGlobalMem) << "\n";
+        std::cout << "\t'device_prop.sharedMemPerBlock' : " << TO_K(device_prop.sharedMemPerBlock) << "\n";
+        std::cout << "\t'device_prop.maxThreadsPerBlock' : " << device_prop.maxThreadsPerBlock << "\n";
+        std::cout << "\t'device_prop.maxThreadsDim' : " << FMT_3D(device_prop.maxThreadsDim) << "\n";
+        std::cout << "\t'device_prop.maxGridSize' : " << FMT_3D(device_prop.maxGridSize) << "\n";
+        std::cout << "\t'(device_prop.major, device_prop.minor)' : " << device_prop.major << "." << device_prop.minor << "\n";
+        std::cout << "\t'device_prop.warpSize' : " << device_prop.warpSize << "\n";
+        std::cout << "\t'device_prop.regsPerBlock' : " << device_prop.regsPerBlock << "\n";
+        std::cout << "\t'device_prop.multiProcessorCount' : " << device_prop.multiProcessorCount << "\n";
+
+    }
+
+    return 0;
+}
+
+// # Question 1
+// Avec 49152 octets de mémoire par bloc, il est possible de stocker 49152/4 = 12288 nombres flotants 32bit.
--- a/gpu/tp1/c/src/ex2.cu
+++ b/gpu/tp1/c/src/ex2.cu
@ -0,0 +1,17 @@
+#include <cstdio>
+
+// step 02
+
+__global__ void prints_hello() {
+    printf("Hello World bloc=%d thread=%d\n", blockIdx.x, threadIdx.x);
+}
+
+int main() {
+    // step 03
+    prints_hello<<<1, 1>>>();
+    cudaDeviceSynchronize();
+    return 0;
+}
+
+// # Question 2
+// Avec 4 blocs de 32 threads, le message apparaitra 4*32 = 128 fois.
--- a/gpu/tp1/c/src/ex3.cu
+++ b/gpu/tp1/c/src/ex3.cu
@ -0,0 +1,90 @@
+#include <iostream>
+
+#define RANGE(I, FROM, TO) size_t I = FROM; I < TO; I += 1
+
+//
+// example: CUDA_CHECK( cudaMalloc(dx, x, N*sizeof(int) );
+//
+#define CUDA_CHECK(code) { cuda_check((code), __FILE__, __LINE__); }
+inline void cuda_check(cudaError_t code, const char *file, int line) {
+    if(code != cudaSuccess) {
+        std::cout << file << ':' << line << ": [CUDA ERROR] " << cudaGetErrorString(code) << std::endl; 
+        std::abort();
+    }
+}
+
+
+// step 04
+
+
+__global__ void add(int N, const int* dx, int* dy) {
+    size_t index = blockIdx.x * blockDim.x + threadIdx.x;
+    if (index > N) return;
+    dy[index] += dx[index];
+}
+
+
+
+
+int main()
+{
+    constexpr int N = 1000;
+    int* x = (int*)malloc(N*sizeof(int));
+    int* y = (int*)malloc(N*sizeof(int));
+    for(int i = 0; i < N; ++i) {
+        x[i] = i;
+        y[i] = i*i;
+    }
+
+    // step 05
+    int* dx;
+    int* dy;
+
+    // 1. allocate on device
+    size_t size = N * sizeof(int);
+    cudaMalloc(&dx, size);
+    cudaMalloc(&dy, size);
+
+    // 2. copy from host to device
+    cudaMemcpy(dx, x, size, cudaMemcpyHostToDevice);
+    cudaMemcpy(dy, y, size, cudaMemcpyHostToDevice);
+
+    // 3. launch CUDA kernel
+    const int threads_per_bloc = 32;
+    add<<<N, 32>>>(N, dx, dy);
+    cudaDeviceSynchronize();
+
+
+    // 4. copy result from device to host
+    cudaMemcpy(y, dy, size, cudaMemcpyDeviceToHost);
+
+    // 5. free device memory
+    cudaFree(dx);
+    cudaFree(dy);
+
+    // checking results
+    bool ok = true;
+    for(int i = 0; i < N; ++i) {
+        const int expected_result = i + i*i;
+        if(y[i] != expected_result) {
+            std::cout << "Failure" << std::endl;
+            std::cout << "Result at index i=" 
+                << i << ": expected " 
+                << i << '+' << i*i << '=' << expected_result << ", got " << y[i] << std::endl;
+            ok = false;
+            break;
+        }
+    }
+    if(ok) std::cout << "Success" << std::endl;
+
+    free(x);
+    free(y);
+
+    return 0;
+}
+
+// # Question 3
+// Pour une suite de N tâches, avec des blocs de 32 threads
+// - il faudra idéalement ceil(N/32) blocs.
+// - sur le dernier bloc, N % 32 threads exécuteront une tâche
+// - sur le dernier bloc, 32 - (N % 32) threads n'exécuteront aucune tâche
--- a/gpu/tp1/c/src/ex4.cu
+++ b/gpu/tp1/c/src/ex4.cu
@ -0,0 +1,96 @@
+#include <iostream>
+
+#define RANGE(I, FROM, TO) size_t I = FROM; I < TO; I += 1
+
+//
+// example: CUDA_CHECK( cudaMalloc(dx, x, N*sizeof(int) );
+//
+#define CUDA_CHECK(code) { cuda_check((code), __FILE__, __LINE__); }
+inline void cuda_check(cudaError_t code, const char *file, int line) {
+    if(code != cudaSuccess) {
+        std::cout << file << ':' << line << ": [CUDA ERROR] " << cudaGetErrorString(code) << std::endl; 
+        std::abort();
+    }
+}
+
+
+// step 06
+
+__global__ void add_strided(int N, const int* dx, int* dy) {
+    size_t threads = blockDim.x * gridDim.x;
+    size_t items_per_threads = (N / threads) + 1;
+    size_t base_index = (blockIdx.x * blockDim.x + threadIdx.x) * items_per_threads;
+    for (RANGE(i, 0, items_per_threads)) {
+        size_t index = base_index + i;
+        if (index > N) continue;
+        dy[index] += dx[index];
+    }
+}
+
+
+
+int main()
+{
+    constexpr int N = 1000;
+    int* x = (int*)malloc(N*sizeof(int));
+    int* y = (int*)malloc(N*sizeof(int));
+    for(int i = 0; i < N; ++i) {
+        x[i] = i;
+        y[i] = i*i;
+    }
+
+    // step 07
+    int* dx;
+    int* dy;
+    // 1. allocate on device
+    size_t size = N * sizeof(int);
+    cudaMalloc(&dx, size);
+    cudaMalloc(&dy, size);
+
+
+
+    // 2. copy from host to device
+    cudaMemcpy(dx, x, size, cudaMemcpyHostToDevice);
+    cudaMemcpy(dy, y, size, cudaMemcpyHostToDevice);
+
+
+    // 3. launch CUDA kernel
+    const int threads_per_bloc = 32;
+    const int blocs = 8;
+    add_strided<<<blocs, threads_per_bloc>>>(N, dx, dy);
+    cudaDeviceSynchronize();
+
+
+    // 4. copy result from device to host
+    cudaMemcpy(y, dy, size, cudaMemcpyDeviceToHost);
+
+    // 5. free device memory
+    cudaFree(dx);
+    cudaFree(dy);
+
+
+    // checking results
+    bool ok = true;
+    for(int i = 0; i < N; ++i) {
+        const int expected_result = i + i*i;
+        if(y[i] != expected_result) {
+            std::cout << "Failure" << std::endl;
+            std::cout << "Result at index i=" 
+                << i << ": expected " 
+                << i << '+' << i*i << '=' << expected_result << ", got " << y[i] << std::endl;
+            ok = false;
+            break;
+        }
+    }
+    if(ok) std::cout << "Success" << std::endl;
+
+    free(x);
+    free(y);
+
+    return 0;
+}
+
+// # Question 4
+// Pour N tâches, X threads en tout,
+// - nous devons faire en moyenne N / X tâches par threads
+// - un stride valable est ceil(N / X)
--- a/surete/lustred/.gitignore
+++ b/surete/lustred/.gitignore
@ -0,0 +1 @@
+/dist
--- a/surete/lustred/setup.sh
+++ b/surete/lustred/setup.sh
@ -0,0 +1,36 @@
+#!/bin/sh
+set -e
+alias log="echo '[setup.sh]'"
+
+
+dist="${1:-"./dist"}"
+dist="$(realpath "$dist")"
+
+
+rm -fr "$dist"
+mkdir -p "$dist/lustre-v4" "$dist/lustre-v6"
+
+
+log "downloading"
+    wget -qO "$dist/lustre-v4.tgz" https://www-verimag.imag.fr/DIST-TOOLS/SYNCHRONE/lustre-v4/distrib/archives/lustre-v4-III-e-linux64.tgz &
+    wget -qO "$dist/lustre-v6.tgz" https://www-verimag.imag.fr/DIST-TOOLS/SYNCHRONE/lustre-v6/pre-compiled/x86_64-Linux-lv6-bin-dist.tgz   &
+    wait
+
+log "extracting"
+    tar -xvf "$dist/lustre-v4.tgz" --directory="$dist/lustre-v4" > /dev/null &
+    tar -xvf "$dist/lustre-v6.tgz" --directory="$dist/lustre-v6" > /dev/null &
+    wait
+    rm "$dist/lustre-v4.tgz" "$dist/lustre-v6.tgz"
+
+
+log "installing env"
+    echo '#!/bin/env -S echo "should be sourced :"
+    
+    dir="'"$dist"'"
+    
+    export LUSTRE_INSTALL="$dir/lustre-v4/lustre-v4-III-e0-linux64"
+    export PATH=$PATH:$LUSTRE_INSTALL/bin
+    export PATH=$PATH:~/Lustre/Lustre-v6/bin
+    ' > "$dist/env.sh"
+    
+log "installed at '$dist'"
--- a/surete/tp1/Exercice2.pdf
+++ b/surete/tp1/Exercice2.pdf
--- a/surete/tp1/sequential/Makefile
+++ b/surete/tp1/sequential/Makefile
@ -0,0 +1,22 @@
+LUCIOLE=luciole
+LUSTRE=lv6
+LESAR=lesar
+
+run:
+	 $(LUCIOLE) sequential.lus $(node)
+#	 $(LUCIOLE) $(LUSTRE) sequential.lus -node $(node) -exec
+
+always:
+	 make node=always run
+	 
+count_true:
+	 make node=count_true run
+	 
+count_succ_true:
+	 make node=count_succ_true run
+	 
+bounds:
+	 make node=bounds run
+	 
+clean:
+	 rm luciole.rif
--- a/surete/tp1/sequential/always.ec
+++ b/surete/tp1/sequential/always.ec
@ -0,0 +1,13 @@
+node always
+  (i: bool)
+returns
+  (o: bool);
+
+var
+  V4_prev: bool;
+
+let
+  o = (if i then V4_prev else false);
+  V4_prev = (true -> (pre o));
+tel
+
--- a/surete/tp1/sequential/bounds.ec
+++ b/surete/tp1/sequential/bounds.ec
@ -0,0 +1,17 @@
+node bounds
+  (i: int)
+returns
+  (min: int;
+  max: int);
+
+var
+  V18_prev_min: int;
+  V19_prev_max: int;
+
+let
+  min = (if (i < V18_prev_min) then i else V18_prev_min);
+  max = (if (i > V19_prev_max) then i else V19_prev_max);
+  V18_prev_min = (i -> (pre min));
+  V19_prev_max = (i -> (pre max));
+tel
+
--- a/surete/tp1/sequential/count_succ_true.ec
+++ b/surete/tp1/sequential/count_succ_true.ec
@ -0,0 +1,15 @@
+node count_succ_true
+  (i: bool)
+returns
+  (o: int);
+
+var
+  V12_prev_i: bool;
+  V13_prev_o: int;
+
+let
+  o = (if (i and V12_prev_i) then (V13_prev_o + 1) else V13_prev_o);
+  V12_prev_i = (false -> (pre i));
+  V13_prev_o = (0 -> (pre o));
+tel
+
--- a/surete/tp1/sequential/count_true.ec
+++ b/surete/tp1/sequential/count_true.ec
@ -0,0 +1,13 @@
+node count_true
+  (i: bool)
+returns
+  (o: int);
+
+var
+  V8_prev: int;
+
+let
+  o = (if i then (V8_prev + 1) else V8_prev);
+  V8_prev = (0 -> (pre o));
+tel
+
--- a/surete/tp1/sequential/sequential.lus
+++ b/surete/tp1/sequential/sequential.lus
@ -0,0 +1,32 @@
+node always(i : bool) returns (o : bool);
+var prev : bool;
+let
+  prev = true -> pre o;
+  o = if i then prev else false; 
+tel
+
+node count_true(i : bool) returns (o : int);
+var prev : int;
+let
+  prev = 0 -> pre o;
+  o = if i then prev + 1 else prev;
+tel
+
+node count_succ_true(i : bool) returns (o : int);
+var
+  prev_i : bool;
+  prev_o : int;
+let
+  prev_i = false -> pre i;
+  prev_o = 0 -> pre o;
+  o = if i and prev_i then prev_o + 1 else prev_o;
+tel
+
+node bounds(i : int) returns (min, max : int);
+var prev_min, prev_max : int;
+let
+  prev_min = i -> pre min;
+  prev_max = i -> pre max;
+  min = if (i < prev_min) then i else prev_min;
+  max = if (i > prev_max) then i else prev_max;
+tel
--- a/surete/tp1/temp_prop/after.ec
+++ b/surete/tp1/temp_prop/after.ec
@ -0,0 +1,13 @@
+node after
+  (a: bool)
+returns
+  (o: bool);
+
+var
+  V4_prev_o: bool;
+
+let
+  o = (V4_prev_o or a);
+  V4_prev_o = (false -> (pre o));
+tel
+
--- a/surete/tp1/temp_prop/always_since.ec
+++ b/surete/tp1/temp_prop/always_since.ec
@ -0,0 +1,14 @@
+node always_since
+  (a: bool;
+  b: bool)
+returns
+  (o: bool);
+
+var
+  V9_prev_o: bool;
+
+let
+  o = (if b then a else (V9_prev_o and a));
+  V9_prev_o = (false -> (pre o));
+tel
+
--- a/surete/tp1/temp_prop/build.sh
+++ b/surete/tp1/temp_prop/build.sh
@ -0,0 +1,15 @@
+#!/bin/sh
+set -e
+cd "$(dirname "$(realpath "$0")")"
+
+
+if [ $# -lt 1 ]
+then echo "Usage: ./build.sh <node>" && exit 1
+fi
+
+
+node="$1"
+src=./nodes.lus
+
+
+luciole $src "$node"
--- a/surete/tp1/temp_prop/nodes.lus
+++ b/surete/tp1/temp_prop/nodes.lus
@ -0,0 +1,30 @@
+
+node after (a: bool) returns (o: bool);
+var prev_o: bool;
+let
+  prev_o = false -> pre o;
+  o = prev_o or a;
+tel;
+
+node always_since (a: bool; b: bool) returns (o: bool);
+var prev_o: bool;
+let
+  prev_o = false -> pre o;
+  o = if b then a else prev_o and a;
+tel;
+
+node once_since(a: bool; reset: bool) returns (o: bool);
+var a_happened: bool;
+let
+  a_happened = a -> if reset then false else (pre a_happened or a);
+  o = (a and not reset) -> a_happened;
+tel;
+
+node always_from_to(b: bool; reset: bool; c: bool) returns (x: bool);
+var
+  c_happened: bool;
+  prev_b: bool;
+let
+  c_happened = c -> if reset then false else (c or pre c_happened);*
+  prev_b = b -> if reset then true else (b and prev_b);
+tel;
--- a/surete/tp1/temp_prop/once_since.ec
+++ b/surete/tp1/temp_prop/once_since.ec
@ -0,0 +1,15 @@
+node once_since
+  (a: bool;
+  reset: bool)
+returns
+  (o: bool);
+
+var
+  V14_a_happened: bool;
+
+let
+  o = ((a and (not reset)) -> V14_a_happened);
+  V14_a_happened = (a -> (if reset then false else ((pre V14_a_happened) or a))
+  );
+tel
+
Author	SHA1	Message	Date
JOLIMAITRE Matthieu	a85bb57d9a	surete tp1	2024-03-10 21:33:20 +01:00
JOLIMAITRE Matthieu	41bc6fdbf2	surete lustre setup	2024-03-10 21:32:46 +01:00
JOLIMAITRE Matthieu	6054d1ada0	gpu_tp1	2024-03-10 21:31:32 +01:00