gpu tp2 1

2024-03-10 21:52:59 +01:00 · 2024-03-10 21:52:59 +01:00 · 643dc6e4fe
commit 643dc6e4fe
parent a9a59ba1ea
7 changed files with 499 additions and 0 deletions
--- a/gpu/tp2/c/src/ex2.cu
+++ b/gpu/tp2/c/src/ex2.cu
@ -0,0 +1,80 @@
+#include <cmath>
+#include <cstdlib>
+#include <iostream>
+
+//
+// example: CUDA_CHECK( cudaMalloc(&dx, N*sizeof(int)) );
+//
+// CUDA_CHECK wraps a CUDA runtime call and aborts the program if the call
+// did not return cudaSuccess. The do { } while(0) form makes the macro behave
+// like a single statement, so it is safe inside an unbraced if/else.
+#define CUDA_CHECK(code) do { cuda_check((code), __FILE__, __LINE__); } while(0)
+
+// Report a failed CUDA runtime call and terminate.
+//   code: status returned by the CUDA runtime call
+//   file: source file of the call site (from __FILE__)
+//   line: source line of the call site (from __LINE__)
+// Returns normally only when code == cudaSuccess; otherwise prints the
+// location and the runtime's error string to stderr and calls std::abort().
+inline void cuda_check(cudaError_t code, const char *file, int line) {
+    if(code == cudaSuccess) {
+        return;
+    }
+    std::cerr << file << ':' << line << ": [CUDA ERROR] " << cudaGetErrorString(code) << std::endl;
+    std::abort();
+}
+
+//
+// step 04
+// Return a pointer to the value at row i and column j from base_address.
+// pitch is the distance between two consecutive rows in BYTES (the value
+// produced by cudaMallocPitch), so the row offset is applied on a char*
+// and the column offset is applied afterwards in units of int.
+//
+__device__ inline int* get_ptr(int* base_address, int i, int j, size_t pitch) {
+    char* row_start = reinterpret_cast<char*>(base_address) + static_cast<size_t>(i) * pitch;
+    return reinterpret_cast<int*>(row_start) + j;
+}
+
+//
+// step 05
+// CUDA kernel add: y[row][col] += x[row][col] for a rows x cols matrix.
+//   x, pitch_x: pitched device buffer holding the first operand and its pitch in bytes
+//   y, pitch_y: pitched device buffer holding the second operand (overwritten
+//               with the sum) and its pitch in bytes
+// Expected launch: 2D grid of 2D blocks, thread x -> column, thread y -> row,
+// so that neighbouring threads in x touch neighbouring ints of the same row.
+// The grid may be larger than the matrix; out-of-range threads do nothing.
+//
+__global__ void add(int* x, size_t pitch_x, int* y, size_t pitch_y, int rows, int cols) {
+    const int col = blockIdx.x * blockDim.x + threadIdx.x;
+    const int row = blockIdx.y * blockDim.y + threadIdx.y;
+    if(row < rows && col < cols) {
+        *get_ptr(y, row, col, pitch_y) += *get_ptr(x, row, col, pitch_x);
+    }
+}
+
+//
+// Fill two host matrices, add them on the device using pitched allocations,
+// copy the result back into y and compare it with the expected values.
+// Prints "Success" or a description of the first mismatch.
+// Returns 0 when every element matches, 1 otherwise.
+//
+int main()
+{
+    constexpr int rows = 200;
+    constexpr int cols = 80;
+    // width of one logical (unpadded) row in bytes
+    constexpr size_t row_bytes = cols * sizeof(int);
+
+    // initial value of y at flat index i: +i for even i, -i for odd i
+    const auto initial_y = [](int i) { return (i % 2 == 0) ? i : -i; };
+
+    int* x = (int*)malloc(rows*row_bytes);
+    int* y = (int*)malloc(rows*row_bytes);
+    if(x == nullptr || y == nullptr) {
+        std::cout << "Host allocation failed" << std::endl;
+        free(x);
+        free(y);
+        return 1;
+    }
+    for(int i = 0; i < rows*cols; ++i) {
+        x[i] = i;
+        y[i] = initial_y(i);
+    }
+
+    //
+    // step 06
+    //
+    int* dx = nullptr;
+    int* dy = nullptr;
+    // each allocation reports its own pitch; they are kept separate because
+    // the runtime does not promise that two allocations share the same pitch
+    size_t pitch_x = 0;
+    size_t pitch_y = 0;
+    // 1. allocate on device
+    CUDA_CHECK(cudaMallocPitch(&dx, &pitch_x, row_bytes, rows));
+    CUDA_CHECK(cudaMallocPitch(&dy, &pitch_y, row_bytes, rows));
+
+    // 2. copy from host to device (host rows are contiguous: host pitch == row_bytes)
+    CUDA_CHECK(cudaMemcpy2D(dx, pitch_x, x, row_bytes, row_bytes, rows, cudaMemcpyHostToDevice));
+    CUDA_CHECK(cudaMemcpy2D(dy, pitch_y, y, row_bytes, row_bytes, rows, cudaMemcpyHostToDevice));
+
+    // 3. launch CUDA kernel
+    const dim3 threads_per_bloc{32,32,1};
+    // ceil-division so the grid covers the whole matrix
+    const dim3 blocs((cols + threads_per_bloc.x - 1) / threads_per_bloc.x,
+                     (rows + threads_per_bloc.y - 1) / threads_per_bloc.y,
+                     1);
+    add<<<blocs, threads_per_bloc>>>(dx, pitch_x, dy, pitch_y, rows, cols);
+    // a launch returns no status: query launch errors, then wait for the
+    // kernel so execution errors are reported here rather than at the copy
+    CUDA_CHECK(cudaGetLastError());
+    CUDA_CHECK(cudaDeviceSynchronize());
+
+    // 4. copy result from device to host
+    CUDA_CHECK(cudaMemcpy2D(y, row_bytes, dy, pitch_y, row_bytes, rows, cudaMemcpyDeviceToHost));
+
+    // 5. free device memory
+    CUDA_CHECK(cudaFree(dx));
+    CUDA_CHECK(cudaFree(dy));
+
+    // checking results
+    bool ok = true;
+    for(int i = 0; i < rows*cols; ++i) {
+        const int expected_result = initial_y(i) + i;
+        if(y[i] != expected_result) {
+            std::cout << "Failure" << std::endl;
+            std::cout << "Result at index i="
+                << i << ": expected "
+                << initial_y(i) << '+' << i << '=' << expected_result << ", got " << y[i] << std::endl;
+            ok = false;
+            break;
+        }
+    }
+    if(ok) std::cout << "Success" << std::endl;
+
+    free(x);
+    free(y);
+
+    return ok ? 0 : 1;
+}