This commit is contained in:
JOLIMAITRE Matthieu 2024-03-28 17:58:33 +01:00
parent 8b3bb9c382
commit d976cfaf74
37 changed files with 2669 additions and 371 deletions

13
gpu/tp4/.clang-format Normal file
View file

@ -0,0 +1,13 @@
# yaml-language-server: $schema=https://json.schemastore.org/clang-format.json
---
BasedOnStyle: LLVM
DerivePointerAlignment: false
IndentWidth: 4
PointerAlignment: Left
TabWidth: 4
UseTab: Always
AllowShortIfStatementsOnASingleLine: AllIfsAndElse
AllowShortLoopsOnASingleLine: true
ColumnLimit: 120
AllowShortBlocksOnASingleLine: Always
AllowShortFunctionsOnASingleLine: All

4
gpu/tp4/.clangd Normal file
View file

@ -0,0 +1,4 @@
CompileFlags:
Add:
- -xcuda
- --no-cuda-version-check

2
gpu/tp4/.gitignore vendored Normal file
View file

@ -0,0 +1,2 @@
bin/
/*.zip

1
gpu/tp4/README.md Normal file
View file

@ -0,0 +1 @@
#

26
gpu/tp4/c/build.sh Executable file
View file

@ -0,0 +1,26 @@
#!/bin/sh
#
# Builds and runs the exercises of this directory.
#
# Usage: build.sh [target.cu ...]
#   Without arguments, every file listed in TARGET is built and run.
#   Each target is compiled together with the shared MODULES into
#   bin/<target>.out and executed right after compilation.
#

# Abort on the first failing command. This is enabled BEFORE the `cd` so that
# a failed `cd` can never lead to `rm -fr bin` running in the caller's directory.
set -e
cd "$(dirname "$(realpath "$0")")"

TARGET="ex1.cu ex2.cu ex3.cu"
MODULES="conv.cu"

if [ $# -gt 0 ]
then targets="$*"
else targets="$TARGET"
fi

# Start from a clean output directory.
rm -fr bin
mkdir -p bin

ccargs="-O2"
#ccargs="$ccargs -g -G -Xcompiler -fsanitize=address"

for target in $targets
do
    sources="$MODULES $target"
    # Prefix every source file with the src/ directory.
    inputs="$(for src in $sources; do echo "src/$src"; done | xargs)"
    # $ccargs and $inputs are intentionally unquoted: they are word lists.
    nvcc $ccargs -o "bin/${target}.out" $inputs
    "./bin/${target}.out"
done

183
gpu/tp4/c/src/conv.cu Normal file
View file

@ -0,0 +1,183 @@
#include "conv.h"
// Number of threads per block used by the kernel launches in this file.
constexpr int threads_per_bloc = 16;
// Short alias of threads_per_bloc; it sizes the shared-memory buffer of kernel::conv3.
constexpr int T = threads_per_bloc;
//
// Wraps a CUDA runtime call: on failure, reports the error with the file and
// line of the call site and aborts the program.
//
// example: CUDA_CHECK(cudaMalloc(&dx, N * sizeof(int)));
//
// The do { ... } while (0) wrapper makes the macro behave like one statement,
// so `if (cond) CUDA_CHECK(call); else ...` compiles as expected.
//
#define CUDA_CHECK(code)                                                                                               \
    do {                                                                                                               \
        cuda_check((code), __FILE__, __LINE__);                                                                        \
    } while (0)

// Does nothing when `code` is cudaSuccess; otherwise prints
// "<file>:<line>: [CUDA ERROR] <error string>" and aborts.
inline void cuda_check(cudaError_t code, const char* file, int line) {
    if (code == cudaSuccess) return;
    std::cout << file << ':' << line << ": [CUDA ERROR] " << cudaGetErrorString(code) << std::endl;
    std::abort();
}
// Loop-header helper: `for (RANGE(i, a, b))` declares the long `i` and iterates it over [a, b).
// Both bounds are parenthesized so that operator expressions can be passed safely.
#define RANGE(X, FROM, TO)                                                                                             \
    long X = (FROM);                                                                                                   \
    X < (TO);                                                                                                          \
    X += 1
// Global index of the calling thread along x (device code only).
// No trailing semicolon, so the macro can be used inside larger expressions.
#define THREAD_ID_X() (blockIdx.x * blockDim.x + threadIdx.x)
// Debug helper: prints "<expression>: <value>", formatting the value with the printf conversion FMT (e.g. "d").
#define DBG(X, FMT) printf(#X ": %" FMT "\n", X)
// Debug helper for streams: `std::cout << DBG_S(v)` inserts "v: <value>, ".
#define DBG_S(X) #X << ": " << X << ", "
//
// 1D convolution
// - x: input array of size N
// - y: kernel of odd size M
//
// CPU
//
// Returns z of size N where z[i] is the sum over j of x[i - (M - 1) / 2 + j] * y[j].
// Terms whose x index falls outside [0, N) are skipped.
//
std::vector<int> conv1(const std::vector<int>& x, const std::vector<int>& y) {
    //
    // step 01
    //
    // Sizes are kept in a signed 64-bit type: the x index can become negative,
    // and this avoids both narrowing to int and signed/unsigned comparisons.
    const long N = static_cast<long>(x.size());
    const long M = static_cast<long>(y.size());
    const long P = (M - 1) / 2;
    std::vector<int> z(x.size());
    for (long i = 0; i < N; ++i) {
        int acc = 0;
        for (long j = 0; j < M; ++j) {
            const long k = i - P + j;
            // Skip the part of the kernel that hangs over either end of x.
            if (k < 0 || k >= N) continue;
            acc += x[k] * y[j];
        }
        z[i] = acc;
    }
    return z;
}
namespace kernel {

//
// step 02
//
// Naive 1D convolution kernel: each thread whose global x index is below
// x_length computes one element of dz.
// - dx: input array of x_length elements
// - dy: convolution kernel of y_length elements
// - dz: output array of x_length elements
// Threads whose index is >= x_length do nothing.
//
__global__ void conv2(const int* dx, const int* dy, int x_length, int y_length, int* dz) {
    const long out = (long)(blockIdx.x * blockDim.x + threadIdx.x);
    if (out < x_length) {
        const int half = (y_length - 1) / 2;
        int acc = 0;
        for (long k = 0; k < y_length; k += 1) {
            const long src = out - half + k;
            // Only accumulate terms whose input index lies inside dx.
            if (src >= 0 && src < x_length) acc += dx[src] * dy[k];
        }
        dz[out] = acc;
    }
}

} // namespace kernel
//
// 1D convolution
// - x: input array of size N
// - y: kernel of odd size M
//
// GPU (naive)
//
// Copies x and y to the device, runs kernel::conv2 and returns the result,
// a vector of size N. Any CUDA error aborts the program (see CUDA_CHECK).
//
std::vector<int> conv2(const std::vector<int>& x, const std::vector<int>& y) {
    //
    // step 03
    //
    auto z = std::vector<int>(x.size());
    // An empty input produces an empty output and an empty kernel produces only
    // zeros: return early instead of allocating zero bytes or launching an empty grid.
    if (x.empty() || y.empty()) return z;

    const auto size_dx = x.size() * sizeof(int);
    const auto size_dy = y.size() * sizeof(int);
    const auto size_dz = size_dx;

    int* dx = nullptr;
    CUDA_CHECK(cudaMalloc(&dx, size_dx));
    CUDA_CHECK(cudaMemcpy(dx, x.data(), size_dx, cudaMemcpyHostToDevice));

    int* dy = nullptr;
    CUDA_CHECK(cudaMalloc(&dy, size_dy));
    CUDA_CHECK(cudaMemcpy(dy, y.data(), size_dy, cudaMemcpyHostToDevice));

    int* dz = nullptr;
    CUDA_CHECK(cudaMalloc(&dz, size_dz));

    // Ceil-division: just enough blocks to give every output element a thread.
    const auto blocks = static_cast<unsigned int>((x.size() + threads_per_bloc - 1) / threads_per_bloc);
    const auto x_length = static_cast<int>(x.size());
    const auto y_length = static_cast<int>(y.size());
    kernel::conv2<<<blocks, threads_per_bloc>>>(dx, dy, x_length, y_length, dz);
    // A kernel launch returns nothing; launch errors must be queried explicitly.
    CUDA_CHECK(cudaGetLastError());

    // Blocking copy: waits for the kernel and reports errors raised while it ran.
    CUDA_CHECK(cudaMemcpy(z.data(), dz, size_dz, cudaMemcpyDeviceToHost));

    CUDA_CHECK(cudaFree(dx));
    CUDA_CHECK(cudaFree(dy));
    CUDA_CHECK(cudaFree(dz));
    return z;
}
namespace kernel {

//
// step 04
//
// 1D convolution kernel that first stages a T-element tile of dx in shared
// memory, then computes one element of dz per thread. Input elements inside
// the tile are read from shared memory, the others from dx directly.
// - dx: input array of x_length elements
// - dy: convolution kernel of y_length elements
// - dz: output array of x_length elements
// Precondition: launched with blockDim.x == T, so that the tile computed from
// the global thread index is the one filled by the calling thread's block.
//
__global__ void conv3(const int* dx, const int* dy, int x_length, int y_length, int* dz) {
    __shared__ int buffer[T];
    const long thread_id = (long)(blockIdx.x * blockDim.x + threadIdx.x);
    const bool in_range = thread_id < x_length;
    if (in_range) buffer[thread_id % T] = dx[thread_id];
    // Every thread of the block must reach the barrier, including the ones past
    // the end of the input: returning before it leaves the barrier in divergent
    // control flow, which is undefined behavior.
    __syncthreads();
    if (!in_range) return;

    // Range of x indices [lower, upper) held by the shared tile.
    const long buffer_lower_x_index = (thread_id / T) * T;
    const long buffer_upper_x_index = buffer_lower_x_index + T;
    const int offset = (y_length - 1) / 2;
    int result = 0;
    for (long y_index = 0; y_index < y_length; y_index += 1) {
        const long x_index = thread_id - offset + y_index;
        // The bounds test also guarantees that tile slots past x_length, which
        // were never written, are never read.
        if (x_index < 0 || x_index >= x_length) continue;
        const bool in_buffer = x_index >= buffer_lower_x_index && x_index < buffer_upper_x_index;
        if (in_buffer) result += buffer[x_index - buffer_lower_x_index] * dy[y_index];
        else result += dx[x_index] * dy[y_index];
    }
    dz[thread_id] = result;
}

} // namespace kernel
//
// 1D convolution
// - x: input array of size N
// - y: kernel of odd size M
//
// GPU (optimized)
//
// Copies x and y to the device, runs kernel::conv3 and returns the result,
// a vector of size N. Any CUDA error aborts the program (see CUDA_CHECK).
//
std::vector<int> conv3(const std::vector<int>& x, const std::vector<int>& y) {
    //
    // step 05
    //
    auto z = std::vector<int>(x.size());
    // An empty input produces an empty output and an empty kernel produces only
    // zeros: return early instead of allocating zero bytes or launching an empty grid.
    if (x.empty() || y.empty()) return z;

    const auto size_dx = x.size() * sizeof(int);
    const auto size_dy = y.size() * sizeof(int);
    const auto size_dz = size_dx;

    int* dx = nullptr;
    CUDA_CHECK(cudaMalloc(&dx, size_dx));
    CUDA_CHECK(cudaMemcpy(dx, x.data(), size_dx, cudaMemcpyHostToDevice));

    int* dy = nullptr;
    CUDA_CHECK(cudaMalloc(&dy, size_dy));
    CUDA_CHECK(cudaMemcpy(dy, y.data(), size_dy, cudaMemcpyHostToDevice));

    int* dz = nullptr;
    CUDA_CHECK(cudaMalloc(&dz, size_dz));

    // Ceil-division: just enough blocks to give every output element a thread.
    // The block size must stay threads_per_bloc (== T): the kernel's shared
    // buffer holds exactly T elements.
    const auto blocks = static_cast<unsigned int>((x.size() + threads_per_bloc - 1) / threads_per_bloc);
    const auto x_length = static_cast<int>(x.size());
    const auto y_length = static_cast<int>(y.size());
    kernel::conv3<<<blocks, threads_per_bloc>>>(dx, dy, x_length, y_length, dz);
    // A kernel launch returns nothing; launch errors must be queried explicitly.
    CUDA_CHECK(cudaGetLastError());

    // Blocking copy: waits for the kernel and reports errors raised while it ran.
    CUDA_CHECK(cudaMemcpy(z.data(), dz, size_dz, cudaMemcpyDeviceToHost));

    CUDA_CHECK(cudaFree(dx));
    CUDA_CHECK(cudaFree(dy));
    CUDA_CHECK(cudaFree(dz));
    return z;
}

19
gpu/tp4/c/src/conv.h Normal file
View file

@ -0,0 +1,19 @@
#pragma once
#include <iostream>
#include <vector>
//
// 1D convolution
// - x: input array of size N
// - y: kernel of odd size M
//
// CPU
// Returns a vector z of size N; z[i] sums x[i - (M - 1) / 2 + j] * y[j] over j,
// skipping the terms whose x index falls outside of x.
std::vector<int> conv1(const std::vector<int>& x, const std::vector<int>& y);
// GPU (naive)
// Same inputs and output size as conv1; each output element is computed by one CUDA thread.
std::vector<int> conv2(const std::vector<int>& x, const std::vector<int>& y);
// GPU (optimized)
// Same inputs and output size as conv1; the kernel stages part of x in shared memory.
std::vector<int> conv3(const std::vector<int>& x, const std::vector<int>& y);

66
gpu/tp4/c/src/ex1.cu Normal file
View file

@ -0,0 +1,66 @@
#include "conv.h"
// Writes vec to std::cout on one line as "[a, b, c]", or "[]" when it is empty.
void print(const std::vector<int>& vec) {
    if (vec.empty()) {
        std::cout << "[]" << std::endl;
        return;
    }
    const size_t last = vec.size() - 1;
    std::cout << "[";
    // Every element but the last is followed by a separator.
    for (size_t pos = 0; pos != last; ++pos) std::cout << vec[pos] << ", ";
    std::cout << vec[last] << "]" << std::endl;
}
// Runs one test case: prints `title`, computes conv1(x, y) and compares it to z_sol.
// Prints "Ok" on a match, otherwise the expected and the obtained vectors.
// Returns true when the result matches.
static bool run_case(const char* title, const std::vector<int>& x, const std::vector<int>& y,
                     const std::vector<int>& z_sol) {
    std::cout << title << std::endl;
    const auto z = conv1(x, y);
    if (z == z_sol) {
        std::cout << "Ok" << std::endl;
        return true;
    }
    std::cout << "Error, expected:" << std::endl;
    print(z_sol);
    std::cout << "got:" << std::endl;
    print(z);
    return false;
}

// Runs the three conv1 test cases.
// Returns 0 when all of them pass and 1 otherwise, so that a failing test is
// visible to the caller (e.g. the build script) through the exit code.
int main() {
    int failures = 0;
    // N = 10, M = 3
    failures += !run_case("Test 1", {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, {0, 1, 0}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9});
    // N = 10, M = 5
    failures += !run_case("Test 2", {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, {1, 2, 4, 2, 1},
                          {4, 11, 20, 30, 40, 50, 60, 70, 70, 59});
    // N = 35, M = 19
    failures += !run_case(
        "Test 3", {0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17,
                   18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34},
        {1, -2, 4, -8, 16, -32, 64, -128, 256, -1024, 256, -128, 64, -32, 16, -8, 4, -2, 1},
        {117,    -736,   -1333,  -2058,  -2719,  -3412,  -4089,  -4774,  -5455,  -6138,  -6820,  -7502,
         -8184,  -8866,  -9548,  -10230, -10912, -11594, -12276, -12958, -13640, -14322, -15004, -15686,
         -16368, -17050, -17767, -18380, -19201, -19606, -20843, -20416, -23317, -19562, -29119});
    return failures == 0 ? 0 : 1;
}

76
gpu/tp4/c/src/ex2.cu Normal file
View file

@ -0,0 +1,76 @@
#include "conv.h"
// Writes vec to std::cout on one line as "[a, b, c]", or "[]" when it is empty.
void print(const std::vector<int>& vec)
{
    if(vec.empty())
    {
        std::cout << "[]" << std::endl;
        return;
    }

    std::cout << "[";
    // All elements except the last one are followed by a separator.
    const size_t last = vec.size() - 1;
    size_t pos = 0;
    while(pos < last)
    {
        std::cout << vec[pos] << ", ";
        ++pos;
    }
    std::cout << vec[last] << "]" << std::endl;
}
// Runs one test case: prints `title`, computes conv2(x, y) and compares it to z_sol.
// Prints "Ok" on a match, otherwise the expected and the obtained vectors.
// Returns true when the result matches.
static bool run_case(const char* title, const std::vector<int>& x, const std::vector<int>& y, const std::vector<int>& z_sol)
{
    std::cout << title << std::endl;
    const std::vector<int> z = conv2(x, y);
    if(z != z_sol)
    {
        std::cout << "Error, expected:" << std::endl;
        print(z_sol);
        std::cout << "got:" << std::endl;
        print(z);
        return false;
    }
    std::cout << "Ok" << std::endl;
    return true;
}

// Runs the three conv2 test cases.
// Returns 0 when all of them pass and 1 otherwise, so that a failing test is
// visible to the caller (e.g. the build script) through the exit code.
int main()
{
    int failures = 0;

    // N = 10, M = 3
    failures += !run_case("Test 1",
        {0, 1, 2, 3, 4, 5, 6, 7, 8, 9},
        {0, 1, 0},
        {0, 1, 2, 3, 4, 5, 6, 7, 8, 9});

    // N = 10, M = 5
    failures += !run_case("Test 2",
        {0, 1, 2, 3, 4, 5, 6, 7, 8, 9},
        {1, 2, 4, 2, 1},
        {4, 11, 20, 30, 40, 50, 60, 70, 70, 59});

    // N = 35, M = 19
    failures += !run_case("Test 3",
        {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34},
        {1, -2, 4, -8, 16, -32, 64, -128, 256, -1024, 256, -128, 64, -32, 16, -8, 4, -2, 1},
        {117, -736, -1333, -2058, -2719, -3412, -4089, -4774, -5455, -6138, -6820, -7502, -8184, -8866, -9548, -10230, -10912, -11594, -12276, -12958, -13640, -14322, -15004, -15686, -16368, -17050, -17767, -18380, -19201, -19606, -20843, -20416, -23317, -19562, -29119});

    return failures == 0 ? 0 : 1;
}

76
gpu/tp4/c/src/ex3.cu Normal file
View file

@ -0,0 +1,76 @@
#include "conv.h"
// Writes vec to std::cout on one line as "[a, b, c]", or "[]" when it is empty.
void print(const std::vector<int>& vec)
{
    const size_t count = vec.size();
    if(count == 0)
    {
        std::cout << "[]" << std::endl;
        return;
    }

    std::cout << "[";
    // Emit every element that has a successor, each followed by a separator.
    for(size_t next = 1; next < count; ++next)
    {
        std::cout << vec[next - 1] << ", ";
    }
    std::cout << vec[count - 1] << "]" << std::endl;
}
// Runs one test case: prints `title`, computes conv3(x, y) and compares it to z_sol.
// Prints "Ok" on a match, otherwise the expected and the obtained vectors.
// Returns true when the result matches.
static bool run_case(const char* title, const std::vector<int>& x, const std::vector<int>& y, const std::vector<int>& z_sol)
{
    std::cout << title << std::endl;
    const std::vector<int> z = conv3(x, y);
    if(z != z_sol)
    {
        std::cout << "Error, expected:" << std::endl;
        print(z_sol);
        std::cout << "got:" << std::endl;
        print(z);
        return false;
    }
    std::cout << "Ok" << std::endl;
    return true;
}

// Runs the three conv3 test cases.
// Returns 0 when all of them pass and 1 otherwise, so that a failing test is
// visible to the caller (e.g. the build script) through the exit code.
int main()
{
    int failures = 0;

    // N = 10, M = 3
    failures += !run_case("Test 1",
        {0, 1, 2, 3, 4, 5, 6, 7, 8, 9},
        {0, 1, 0},
        {0, 1, 2, 3, 4, 5, 6, 7, 8, 9});

    // N = 10, M = 5
    failures += !run_case("Test 2",
        {0, 1, 2, 3, 4, 5, 6, 7, 8, 9},
        {1, 2, 4, 2, 1},
        {4, 11, 20, 30, 40, 50, 60, 70, 70, 59});

    // N = 35, M = 19
    failures += !run_case("Test 3",
        {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34},
        {1, -2, 4, -8, 16, -32, 64, -128, 256, -1024, 256, -128, 64, -32, 16, -8, 4, -2, 1},
        {117, -736, -1333, -2058, -2719, -3412, -4089, -4774, -5455, -6138, -6820, -7502, -8184, -8866, -9548, -10230, -10912, -11594, -12276, -12958, -13640, -14322, -15004, -15686, -16368, -17050, -17767, -18380, -19201, -19606, -20843, -20416, -23317, -19562, -29119});

    return failures == 0 ? 0 : 1;
}