Cleaned up code

This commit is contained in:
uzy lol 2024-12-12 01:31:08 -08:00
parent 3a15cd67b0
commit 961dc65a8f
5 changed files with 15 additions and 17 deletions

View File

@ -9,7 +9,7 @@ void add(int n, float *x, float *y) {
} }
int main(void) { int main(void) {
int N = 1 << 29; // Setting problem size to 1<<29 (536,870,912 elements) int N = 1 << 29;
float *x = new float[N]; float *x = new float[N];
float *y = new float[N]; float *y = new float[N];
@ -20,10 +20,10 @@ int main(void) {
y[i] = 2.0f; y[i] = 2.0f;
} }
// Timer starts before the add function call // Start chrono timer
auto start_time = std::chrono::high_resolution_clock::now(); auto start_time = std::chrono::high_resolution_clock::now();
// Run kernel on the elements on the CPU // Run kernel on all N (1 << 29) elements on the CPU
add(N, x, y); add(N, x, y);
// Timer ends after the add function call // Timer ends after the add function call

View File

@ -1,6 +1,6 @@
#include <iostream>
#include <cmath> #include <cmath>
#include <cuda_runtime.h> #include <cuda_runtime.h>
#include <iostream>
// function to add the elements of two arrays // function to add the elements of two arrays
__global__ void add(int n, float *x, float *y) { __global__ void add(int n, float *x, float *y) {
@ -9,7 +9,7 @@ __global__ void add(int n, float *x, float *y) {
} }
int main(void) { int main(void) {
int N = 1 << 29; // Setting problem size to 1<<29 (536,870,912 elements) int N = 1 << 29;
float *x, *y; float *x, *y;
@ -40,4 +40,4 @@ int main(void) {
cudaFree(y); cudaFree(y);
return 0; return 0;
} }

View File

@ -1,6 +1,6 @@
#include <iostream>
#include <cmath> #include <cmath>
#include <cuda_runtime.h> #include <cuda_runtime.h>
#include <iostream>
// function to add the elements of two arrays // function to add the elements of two arrays
__global__ void add(int n, float *x, float *y) { __global__ void add(int n, float *x, float *y) {
@ -11,7 +11,7 @@ __global__ void add(int n, float *x, float *y) {
} }
int main(void) { int main(void) {
int N = 1 << 29; // Setting problem size to 1<<29 (536,870,912 elements) int N = 1 << 29;
float *x, *y; float *x, *y;
@ -42,4 +42,4 @@ int main(void) {
cudaFree(y); cudaFree(y);
return 0; return 0;
} }

View File

@ -1,6 +1,6 @@
#include <iostream>
#include <cmath> #include <cmath>
#include <cuda_runtime.h> #include <cuda_runtime.h>
#include <iostream>
// function to add the elements of two arrays // function to add the elements of two arrays
__global__ void add(int n, float *x, float *y) { __global__ void add(int n, float *x, float *y) {
@ -11,7 +11,7 @@ __global__ void add(int n, float *x, float *y) {
} }
int main(void) { int main(void) {
int N = 1 << 29; // Setting problem size to 1<<29 (536,870,912 elements) int N = 1 << 29;
float *x, *y; float *x, *y;
@ -30,7 +30,6 @@ int main(void) {
// Number of blocks in the grid // Number of blocks in the grid
int numberOfBlocks = (N + threadsPerBlock - 1) / threadsPerBlock; int numberOfBlocks = (N + threadsPerBlock - 1) / threadsPerBlock;
// Print out the number of thread blocks
std::cout << "Number of thread blocks: " << numberOfBlocks << std::endl; std::cout << "Number of thread blocks: " << numberOfBlocks << std::endl;
// Run kernel on the elements on the GPU with multiple blocks and threads // Run kernel on the elements on the GPU with multiple blocks and threads
@ -50,4 +49,4 @@ int main(void) {
cudaFree(y); cudaFree(y);
return 0; return 0;
} }

View File

@ -1,6 +1,6 @@
#include <iostream>
#include <cmath> #include <cmath>
#include <cuda_runtime.h> #include <cuda_runtime.h>
#include <iostream>
// function to add the elements of two arrays // function to add the elements of two arrays
__global__ void add(int n, float *x, float *y) { __global__ void add(int n, float *x, float *y) {
@ -11,7 +11,7 @@ __global__ void add(int n, float *x, float *y) {
} }
int main(void) { int main(void) {
int N = 1 << 29; // Setting problem size to 1<<29 (536,870,912 elements) int N = 1 << 29;
float *x, *y; float *x, *y;
@ -35,7 +35,6 @@ int main(void) {
// Number of blocks in the grid // Number of blocks in the grid
int numberOfBlocks = (N + threadsPerBlock - 1) / threadsPerBlock; int numberOfBlocks = (N + threadsPerBlock - 1) / threadsPerBlock;
// Print out the number of thread blocks
std::cout << "Number of thread blocks: " << numberOfBlocks << std::endl; std::cout << "Number of thread blocks: " << numberOfBlocks << std::endl;
// Run kernel on the elements on the GPU with multiple blocks and threads // Run kernel on the elements on the GPU with multiple blocks and threads
@ -55,4 +54,4 @@ int main(void) {
cudaFree(y); cudaFree(y);
return 0; return 0;
} }