Cleaned up code
This commit is contained in:
parent
3a15cd67b0
commit
961dc65a8f
@ -9,7 +9,7 @@ void add(int n, float *x, float *y) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
int main(void) {
|
int main(void) {
|
||||||
int N = 1 << 29; // Setting problem size to 1<<29 (536,870,912 elements)
|
int N = 1 << 29;
|
||||||
|
|
||||||
float *x = new float[N];
|
float *x = new float[N];
|
||||||
float *y = new float[N];
|
float *y = new float[N];
|
||||||
@ -20,10 +20,10 @@ int main(void) {
|
|||||||
y[i] = 2.0f;
|
y[i] = 2.0f;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Timer starts before the add function call
|
// Start chrono timer
|
||||||
auto start_time = std::chrono::high_resolution_clock::now();
|
auto start_time = std::chrono::high_resolution_clock::now();
|
||||||
|
|
||||||
// Run kernel on the elements on the CPU
|
// Run kernel on N (1 << 29) elements on the CPU
|
||||||
add(N, x, y);
|
add(N, x, y);
|
||||||
|
|
||||||
// Timer ends after the add function call
|
// Timer ends after the add function call
|
||||||
|
|||||||
@ -1,6 +1,6 @@
|
|||||||
#include <iostream>
|
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
#include <cuda_runtime.h>
|
#include <cuda_runtime.h>
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
// function to add the elements of two arrays
|
// function to add the elements of two arrays
|
||||||
__global__ void add(int n, float *x, float *y) {
|
__global__ void add(int n, float *x, float *y) {
|
||||||
@ -9,7 +9,7 @@ __global__ void add(int n, float *x, float *y) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
int main(void) {
|
int main(void) {
|
||||||
int N = 1 << 29; // Setting problem size to 1<<29 (536,870,912 elements)
|
int N = 1 << 29;
|
||||||
|
|
||||||
float *x, *y;
|
float *x, *y;
|
||||||
|
|
||||||
@ -40,4 +40,4 @@ int main(void) {
|
|||||||
cudaFree(y);
|
cudaFree(y);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,6 +1,6 @@
|
|||||||
#include <iostream>
|
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
#include <cuda_runtime.h>
|
#include <cuda_runtime.h>
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
// function to add the elements of two arrays
|
// function to add the elements of two arrays
|
||||||
__global__ void add(int n, float *x, float *y) {
|
__global__ void add(int n, float *x, float *y) {
|
||||||
@ -11,7 +11,7 @@ __global__ void add(int n, float *x, float *y) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
int main(void) {
|
int main(void) {
|
||||||
int N = 1 << 29; // Setting problem size to 1<<29 (536,870,912 elements)
|
int N = 1 << 29;
|
||||||
|
|
||||||
float *x, *y;
|
float *x, *y;
|
||||||
|
|
||||||
@ -42,4 +42,4 @@ int main(void) {
|
|||||||
cudaFree(y);
|
cudaFree(y);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,6 +1,6 @@
|
|||||||
#include <iostream>
|
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
#include <cuda_runtime.h>
|
#include <cuda_runtime.h>
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
// function to add the elements of two arrays
|
// function to add the elements of two arrays
|
||||||
__global__ void add(int n, float *x, float *y) {
|
__global__ void add(int n, float *x, float *y) {
|
||||||
@ -11,7 +11,7 @@ __global__ void add(int n, float *x, float *y) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
int main(void) {
|
int main(void) {
|
||||||
int N = 1 << 29; // Setting problem size to 1<<29 (536,870,912 elements)
|
int N = 1 << 29;
|
||||||
|
|
||||||
float *x, *y;
|
float *x, *y;
|
||||||
|
|
||||||
@ -30,7 +30,6 @@ int main(void) {
|
|||||||
// Number of blocks in the grid
|
// Number of blocks in the grid
|
||||||
int numberOfBlocks = (N + threadsPerBlock - 1) / threadsPerBlock;
|
int numberOfBlocks = (N + threadsPerBlock - 1) / threadsPerBlock;
|
||||||
|
|
||||||
// Print out the number of thread blocks
|
|
||||||
std::cout << "Number of thread blocks: " << numberOfBlocks << std::endl;
|
std::cout << "Number of thread blocks: " << numberOfBlocks << std::endl;
|
||||||
|
|
||||||
// Run kernel on the elements on the GPU with multiple blocks and threads
|
// Run kernel on the elements on the GPU with multiple blocks and threads
|
||||||
@ -50,4 +49,4 @@ int main(void) {
|
|||||||
cudaFree(y);
|
cudaFree(y);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,6 +1,6 @@
|
|||||||
#include <iostream>
|
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
#include <cuda_runtime.h>
|
#include <cuda_runtime.h>
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
// function to add the elements of two arrays
|
// function to add the elements of two arrays
|
||||||
__global__ void add(int n, float *x, float *y) {
|
__global__ void add(int n, float *x, float *y) {
|
||||||
@ -11,7 +11,7 @@ __global__ void add(int n, float *x, float *y) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
int main(void) {
|
int main(void) {
|
||||||
int N = 1 << 29; // Setting problem size to 1<<29 (536,870,912 elements)
|
int N = 1 << 29;
|
||||||
|
|
||||||
float *x, *y;
|
float *x, *y;
|
||||||
|
|
||||||
@ -35,7 +35,6 @@ int main(void) {
|
|||||||
// Number of blocks in the grid
|
// Number of blocks in the grid
|
||||||
int numberOfBlocks = (N + threadsPerBlock - 1) / threadsPerBlock;
|
int numberOfBlocks = (N + threadsPerBlock - 1) / threadsPerBlock;
|
||||||
|
|
||||||
// Print out the number of thread blocks
|
|
||||||
std::cout << "Number of thread blocks: " << numberOfBlocks << std::endl;
|
std::cout << "Number of thread blocks: " << numberOfBlocks << std::endl;
|
||||||
|
|
||||||
// Run kernel on the elements on the GPU with multiple blocks and threads
|
// Run kernel on the elements on the GPU with multiple blocks and threads
|
||||||
@ -55,4 +54,4 @@ int main(void) {
|
|||||||
cudaFree(y);
|
cudaFree(y);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user