cuda grid 和block理解(一)-CFANZ编程社区

#include <stdio.h>
#include <iostream>
using namespace std;

// Kernel: every thread prints a single greeting line that identifies it by
// the x index of its block and by its (x, y) index inside that block.
// Takes no arguments and writes no memory; its only effect is the printed
// line. Only blockIdx.x, threadIdx.x and threadIdx.y are read.
__global__ void hello_from_gpu()
{
    // The built-in indices are unsigned; convert them to int so that they
    // match the %d conversions in the format string.
    printf("Hello World from block-%d and thread-(%d, %d)!\n",
           static_cast<int>(blockIdx.x),
           static_cast<int>(threadIdx.x),
           static_cast<int>(threadIdx.y));
}

// Host entry point: launches hello_from_gpu on a 1-D grid of 2 blocks, each
// block holding 2 x 4 threads, then waits for the kernel to finish so its
// device-side printf output is flushed before the program exits.
//
// Returns 0 on success, or 1 if the launch or the kernel execution reported a
// CUDA error (the error text is written to stderr).
int main(void)
{
    const dim3 block_size(2, 4);  // blockDim.x = 2, blockDim.y = 4
    hello_from_gpu<<<2, block_size>>>();

    // A kernel launch returns nothing itself; an invalid launch configuration
    // only becomes visible through cudaGetLastError().
    cudaError_t err = cudaGetLastError();
    if (err != cudaSuccess) {
        fprintf(stderr, "Kernel launch failed: %s\n", cudaGetErrorString(err));
        return 1;
    }

    // The launch is asynchronous: block until the kernel completes, and pick
    // up any error raised while it was running.
    err = cudaDeviceSynchronize();
    if (err != cudaSuccess) {
        fprintf(stderr, "Kernel execution failed: %s\n", cudaGetErrorString(err));
        return 1;
    }

    return 0;
}

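grid=[2,1]   (gridDim.x=2, gridDim.y=1; launched as <<<2, block_size>>>)
block=[2,4]  (blockDim.x=2, blockDim.y=4; dim3 block_size(2, 4))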

nvcc hello5.cu 
./a.out

Hello World from block-0 and thread-(0, 0)!
Hello World from block-0 and thread-(1, 0)!
Hello World from block-0 and thread-(0, 1)!
Hello World from block-0 and thread-(1, 1)!
Hello World from block-0 and thread-(0, 2)!
Hello World from block-0 and thread-(1, 2)!
Hello World from block-0 and thread-(0, 3)!
Hello World from block-0 and thread-(1, 3)!
Hello World from block-1 and thread-(0, 0)!
Hello World from block-1 and thread-(1, 0)!
Hello World from block-1 and thread-(0, 1)!
Hello World from block-1 and thread-(1, 1)!
Hello World from block-1 and thread-(0, 2)!
Hello World from block-1 and thread-(1, 2)!
Hello World from block-1 and thread-(0, 3)!
Hello World from block-1 and thread-(1, 3)!