2015年8月6日 星期四

CUDA的學習筆記

從今天開始打算做一些筆記,先從CUDA開始。第一篇文章記錄今天練習的例子:把每個thread在kernel中的block ID、thread ID、warp編號,以及計算出來的全域thread索引都顯示出來。


例子出自《CUDA Programming》電子書的Chapter 5(P.80),目的是熟悉GPU平行程式中計算thread ID的方式。

Compile:
nvcc -arch=sm_30 *.cu -o thethreadid.out



#include <stdio.h>
#include <stdlib.h>
//#include <conio.h>

/* Number of elements in each of the host ID tables declared below. */
#define ARRAY_SIZE 128
/* Size in bytes of one ID table: ARRAY_SIZE elements of unsigned int. */
#define ARRAY_SIZE_IN_BYTES ((ARRAY_SIZE) * sizeof(unsigned int))

/* Host-side tables, one slot per thread, indexed by the calculated thread index. */
unsigned int cpu_block[ARRAY_SIZE];        /* blockIdx.x reported by each thread */
unsigned int cpu_thread[ARRAY_SIZE];       /* threadIdx.x reported by each thread */
unsigned int cpu_warp[ARRAY_SIZE];         /* threadIdx.x / warpSize for each thread */
unsigned int cpu_calc_thread[ARRAY_SIZE];  /* calculated thread index itself */

/*
 * Kernel: every thread records its own identifiers into four output arrays.
 *
 * Only the .x components of blockIdx, blockDim and threadIdx are read, so the
 * index computed here is meaningful for a one-dimensional launch.
 *
 * Each thread writes to slot (blockIdx.x * blockDim.x + threadIdx.x) of every
 * array. There is NO bounds check: the caller must make each array large
 * enough to hold one element per launched thread.
 *
 *   block       - receives the thread's blockIdx.x
 *   thread      - receives the thread's threadIdx.x
 *   warp        - receives threadIdx.x / warpSize (warp number within the block)
 *   calc_thread - receives the calculated slot index itself
 */
__global__ void what_is_my_id(unsigned int * const block,
                              unsigned int * const thread,
                              unsigned int * const warp,
                              unsigned int * const calc_thread)
{
  const unsigned int block_id = blockIdx.x;
  const unsigned int local_id = threadIdx.x;

  /* Flattened index of this thread across the whole (1D) grid. */
  const unsigned int global_id = (block_id * blockDim.x) + local_id;

  block[global_id] = block_id;
  thread[global_id] = local_id;
  warp[global_id] = local_id / warpSize;
  calc_thread[global_id] = global_id;
}

/*
 * Reports a failed CUDA runtime call on stderr and terminates the program.
 *
 *   status - value returned by the CUDA runtime call
 *   what   - short description of the call, used in the error message
 *
 * Returns normally only when status is cudaSuccess.
 */
static void check_cuda(const cudaError_t status, const char * const what)
{
  if (status != cudaSuccess)
  {
    fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
    exit(EXIT_FAILURE);
  }
}

/*
 * Launches what_is_my_id with num_blocks x num_threads threads, copies the
 * four ID tables back into the cpu_* arrays and prints one line per thread.
 *
 * Returns EXIT_SUCCESS when every CUDA call succeeded. On any CUDA failure
 * check_cuda() prints a diagnostic and exits with EXIT_FAILURE immediately,
 * so the cudaFree calls further down are skipped on that path.
 */
int main(void)
{
  const unsigned int num_blocks = 2;
  const unsigned int num_threads = 64;

  unsigned int * gpu_block = NULL;
  unsigned int * gpu_thread = NULL;
  unsigned int * gpu_warp = NULL;
  unsigned int * gpu_calc_thread = NULL;

  unsigned int i;

  /* The kernel writes one unguarded slot per launched thread, and the print
     loop reads ARRAY_SIZE slots: both are only valid when the launch covers
     exactly ARRAY_SIZE threads. */
  if ((num_blocks * num_threads) != (unsigned int)ARRAY_SIZE)
  {
    fprintf(stderr, "Launch configuration (%u x %u threads) does not match ARRAY_SIZE (%u)\n",
            num_blocks, num_threads, (unsigned int)ARRAY_SIZE);
    return EXIT_FAILURE;
  }

  check_cuda(cudaMalloc((void **)&gpu_block, ARRAY_SIZE_IN_BYTES), "cudaMalloc(gpu_block)");
  check_cuda(cudaMalloc((void **)&gpu_thread, ARRAY_SIZE_IN_BYTES), "cudaMalloc(gpu_thread)");
  check_cuda(cudaMalloc((void **)&gpu_warp, ARRAY_SIZE_IN_BYTES), "cudaMalloc(gpu_warp)");
  check_cuda(cudaMalloc((void **)&gpu_calc_thread, ARRAY_SIZE_IN_BYTES), "cudaMalloc(gpu_calc_thread)");

  what_is_my_id<<<num_blocks, num_threads>>>(gpu_block, gpu_thread, gpu_warp, gpu_calc_thread);

  /* A kernel launch returns no status itself: launch-configuration errors are
     picked up with cudaGetLastError(), errors raised while the kernel runs
     with the following synchronisation. */
  check_cuda(cudaGetLastError(), "what_is_my_id launch");
  check_cuda(cudaDeviceSynchronize(), "what_is_my_id execution");

  check_cuda(cudaMemcpy(cpu_block, gpu_block, ARRAY_SIZE_IN_BYTES, cudaMemcpyDeviceToHost),
             "cudaMemcpy(cpu_block)");
  check_cuda(cudaMemcpy(cpu_thread, gpu_thread, ARRAY_SIZE_IN_BYTES, cudaMemcpyDeviceToHost),
             "cudaMemcpy(cpu_thread)");
  check_cuda(cudaMemcpy(cpu_warp, gpu_warp, ARRAY_SIZE_IN_BYTES, cudaMemcpyDeviceToHost),
             "cudaMemcpy(cpu_warp)");
  check_cuda(cudaMemcpy(cpu_calc_thread, gpu_calc_thread, ARRAY_SIZE_IN_BYTES, cudaMemcpyDeviceToHost),
             "cudaMemcpy(cpu_calc_thread)");

  check_cuda(cudaFree(gpu_block), "cudaFree(gpu_block)");
  check_cuda(cudaFree(gpu_thread), "cudaFree(gpu_thread)");
  check_cuda(cudaFree(gpu_warp), "cudaFree(gpu_warp)");
  check_cuda(cudaFree(gpu_calc_thread), "cudaFree(gpu_calc_thread)");

  /* One output line per thread, in calculated-thread-index order. */
  for(i = 0; i < ARRAY_SIZE; i++)
  {
    printf("Calculated Thread: %3u - Block: %2u - Warp %2u - Thread %3u\n"
            ,cpu_calc_thread[i], cpu_block[i], cpu_warp[i], cpu_thread[i]);
  }

  return EXIT_SUCCESS;
}
執行結果:

Calculated Thread:   0 - Block:  0 - Warp  0 - Thread   0
Calculated Thread:   1 - Block:  0 - Warp  0 - Thread   1
Calculated Thread:   2 - Block:  0 - Warp  0 - Thread   2
Calculated Thread:   3 - Block:  0 - Warp  0 - Thread   3
Calculated Thread:   4 - Block:  0 - Warp  0 - Thread   4
Calculated Thread:   5 - Block:  0 - Warp  0 - Thread   5
Calculated Thread:   6 - Block:  0 - Warp  0 - Thread   6
Calculated Thread:   7 - Block:  0 - Warp  0 - Thread   7
Calculated Thread:   8 - Block:  0 - Warp  0 - Thread   8
Calculated Thread:   9 - Block:  0 - Warp  0 - Thread   9
Calculated Thread:  10 - Block:  0 - Warp  0 - Thread  10
Calculated Thread:  11 - Block:  0 - Warp  0 - Thread  11
Calculated Thread:  12 - Block:  0 - Warp  0 - Thread  12
Calculated Thread:  13 - Block:  0 - Warp  0 - Thread  13
Calculated Thread:  14 - Block:  0 - Warp  0 - Thread  14
Calculated Thread:  15 - Block:  0 - Warp  0 - Thread  15
Calculated Thread:  16 - Block:  0 - Warp  0 - Thread  16
Calculated Thread:  17 - Block:  0 - Warp  0 - Thread  17
Calculated Thread:  18 - Block:  0 - Warp  0 - Thread  18
Calculated Thread:  19 - Block:  0 - Warp  0 - Thread  19
Calculated Thread:  20 - Block:  0 - Warp  0 - Thread  20
Calculated Thread:  21 - Block:  0 - Warp  0 - Thread  21
Calculated Thread:  22 - Block:  0 - Warp  0 - Thread  22
Calculated Thread:  23 - Block:  0 - Warp  0 - Thread  23
Calculated Thread:  24 - Block:  0 - Warp  0 - Thread  24
Calculated Thread:  25 - Block:  0 - Warp  0 - Thread  25
Calculated Thread:  26 - Block:  0 - Warp  0 - Thread  26
Calculated Thread:  27 - Block:  0 - Warp  0 - Thread  27
Calculated Thread:  28 - Block:  0 - Warp  0 - Thread  28
Calculated Thread:  29 - Block:  0 - Warp  0 - Thread  29
Calculated Thread:  30 - Block:  0 - Warp  0 - Thread  30
Calculated Thread:  31 - Block:  0 - Warp  0 - Thread  31
Calculated Thread:  32 - Block:  0 - Warp  1 - Thread  32
Calculated Thread:  33 - Block:  0 - Warp  1 - Thread  33
Calculated Thread:  34 - Block:  0 - Warp  1 - Thread  34
Calculated Thread:  35 - Block:  0 - Warp  1 - Thread  35
Calculated Thread:  36 - Block:  0 - Warp  1 - Thread  36
Calculated Thread:  37 - Block:  0 - Warp  1 - Thread  37
Calculated Thread:  38 - Block:  0 - Warp  1 - Thread  38
Calculated Thread:  39 - Block:  0 - Warp  1 - Thread  39
Calculated Thread:  40 - Block:  0 - Warp  1 - Thread  40
Calculated Thread:  41 - Block:  0 - Warp  1 - Thread  41
Calculated Thread:  42 - Block:  0 - Warp  1 - Thread  42
Calculated Thread:  43 - Block:  0 - Warp  1 - Thread  43
Calculated Thread:  44 - Block:  0 - Warp  1 - Thread  44
Calculated Thread:  45 - Block:  0 - Warp  1 - Thread  45
Calculated Thread:  46 - Block:  0 - Warp  1 - Thread  46
Calculated Thread:  47 - Block:  0 - Warp  1 - Thread  47
Calculated Thread:  48 - Block:  0 - Warp  1 - Thread  48
Calculated Thread:  49 - Block:  0 - Warp  1 - Thread  49
Calculated Thread:  50 - Block:  0 - Warp  1 - Thread  50
Calculated Thread:  51 - Block:  0 - Warp  1 - Thread  51
Calculated Thread:  52 - Block:  0 - Warp  1 - Thread  52
Calculated Thread:  53 - Block:  0 - Warp  1 - Thread  53
Calculated Thread:  54 - Block:  0 - Warp  1 - Thread  54
Calculated Thread:  55 - Block:  0 - Warp  1 - Thread  55
Calculated Thread:  56 - Block:  0 - Warp  1 - Thread  56
Calculated Thread:  57 - Block:  0 - Warp  1 - Thread  57
Calculated Thread:  58 - Block:  0 - Warp  1 - Thread  58
Calculated Thread:  59 - Block:  0 - Warp  1 - Thread  59
Calculated Thread:  60 - Block:  0 - Warp  1 - Thread  60
Calculated Thread:  61 - Block:  0 - Warp  1 - Thread  61
Calculated Thread:  62 - Block:  0 - Warp  1 - Thread  62
Calculated Thread:  63 - Block:  0 - Warp  1 - Thread  63
Calculated Thread:  64 - Block:  1 - Warp  0 - Thread   0
Calculated Thread:  65 - Block:  1 - Warp  0 - Thread   1
Calculated Thread:  66 - Block:  1 - Warp  0 - Thread   2
Calculated Thread:  67 - Block:  1 - Warp  0 - Thread   3
Calculated Thread:  68 - Block:  1 - Warp  0 - Thread   4
Calculated Thread:  69 - Block:  1 - Warp  0 - Thread   5
Calculated Thread:  70 - Block:  1 - Warp  0 - Thread   6
Calculated Thread:  71 - Block:  1 - Warp  0 - Thread   7
Calculated Thread:  72 - Block:  1 - Warp  0 - Thread   8
Calculated Thread:  73 - Block:  1 - Warp  0 - Thread   9
Calculated Thread:  74 - Block:  1 - Warp  0 - Thread  10
Calculated Thread:  75 - Block:  1 - Warp  0 - Thread  11
Calculated Thread:  76 - Block:  1 - Warp  0 - Thread  12
Calculated Thread:  77 - Block:  1 - Warp  0 - Thread  13
Calculated Thread:  78 - Block:  1 - Warp  0 - Thread  14
Calculated Thread:  79 - Block:  1 - Warp  0 - Thread  15
Calculated Thread:  80 - Block:  1 - Warp  0 - Thread  16
Calculated Thread:  81 - Block:  1 - Warp  0 - Thread  17
Calculated Thread:  82 - Block:  1 - Warp  0 - Thread  18
Calculated Thread:  83 - Block:  1 - Warp  0 - Thread  19
Calculated Thread:  84 - Block:  1 - Warp  0 - Thread  20
Calculated Thread:  85 - Block:  1 - Warp  0 - Thread  21
Calculated Thread:  86 - Block:  1 - Warp  0 - Thread  22
Calculated Thread:  87 - Block:  1 - Warp  0 - Thread  23
Calculated Thread:  88 - Block:  1 - Warp  0 - Thread  24
Calculated Thread:  89 - Block:  1 - Warp  0 - Thread  25
Calculated Thread:  90 - Block:  1 - Warp  0 - Thread  26
Calculated Thread:  91 - Block:  1 - Warp  0 - Thread  27
Calculated Thread:  92 - Block:  1 - Warp  0 - Thread  28
Calculated Thread:  93 - Block:  1 - Warp  0 - Thread  29
Calculated Thread:  94 - Block:  1 - Warp  0 - Thread  30
Calculated Thread:  95 - Block:  1 - Warp  0 - Thread  31
Calculated Thread:  96 - Block:  1 - Warp  1 - Thread  32
Calculated Thread:  97 - Block:  1 - Warp  1 - Thread  33
Calculated Thread:  98 - Block:  1 - Warp  1 - Thread  34
Calculated Thread:  99 - Block:  1 - Warp  1 - Thread  35
Calculated Thread: 100 - Block:  1 - Warp  1 - Thread  36
Calculated Thread: 101 - Block:  1 - Warp  1 - Thread  37
Calculated Thread: 102 - Block:  1 - Warp  1 - Thread  38
Calculated Thread: 103 - Block:  1 - Warp  1 - Thread  39
Calculated Thread: 104 - Block:  1 - Warp  1 - Thread  40
Calculated Thread: 105 - Block:  1 - Warp  1 - Thread  41
Calculated Thread: 106 - Block:  1 - Warp  1 - Thread  42
Calculated Thread: 107 - Block:  1 - Warp  1 - Thread  43
Calculated Thread: 108 - Block:  1 - Warp  1 - Thread  44
Calculated Thread: 109 - Block:  1 - Warp  1 - Thread  45
Calculated Thread: 110 - Block:  1 - Warp  1 - Thread  46
Calculated Thread: 111 - Block:  1 - Warp  1 - Thread  47
Calculated Thread: 112 - Block:  1 - Warp  1 - Thread  48
Calculated Thread: 113 - Block:  1 - Warp  1 - Thread  49
Calculated Thread: 114 - Block:  1 - Warp  1 - Thread  50
Calculated Thread: 115 - Block:  1 - Warp  1 - Thread  51
Calculated Thread: 116 - Block:  1 - Warp  1 - Thread  52
Calculated Thread: 117 - Block:  1 - Warp  1 - Thread  53
Calculated Thread: 118 - Block:  1 - Warp  1 - Thread  54
Calculated Thread: 119 - Block:  1 - Warp  1 - Thread  55
Calculated Thread: 120 - Block:  1 - Warp  1 - Thread  56
Calculated Thread: 121 - Block:  1 - Warp  1 - Thread  57
Calculated Thread: 122 - Block:  1 - Warp  1 - Thread  58
Calculated Thread: 123 - Block:  1 - Warp  1 - Thread  59
Calculated Thread: 124 - Block:  1 - Warp  1 - Thread  60
Calculated Thread: 125 - Block:  1 - Warp  1 - Thread  61
Calculated Thread: 126 - Block:  1 - Warp  1 - Thread  62
Calculated Thread: 127 - Block:  1 - Warp  1 - Thread  63

沒有留言:

張貼留言

IKEA吊櫃廚櫃

 好不容易裝好IKEA買來的吊櫃,花了三天。 從組裝,鑽牆,上牆調水平,累死我了。