You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

101 lines
4.4 KiB
Python

import datetime
import math
import time
import pycuda.autoinit
import pycuda.driver as drv
import numpy as np
from pycuda import gpuarray
from pycuda.compiler import SourceModule
from optparse import OptionParser
# CUDA module holding the benchmark kernel; it is built once, when this file is loaded.
#
# bench_int(limit, NUMBERS):
#   * each thread derives a 1-D global index from threadIdx.x, blockDim.x and blockIdx.x,
#   * loads NUMBERS[id], adds every i in [0, limit) to it in a loop, and stores the sum back.
# The kernel performs no bounds check on `id`, so NUMBERS must provide one int per
# launched thread.
# NOTE(review): for large `limit` the running sum exceeds the range of a 32-bit int
# (signed overflow); presumably the value itself is irrelevant to the benchmark - confirm.
ker = SourceModule("""
__global__ void bench_int(const int limit, int *NUMBERS) {
unsigned int id = threadIdx.x + blockDim.x * blockIdx.x;
int num = NUMBERS[id];
for (int i = 0; i < limit; i++) {
num += i;
}
NUMBERS[id] = num;
}
""")
def bench_block_size(grid_size: int, block_size: int, calc_count: int):
    """Launch the ``bench_int`` kernel once and return its thread throughput.

    Args:
        grid_size: Number of blocks in the 1-D launch grid.
        block_size: Number of threads per block.
        calc_count: Loop iterations every GPU thread performs (the kernel's
            ``limit`` argument).

    Returns:
        The number of launched threads (``block_size * grid_size``) divided by
        the kernel time in seconds, i.e. thread invocations per second. The
        time is measured between two CUDA events recorded around the launch.
    """
    fetch_add = ker.get_function("bench_int")
    thread_count = block_size * grid_size

    # One C int per thread: the kernel indexes the buffer by global thread id.
    vector_gpu = gpuarray.to_gpu(np.ones(thread_count, dtype=np.intc))

    start_event = drv.Event()
    end_event = drv.Event()

    start_event.record()
    # The kernel declares `const int limit`, so pass a C int (np.intc).
    # np.int_ has a platform-dependent width and does not reliably match it.
    fetch_add(np.intc(calc_count), vector_gpu, block=(block_size, 1, 1), grid=(grid_size, 1, 1))
    end_event.record()
    # Wait until the end event (and therefore the kernel) has completed.
    end_event.synchronize()

    # Event.time_till reports milliseconds; convert to seconds so the result
    # matches the "calls per second" column the caller writes.
    elapsed_seconds = start_event.time_till(end_event) / 1000.0
    return thread_count / elapsed_seconds
if __name__ == "__main__":
    # Script entry point: sweep the block size from --block_size_start to
    # --block_size_stop, benchmark every size, and write one CSV row per size.
    start_time = datetime.datetime.now()

    parser = OptionParser()
    parser.add_option("--bench-output", dest="bench_output",
                      help="The output file for timings", default="timings.csv", type="string")
    parser.add_option("--calculation-steps", dest="calculation_steps",
                      help=" How many calculations steps should be done per GPU thread", default="1000000",
                      type="int")
    parser.add_option("--grid_size", dest="grid_size",
                      help="number of blocks", default="8192", type="int")
    parser.add_option("--number_of_threads", dest="number_of_threads",
                      help="If set the grid size is ignored and is adjusted so that the number of threads is the same "
                           "in all cases", type="int", default="1048576")
    parser.add_option("--block_size_start", dest="block_size_start",
                      help="initial number of threads per block", default="4",
                      type="int")
    parser.add_option("--block_size_step", dest="block_size_step",
                      help="The amount the block size increases by every step", default="4",
                      type="int")
    parser.add_option("--block_size_stop", dest="block_size_stop",
                      help="maximum number of threads per block, max = 1024", default="1024",
                      type="int")
    parser.add_option("--repetitions", dest="repetitions",
                      help=" The average of n runs that is used instead of using one value only.", default="1",
                      type="int")
    (options, args) = parser.parse_args()

    # Reject values that would otherwise hang or crash the sweep:
    # a block size of 0 breaks the divisibility check, a non-positive step
    # never reaches the stop value, and 0 repetitions cannot be averaged.
    if options.block_size_start < 1:
        parser.error("--block_size_start must be at least 1")
    if options.block_size_step < 1:
        parser.error("--block_size_step must be at least 1")
    if options.repetitions < 1:
        parser.error("--repetitions must be at least 1")

    print(
        "Benchmarking block size. Grid Size: {}, Start: {}, Step: {} ,Stop: {}, Calculations: {}, Repetitions: {}".format(
            options.grid_size, options.block_size_start, options.block_size_step, options.block_size_stop,
            options.calculation_steps, options.repetitions))

    # Output name: timestamp + run parameters + the user-supplied suffix.
    filename = str(time.strftime("%Y%m%d-%H%M%S")) + "_block_size_" + str(options.number_of_threads) + "threads_" + str(
        options.calculation_steps) + "claculations_" + str(options.repetitions) + "repetitions_" + options.bench_output

    with open(filename, 'w') as out:
        out.write("block size,calls per second\n")

    block_sizes = range(options.block_size_start, options.block_size_stop + 1, options.block_size_step)
    for current_block_size in block_sizes:
        # NOTE(review): --number_of_threads has a default value, so this
        # condition always holds and --grid_size is never used - confirm intent.
        if options.number_of_threads is not None:
            if options.number_of_threads % current_block_size != 0:
                print(str(options.number_of_threads) + " is not dividable by block size of " + str(
                    current_block_size) + " thus will be skipped")
                continue
            # Exact integer division; divisibility was checked just above.
            current_grid_size = options.number_of_threads // current_block_size
        else:
            current_grid_size = options.grid_size

        # bench_block_size returns a throughput, which is averaged over the runs.
        total_throughput = 0
        for _ in range(options.repetitions):
            total_throughput += bench_block_size(current_grid_size, current_block_size, options.calculation_steps)

        # Append and close after every block size so finished rows are on disk
        # even if a later launch fails.
        with open(filename, 'a') as out:
            out.write(str(current_block_size) + "," + str(total_throughput / options.repetitions) + "\n")

    print("finished in " + str((datetime.datetime.now()-start_time)))