You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
101 lines
4.4 KiB
Python
101 lines
4.4 KiB
Python
import datetime
|
|
import math
|
|
import time
|
|
|
|
import pycuda.autoinit
|
|
import pycuda.driver as drv
|
|
import numpy as np
|
|
from pycuda import gpuarray
|
|
from pycuda.compiler import SourceModule
|
|
|
|
from optparse import OptionParser
|
|
|
|
# CUDA module containing the integer-add benchmark kernel. It is built as a
# top-level statement, i.e. when this file is loaded.
#
# bench_int(limit, NUMBERS): every thread loads NUMBERS[id], adds 0..limit-1 to
# it and stores the result back. There is no bounds check, so the launch must
# not contain more threads than NUMBERS has elements.
ker = SourceModule("""
__global__ void bench_int(const int limit, int *NUMBERS) {
    unsigned int id = threadIdx.x + blockDim.x * blockIdx.x;
    // Accumulate in unsigned arithmetic: the running sum exceeds INT_MAX for
    // large limits, and signed overflow is undefined behaviour whereas
    // unsigned arithmetic wraps.
    unsigned int num = (unsigned int) NUMBERS[id];
    for (int i = 0; i < limit; i++) {
        num += (unsigned int) i;
    }
    NUMBERS[id] = (int) num;
}
""")
|
|
|
|
|
|
def bench_block_size(grid_size: int, block_size: int, calc_count: int):
    """Time one launch of the ``bench_int`` kernel and return its throughput.

    Args:
        grid_size: Number of blocks in the one-dimensional launch grid.
        block_size: Number of threads per block.
        calc_count: Loop iterations every GPU thread executes (the kernel's
            ``limit`` argument).

    Returns:
        The number of GPU threads (``block_size * grid_size``) divided by the
        kernel execution time in seconds.
    """
    fetch_add = ker.get_function("bench_int")

    thread_count = block_size * grid_size
    # One C-int element per thread; allocate with the target dtype directly
    # instead of creating a float64 array and converting it.
    vector_gpu = gpuarray.to_gpu(np.ones(thread_count, dtype=np.intc))

    start_event = drv.Event()
    end_event = drv.Event()
    start_event.record()

    # The kernel declares `const int limit`, so pass a C-int sized scalar.
    # np.int_ can be 8 bytes wide and would not match the parameter size.
    fetch_add(np.intc(calc_count), vector_gpu, block=(block_size, 1, 1), grid=(grid_size, 1, 1))

    end_event.record()
    end_event.synchronize()

    # Event.time_till reports milliseconds; convert so the result is per second.
    elapsed_seconds = start_event.time_till(end_event) / 1000.0
    return thread_count / elapsed_seconds
|
|
|
|
|
|
def parse_options(argv=None):
    """Parse and validate the benchmark's command-line options.

    Args:
        argv: Argument list to parse. ``None`` lets optparse read
            ``sys.argv[1:]``.

    Returns:
        The optparse values object holding the parsed options.

    Raises:
        SystemExit: Raised through ``parser.error`` when a value would make
            the block-size sweep loop forever or divide by zero.
    """
    parser = OptionParser()
    parser.add_option("--bench-output", dest="bench_output",
                      help="The output file for timings", default="timings.csv", type="string")
    parser.add_option("--calculation-steps", dest="calculation_steps",
                      help=" How many calculations steps should be done per GPU thread", default=1000000,
                      type="int")
    parser.add_option("--grid_size", dest="grid_size",
                      help="number of blocks", default=8192, type="int")
    # NOTE: because this option defaults to a number rather than None, the
    # sweep always derives the grid size from it and --grid_size is not used.
    parser.add_option("--number_of_threads", dest="number_of_threads",
                      help="If set the grid size is ignored and is adjusted so that the number of threads is the same "
                           "in all cases", type="int", default=1048576)
    parser.add_option("--block_size_start", dest="block_size_start",
                      help="initial number of threads per block", default=4,
                      type="int")
    parser.add_option("--block_size_step", dest="block_size_step",
                      help="The amount the block size increases by every step", default=4,
                      type="int")
    parser.add_option("--block_size_stop", dest="block_size_stop",
                      help="maximum number of threads per block, max = 1024", default=1024,
                      type="int")
    parser.add_option("--repetitions", dest="repetitions",
                      help=" The average of n runs that is used instead of using one value only.", default=1,
                      type="int")
    options, _ = parser.parse_args(argv)

    # A block size below 1 leads to a modulo/division by zero or a negative
    # allocation; a step below 1 never reaches the stop value.
    if options.block_size_start < 1:
        parser.error("--block_size_start must be at least 1")
    if options.block_size_step < 1:
        parser.error("--block_size_step must be at least 1")
    # The summed results are divided by the repetition count.
    if options.repetitions < 1:
        parser.error("--repetitions must be at least 1")
    if options.number_of_threads is not None and options.number_of_threads < 1:
        parser.error("--number_of_threads must be at least 1")
    return options


def _grid_size_for(options, block_size):
    """Return the number of blocks to launch for block_size, or None to skip it."""
    if options.number_of_threads is None:
        return options.grid_size
    if options.number_of_threads % block_size != 0:
        return None
    # Integer division: the remainder was just checked to be zero.
    return options.number_of_threads // block_size


def main(argv=None):
    """Sweep the block size and write one averaged result row per size to a CSV file.

    For every block size from ``--block_size_start`` to ``--block_size_stop``
    (inclusive, in ``--block_size_step`` increments), ``bench_block_size`` is
    called ``--repetitions`` times and the mean of its return values is
    appended to the output file.

    Args:
        argv: Argument list to parse. ``None`` lets optparse read
            ``sys.argv[1:]``.
    """
    start_time = datetime.datetime.now()
    options = parse_options(argv)
    print(
        "Benchmarking block size. Grid Size: {}, Start: {}, Step: {} ,Stop: {}, Calculations: {}, Repetitions: {}".format(
            options.grid_size, options.block_size_start, options.block_size_step, options.block_size_stop,
            options.calculation_steps, options.repetitions))

    # The "claculations" spelling is kept as-is so that output file names stay
    # the same as those of earlier runs.
    filename = "{}_block_size_{}threads_{}claculations_{}repetitions_{}".format(
        time.strftime("%Y%m%d-%H%M%S"), options.number_of_threads, options.calculation_steps,
        options.repetitions, options.bench_output)
    with open(filename, 'w') as out:
        out.write("block size,calls per second\n")

    for block_size in range(options.block_size_start, options.block_size_stop + 1, options.block_size_step):
        grid_size = _grid_size_for(options, block_size)
        if grid_size is None:
            print(str(options.number_of_threads) + " is not dividable by block size of " + str(
                block_size) + " thus will be skipped")
            continue

        total_throughput = 0
        for _ in range(options.repetitions):
            total_throughput += bench_block_size(grid_size, block_size, options.calculation_steps)

        # Re-open in append mode for every row so finished rows are on disk
        # even if a later launch fails.
        with open(filename, 'a') as out:
            out.write(str(block_size) + "," + str(total_throughput / options.repetitions) + "\n")

    print("finished in " + str((datetime.datetime.now() - start_time)))


if __name__ == "__main__":
    main()
|