|
|
|
@ -1,4 +1,7 @@
|
|
|
|
|
import datetime
|
|
|
|
|
import math
|
|
|
|
|
import time
|
|
|
|
|
|
|
|
|
|
import pycuda.autoinit
|
|
|
|
|
import pycuda.driver as drv
|
|
|
|
|
import numpy as np
|
|
|
|
@ -38,6 +41,7 @@ def bench_block_size(grid_size: int, block_size: int, calc_count: int):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
|
|
|
|
|
start_time = datetime.datetime.now()
|
|
|
|
|
parser = OptionParser()
|
|
|
|
|
parser.add_option("--bench-output", dest="bench_output",
|
|
|
|
|
help="The output file for timings", default="timings.csv", type="string")
|
|
|
|
@ -45,7 +49,10 @@ if __name__ == "__main__":
|
|
|
|
|
help=" How many calculations steps should be done per GPU thread", default="1000000", type="int")
|
|
|
|
|
|
|
|
|
|
# Launch-geometry options.
#
# The defaults are written as ints; optparse would convert the equivalent
# string defaults through the option's type anyway, so the parsed values are
# unchanged.
#
# NOTE(review): --number_of_threads carries a non-None default, and the
# benchmark driver tests `options.number_of_threads is not None`, so with
# these definitions --grid_size only takes effect if that default is removed.
parser.add_option("--grid_size", dest="grid_size",
                  help="number of blocks", default=8192, type="int")
parser.add_option("--number_of_threads", dest="number_of_threads",
                  help="If set the grid size is ignored and is adjusted so that the number of threads is the same "
                       "in all cases", type="int", default=1048576)
parser.add_option("--block_size_start", dest="block_size_start",
                  help="initial number of threads per block", default=4,
                  type="int")
|
|
|
|
@ -61,18 +68,33 @@ if __name__ == "__main__":
|
|
|
|
|
# ---------------------------------------------------------------------------
# Benchmark driver.
#
# Sweeps the block size from block_size_start to block_size_stop (inclusive)
# in increments of block_size_step. For every block size the kernel benchmark
# is repeated `repetitions` times and the mean of the returned values is
# appended to a time-stamped CSV file.
# ---------------------------------------------------------------------------
(options, args) = parser.parse_args()

# A non-positive step would never advance the sweep, and a non-positive block
# size or repetition count leads to a division by zero further down, so
# reject those inputs before any GPU work is started.
if options.block_size_step <= 0:
    parser.error("block size step must be a positive integer")
if options.block_size_start <= 0:
    parser.error("block size start must be a positive integer")
if options.repetitions <= 0:
    parser.error("repetitions must be a positive integer")

print(
    "Benchmarking block size. Grid Size: {}, Start: {}, Step: {} ,Stop: {}, Calculations: {}, Repetitions: {}".format(
        options.grid_size, options.block_size_start, options.block_size_step, options.block_size_stop,
        options.calculation_steps, options.repetitions))

# The output name is prefixed with a timestamp and the run parameters so that
# repeated runs do not overwrite each other.
# NOTE(review): "claculations_" is misspelled, but it is part of the generated
# file name; it is kept as-is in case other tooling matches on it.
filename = "{}_block_size_{}threads_{}claculations_{}repetitions_{}".format(
    time.strftime("%Y%m%d-%H%M%S"), options.number_of_threads, options.calculation_steps,
    options.repetitions, options.bench_output)

# NOTE(review): the header says "calls per second", but the value written per
# row is the mean of bench_block_size()'s return values - confirm the unit.
with open(filename, 'w') as out:
    out.write("block size,calls per second\n")

for current_block_size in range(options.block_size_start, options.block_size_stop + 1, options.block_size_step):
    if options.number_of_threads is not None:
        # Keep the total thread count constant: derive the grid size from the
        # block size and skip block sizes that do not divide it evenly.
        if options.number_of_threads % current_block_size != 0:
            print("{} is not dividable by block size of {} thus will be skipped".format(
                options.number_of_threads, current_block_size))
            continue
        # Exact integer division; the remainder was checked to be zero above.
        current_grid_size = options.number_of_threads // current_block_size
    else:
        current_grid_size = options.grid_size

    total_duration = 0
    for _ in range(options.repetitions):
        total_duration += bench_block_size(current_grid_size, current_block_size, options.calculation_steps)

    # The file is re-opened in append mode for every row, so rows already
    # measured are on disk before the next benchmark run starts.
    with open(filename, 'a') as out:
        out.write(str(current_block_size) + "," + str(total_duration / options.repetitions) + "\n")

print("finished in " + str(datetime.datetime.now() - start_time))
|
|
|
|
|