# nvblas.conf - NVBLAS configuration file (file name kept as a comment; it is not an NVBLAS keyword)

#This is the configuration file to use NVBLAS Library 
# Setup the environment variable NVBLAS_CONFIG_FILE to specify your own config file. 
# By default, if NVBLAS_CONFIG_FILE is not defined, 
# NVBLAS Library will try to open the file "nvblas.conf" in its current directory 
# Example : NVBLAS_CONFIG_FILE /home/cuda_user/my_nvblas.conf 
# The config file should have restricted write permissions.
# Specify which output log file to use (default is stderr)
#NVBLAS_LOGFILE nvblas.log
# Enable trace log of every intercepted BLAS call
#NVBLAS_TRACE_LOG_ENABLED
#Put here the CPU BLAS fallback Library of your choice 
# It is strongly advised to use the full path to describe the location of the CPU library, e.g.
#   NVBLAS_CPU_BLAS_LIB /usr/lib/libopenblas.so
#NVBLAS_CPU_BLAS_LIB <mkl_path_installation>/libmkl_rt.so
# List of GPU device IDs that participate in the computation
# CPU BLAS fallback library used by NVBLAS, given as a full path.
# NOTE(review): confirm this path exists on the target machine.
NVBLAS_CPU_BLAS_LIB /usr/lib/libopenblas.so

# Use ALL if you want all your GPUs to contribute 
# Use ALL0, if you want all your GPUs of the same type as device 0 to contribute 
# However, NVBLAS considers that all GPUs have the same performance and PCI bandwidth
# By default if no GPU are listed, only device 0 will be used 

#NVBLAS_GPU_LIST 0 2 4 
#NVBLAS_GPU_LIST ALL 
#NVBLAS_GPU_LIST ALL0 
#NVBLAS_GPU_LIST ALL 

# Tile dimension.
# NOTE(review): per the NVBLAS documentation this is presumably the size of the
# tiles the matrices are divided into for GPU computation - confirm against the
# documentation for the installed CUDA version before tuning.
NVBLAS_TILE_DIM 2048 

# Autopin memory.
# This keyword takes no value; its presence on an uncommented line is what
# turns the option on. Comment the line out to leave it off.
# NOTE(review): presumably enables automatic pinning of host memory - confirm
# the exact semantics in the NVBLAS documentation.
NVBLAS_AUTOPIN_MEM_ENABLED 

# List of BLAS routines that are prevented from running on GPU (use for debugging purposes)
# The current list of BLAS routines supported by NVBLAS are 
# GEMM, SYRK, HERK, TRSM, TRMM, SYMM, HEMM, SYR2K, HER2K

#NVBLAS_GPU_DISABLED_SGEMM
#NVBLAS_GPU_DISABLED_DGEMM
#NVBLAS_GPU_DISABLED_CGEMM 
#NVBLAS_GPU_DISABLED_ZGEMM 

# Computation can be optionally hybridized between CPU and GPU 
# By default, GPU-supported BLAS routines are run fully on the GPU.
# The option NVBLAS_CPU_RATIO_<BLAS_ROUTINE> gives the ratio [0,1]
# of the amount of computation that should be done on CPU 
# CAUTION: this option should be used wisely because it can actually
# significantly reduce the overall performance if too much work is given to the CPU
#NVBLAS_CPU_RATIO_CGEMM 0.07

#Read more at: http://docs.nvidia.com/cuda/nvblas/index.html#ixzz4u7GBMb9J 
#Follow us: @GPUComputing on Twitter | NVIDIA on Facebook