Skip to content

Commit

Permalink
normalize codec code:
Browse files Browse the repository at this point in the history
* csc modules now also have an "init_module()"
* nvcuda caches kernels (same as was done for nvenc)
* opencl kernel: rename "init_context" to "select_device"

git-svn-id: https://xpra.org/svn/Xpra/trunk@4726 3bb7dfac-3a0b-4e04-842a-767bc560f471
  • Loading branch information
totaam committed Nov 10, 2013
1 parent 0162f8f commit 196a436
Show file tree
Hide file tree
Showing 4 changed files with 94 additions and 57 deletions.
100 changes: 64 additions & 36 deletions src/xpra/codecs/csc_nvcuda/colorspace_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,26 +18,62 @@
assert bytearray
import pycuda #@UnresolvedImport
from pycuda import driver #@UnresolvedImport
from pycuda.compiler import SourceModule #@UnresolvedImport
from pycuda.compiler import compile #@UnresolvedImport
driver.init()


#index of the preferred CUDA device, read from the XPRA_CUDA_DEVICE
#environment variable (defaults to device 0):
DEFAULT_CUDA_DEVICE_ID = int(os.environ.get("XPRA_CUDA_DEVICE", "0"))
#maps each RGB input pixel format to the tuple of YUV output formats
#for which conversion kernels are generated:
COLORSPACES_MAP = {
"BGRA" : ("YUV420P", "YUV422P", "YUV444P"),
"BGRX" : ("YUV420P", "YUV422P", "YUV444P"),
"RGBA" : ("YUV420P", "YUV422P", "YUV444P"),
"RGBX" : ("YUV420P", "YUV422P", "YUV444P"),
}
#maps (src_format, dst_format) to (kernel function name, kernel source),
#starts empty and is populated from the output of gen_rgb_to_yuv_kernels():
KERNELS_MAP = {}


def log_sys_info():
    """
    Log, at info level, the values reported by driver.get_version()
    (joined with dots) and driver.get_driver_version().
    """
    version_str = ".".join(str(part) for part in driver.get_version())
    log.info("PyCUDA version=%s", version_str)
    driver_version = driver.get_driver_version()
    log.info("PyCUDA driver version=%s", driver_version)

def device_info(d):
    """
    Return a short description string for the device `d`,
    in the form "<name> @ <pci bus id>".
    `d` must provide name() and pci_bus_id() methods.
    """
    name = d.name()
    bus_id = d.pci_bus_id()
    return "%s @ %s" % (name, bus_id)

DEFAULT_CUDA_DEVICE_ID = int(os.environ.get("XPRA_CUDA_DEVICE", "0"))
def roundup(n, m):
    """
    Round the integer `n` up to the next multiple of `m`.
    This uses bit masking, so the result is only a multiple of `m`
    when `m` is a power of two.
    """
    mask = m - 1
    return (n + mask) & ~mask


#cache of compiled kernel cubins, keyed by (device_id, function_name):
KERNEL_cubins = {}
def get_CUDA_kernel(device_id, src_format, dst_format):
    """
    Look up the kernel registered in KERNELS_MAP for converting
    `src_format` to `dst_format`, compile it unless a cubin is already
    cached for this (device_id, function name) pair, then load the cubin
    as a module and fetch the kernel function from it.

    Returns the tuple (function_name, CUDA_function).
    Fails with an AssertionError if no kernel is registered for the
    requested format pair.
    """
    start = time.time()
    kernel = KERNELS_MAP.get((src_format, dst_format))
    assert kernel is not None, "no kernel found for %s to %s" % (src_format, dst_format)
    function_name, ksrc = kernel
    cache_key = (device_id, function_name)
    cubin = KERNEL_cubins.get(cache_key)
    if cubin is None:
        #not compiled yet for this device: compile and remember the result
        debug("compiling for device %s: %s=%s", device_id, function_name, ksrc)
        cubin = compile(ksrc)
        KERNEL_cubins[cache_key] = cubin
    #the module is always loaded from the cubin, cached or freshly compiled:
    CUDA_function = driver.module_from_buffer(cubin).get_function(function_name)
    end = time.time()
    #this elapsed time is also logged when the cubin came from the cache
    debug("compilation of %s took %.1fms", function_name, 1000.0*(end-start))
    return function_name, CUDA_function


selected_device = None
selected_device_id = None
def select_device():
global selected_device
global selected_device, selected_device_id
if selected_device is not None:
return selected_device
return selected_device_id, selected_device
ngpus = driver.Device.count()
log.info("PyCUDA found %s device(s):", ngpus)
device = None
device_id = -1
for i in range(ngpus):
d = driver.Device(i)
host_mem = d.get_attribute(driver.device_attribute.CAN_MAP_HOST_MEMORY)
Expand All @@ -50,9 +86,11 @@ def select_device():
#debug("compute_capability=%s, attributes=%s", d.compute_capability(), attr)
if host_mem and (device is None or i==DEFAULT_CUDA_DEVICE_ID):
device = d
device_id = i
selected_device = device
return selected_device
assert select_device() is not None, "no valid CUDA devices found"
selected_device_id = device_id
assert selected_device is not None, "no valid CUDA devices found"
return selected_device_id, selected_device

context = None
context_wrapper = None
Expand All @@ -70,38 +108,32 @@ def cleanup(self):
self.context.detach()
self.context = None

def init_context():

def init_module():
global context, context_wrapper
if context_wrapper is not None:
return
log_sys_info()
device = select_device()
device_id, device = select_device()
context = device.make_context(flags=driver.ctx_flags.SCHED_YIELD | driver.ctx_flags.MAP_HOST)
debug("testing with context=%s", context)
debug("api version=%s", context.get_api_version())
free, total = driver.mem_get_info()
debug("using device %s", device_info(device))
debug("memory: free=%sMB, total=%sMB", int(free/1024/1024), int(total/1024/1024))
#context.pop()
context_wrapper = CudaContextWrapper(context)
context.pop()

def roundup(n, m):
    """
    Round the integer `n` up to the next multiple of `m`.
    This uses bit masking, so the result is only a multiple of `m`
    when `m` is a power of two.
    """
    mask = m - 1
    return (n + mask) & ~mask
#generate kernel sources:
for rgb_format, yuv_formats in COLORSPACES_MAP.items():
m = gen_rgb_to_yuv_kernels(rgb_format, yuv_formats)
KERNELS_MAP.update(m)
_kernel_names_ = sorted(set([x[0] for x in KERNELS_MAP.values()]))
log.info("%s csc_nvcuda kernels: %s", len(_kernel_names_), ", ".join(_kernel_names_))


COLORSPACES_MAP = {
"BGRA" : ("YUV420P", "YUV422P", "YUV444P"),
"BGRX" : ("YUV420P", "YUV422P", "YUV444P"),
"RGBA" : ("YUV420P", "YUV422P", "YUV444P"),
"RGBX" : ("YUV420P", "YUV422P", "YUV444P"),
}
KERNELS_MAP = {}
for rgb_format, yuv_formats in COLORSPACES_MAP.items():
m = gen_rgb_to_yuv_kernels(rgb_format, yuv_formats)
KERNELS_MAP.update(m)
_kernel_names_ = sorted(set([x[0] for x in KERNELS_MAP.values()]))
log.info("%s csc_nvcuda kernels: %s", len(_kernel_names_), ", ".join(_kernel_names_))
#now, pre-compile the kernels:
for src_format, dst_format in KERNELS_MAP.keys():
get_CUDA_kernel(device_id, src_format, dst_format)
context.pop()

def get_type():
    """Return the type name of this colorspace conversion module."""
    module_type = "nvcuda"
    return module_type
Expand Down Expand Up @@ -135,6 +167,7 @@ def __init__(self):
self.dst_width = 0
self.dst_height = 0
self.dst_format = ""
self.device_id = 0
self.time = 0
self.frames = 0
self.cuda_device = None
Expand All @@ -156,25 +189,20 @@ def init_context(self, src_width, src_height, src_format,
self.dst_format = dst_format
assert self.src_width==self.dst_width and self.src_height==self.dst_height, "scaling is not supported! (%sx%s to %sx%s)" % (self.src_width, self.src_height, self.dst_width, self.dst_height)

self.device_id = DEFAULT_CUDA_DEVICE_ID
self.init_cuda(0)

def init_cuda(self, device_id):
debug("init_cuda(%s)", device_id)
self.cuda_device = driver.Device(DEFAULT_CUDA_DEVICE_ID)
def init_cuda(self):
debug("init_cuda() device_id=%s", self.device_id)
self.cuda_device = driver.Device(self.device_id)
self.cuda_context = self.cuda_device.make_context(flags=driver.ctx_flags.SCHED_AUTO | driver.ctx_flags.MAP_HOST)
#use alias to make code easier to read:
d = self.cuda_device
da = driver.device_attribute
try:
debug("init_cuda(%s) cuda_device=%s, cuda_context=%s, thread=%s", device_id, self.cuda_device, self.cuda_context, threading.current_thread())
debug("init_cuda() cuda_device=%s, cuda_context=%s, thread=%s", self.cuda_device, self.cuda_context, threading.current_thread())
#compile/get kernel:
key = self.src_format, self.dst_format
k = KERNELS_MAP.get(key)
assert k is not None, "kernel not found for %s" % str(key)
self.kernel_function_name, ksrc = k
debug("init_cuda(%s) compiling kernel %s: %s", device_id, self.kernel_function_name, ksrc)
mod = SourceModule(ksrc)
self.kernel_function = mod.get_function(self.kernel_function_name)
self.kernel_function_name, self.kernel_function = self.get_CUDA_kernel(self.device_id, self.src_format, self.dst_format)

self.max_block_sizes = d.get_attribute(da.MAX_BLOCK_DIM_X), d.get_attribute(da.MAX_BLOCK_DIM_Y), d.get_attribute(da.MAX_BLOCK_DIM_Z)
self.max_grid_sizes = d.get_attribute(da.MAX_GRID_DIM_X), d.get_attribute(da.MAX_GRID_DIM_Y), d.get_attribute(da.MAX_GRID_DIM_Z)
Expand Down
28 changes: 14 additions & 14 deletions src/xpra/codecs/csc_opencl/colorspace_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,21 @@
if len(opencl_platforms)==0:
raise ImportError("no OpenCL platforms found!")

def roundup(n, m):
    """
    Round the integer `n` up to the next multiple of `m`.
    This uses bit masking, so the result is only a multiple of `m`
    when `m` is a power of two.
    """
    mask = m - 1
    return (n + mask) & ~mask

def dimdiv(dim, div):
    """
    Divide the dimension `dim` by the subsampling divisor `div`,
    then pass the result through roundup() with `div` as the multiple.
    """
    #when we divide a dimension by the subsampling
    #we want to round up so as to include the last
    #pixel when we hit odd dimensions
    #NOTE(review): with integer arguments under Python 2, dim/div truncates
    #before roundup() is applied - confirm this is the intended rounding
    divided = dim/div
    return roundup(divided, div)

def device_type(d):
    """
    Return a printable device type for the OpenCL device `d`.

    The string form is obtained from pyopencl.device_type.to_string(d.type);
    if that conversion fails for any reason, the raw `d.type` value
    is returned instead.
    """
    try:
        return pyopencl.device_type.to_string(d.type)
    except Exception:
        #best-effort fallback, but unlike a bare "except:" this does not
        #swallow KeyboardInterrupt or SystemExit
        return d.type


def device_info(d):
dtype = device_type(d)
if hasattr(d, "opencl_c_version"):
Expand All @@ -44,7 +52,6 @@ def is_supported(platform_name):
#FreeOCL and pocl do not work:
return not platform_name.startswith("FreeOCL") and not platform_name.startswith("Portable Computing Language")


def log_device_info(device):
if not device:
return
Expand Down Expand Up @@ -76,7 +83,7 @@ def log_version_info():
selected_device = None
selected_platform = None
context = None
def init_context():
def select_device():
global context, selected_device,selected_platform
if context is not None:
return
Expand Down Expand Up @@ -363,7 +370,7 @@ def build_kernels():
global program
if program is not None:
return
init_context()
select_device()
NAMES_TO_KERNELS = gen_kernels()
with warnings.catch_warnings(record=True) as w:
def dump_warnings(logfn):
Expand All @@ -385,20 +392,13 @@ def dump_warnings(logfn):
raise ImportError("cannot build the OpenCL program: %s" % e)


def roundup(n, m):
    """
    Round the integer `n` up to the next multiple of `m`.
    This uses bit masking, so the result is only a multiple of `m`
    when `m` is a power of two.
    """
    mask = m - 1
    return (n + mask) & ~mask

def dimdiv(dim, div):
    """
    Divide the dimension `dim` by the subsampling divisor `div`,
    then pass the result through roundup() with `div` as the multiple.
    """
    #when we divide a dimension by the subsampling
    #we want to round up so as to include the last
    #pixel when we hit odd dimensions
    #NOTE(review): with integer arguments under Python 2, dim/div truncates
    #before roundup() is applied - confirm this is the intended rounding
    divided = dim/div
    return roundup(divided, div)


from xpra.codecs.image_wrapper import ImageWrapper
from xpra.codecs.codec_constants import codec_spec, get_subsampling_divs


def init_module():
    """
    Module initialization hook for the OpenCL colorspace converter:
    delegates all the work to build_kernels().
    """
    build_kernels()

def get_type():
    """Return the type name of this colorspace conversion module."""
    module_type = "opencl"
    return module_type

Expand Down
17 changes: 10 additions & 7 deletions src/xpra/codecs/csc_swscale/colorspace_converter.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -53,13 +53,6 @@ cdef extern from "libswscale/swscale.h":
uint8_t *const dst[], const int dstStride[]) nogil


def get_type():
    """Return the type name of this colorspace conversion module."""
    module_type = "swscale"
    return module_type

def get_version():
    """Return the swscale version, as reported by get_swscale_version()."""
    version = get_swscale_version()
    return version


cdef class CSCPixelFormat:
cdef AVPixelFormat av_enum
cdef char* av_enum_name
Expand Down Expand Up @@ -163,6 +156,16 @@ def get_swscale_flags_strs(int flags):
return strs


def init_module():
    """
    Module initialization hook: intentionally does nothing.
    """
    #nothing to do!
    return None

def get_type():
    """Return the type name of this colorspace conversion module."""
    module_type = "swscale"
    return module_type

def get_version():
    """Return the swscale version, as reported by get_swscale_version()."""
    version = get_swscale_version()
    return version

def get_input_colorspaces():
    """Return the module-level COLORSPACES object (the same object, not a copy)."""
    input_colorspaces = COLORSPACES
    return input_colorspaces

Expand Down
6 changes: 6 additions & 0 deletions src/xpra/codecs/video_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,12 @@ def init_csc_option(self, csc_name):
debug("init_csc_option(%s) module=%s", csc_name, csc_module)
if csc_module is None:
return
csc_type = csc_module.get_type()
try:
csc_module.init_module()
except Exception, e:
log.warn("cannot use %s module %s: %s", csc_type, csc_module, e, exc_info=True)
return
in_cscs = csc_module.get_input_colorspaces()
for in_csc in in_cscs:
csc_specs = self._csc_encoder_specs.setdefault(in_csc, [])
Expand Down

0 comments on commit 196a436

Please sign in to comment.