Add support for software video encoding with '-encoder cpu' option, currently h264 only

This commit is contained in:
dec05eba
2024-07-04 01:35:03 +02:00
parent ea7b58d7f9
commit 62d61fda12
21 changed files with 686 additions and 171 deletions

View File

@@ -43,13 +43,6 @@ typedef struct {
double nvfbc_dead_start;
} gsr_capture_nvfbc;
#if defined(_WIN64) || defined(__LP64__)
typedef unsigned long long CUdeviceptr_v2;
#else
typedef unsigned int CUdeviceptr_v2;
#endif
typedef CUdeviceptr_v2 CUdeviceptr;
static int max_int(int a, int b) {
return a > b ? a : b;
}
@@ -301,14 +294,30 @@ static int gsr_capture_nvfbc_setup_session(gsr_capture_nvfbc *cap_nvfbc) {
return 0;
}
static void gsr_capture_nvfbc_stop(gsr_capture_nvfbc *cap_nvfbc) {
gsr_capture_nvfbc_destroy_session_and_handle(cap_nvfbc);
gsr_capture_base_stop(&cap_nvfbc->base);
gsr_cuda_unload(&cap_nvfbc->cuda);
if(cap_nvfbc->library) {
dlclose(cap_nvfbc->library);
cap_nvfbc->library = NULL;
}
if(cap_nvfbc->params.display_to_capture) {
free((void*)cap_nvfbc->params.display_to_capture);
cap_nvfbc->params.display_to_capture = NULL;
}
}
static int gsr_capture_nvfbc_start(gsr_capture *cap, AVCodecContext *video_codec_context, AVFrame *frame) {
gsr_capture_nvfbc *cap_nvfbc = cap->priv;
cap_nvfbc->base.video_codec_context = video_codec_context;
cap_nvfbc->base.egl = cap_nvfbc->params.egl;
if(!gsr_cuda_load(&cap_nvfbc->cuda, cap_nvfbc->params.egl->x11.dpy, cap_nvfbc->params.overclock))
return -1;
if(!cap_nvfbc->params.use_software_video_encoder) {
if(!gsr_cuda_load(&cap_nvfbc->cuda, cap_nvfbc->params.egl->x11.dpy, cap_nvfbc->params.overclock))
return -1;
}
if(!gsr_capture_nvfbc_load_library(cap)) {
gsr_cuda_unload(&cap_nvfbc->cuda);
@@ -366,18 +375,26 @@ static int gsr_capture_nvfbc_start(gsr_capture *cap, AVCodecContext *video_codec
frame->width = video_codec_context->width;
frame->height = video_codec_context->height;
if(!cuda_create_codec_context(cap_nvfbc->cuda.cu_ctx, video_codec_context, video_codec_context->width, video_codec_context->height, false, &cap_nvfbc->cuda_stream))
goto error_cleanup;
if(cap_nvfbc->params.use_software_video_encoder) {
if(!gsr_capture_base_setup_textures(&cap_nvfbc->base, frame, cap_nvfbc->params.color_range, GSR_SOURCE_COLOR_BGR, cap_nvfbc->params.hdr, true)) {
goto error_cleanup;
}
} else {
if(!cap_nvfbc->params.use_software_video_encoder) {
if(!cuda_create_codec_context(cap_nvfbc->cuda.cu_ctx, video_codec_context, video_codec_context->width, video_codec_context->height, false, &cap_nvfbc->cuda_stream))
goto error_cleanup;
}
gsr_cuda_context cuda_context = {
.cuda = &cap_nvfbc->cuda,
.cuda_graphics_resources = cap_nvfbc->cuda_graphics_resources,
.mapped_arrays = cap_nvfbc->mapped_arrays
};
gsr_cuda_context cuda_context = {
.cuda = &cap_nvfbc->cuda,
.cuda_graphics_resources = cap_nvfbc->cuda_graphics_resources,
.mapped_arrays = cap_nvfbc->mapped_arrays
};
// TODO: Remove this, it creates shit we dont need
if(!gsr_capture_base_setup_cuda_textures(&cap_nvfbc->base, frame, &cuda_context, cap_nvfbc->params.color_range, GSR_SOURCE_COLOR_BGR, cap_nvfbc->params.hdr)) {
goto error_cleanup;
// TODO: Remove this, it creates shit we dont need
if(!gsr_capture_base_setup_cuda_textures(&cap_nvfbc->base, frame, &cuda_context, cap_nvfbc->params.color_range, GSR_SOURCE_COLOR_BGR, cap_nvfbc->params.hdr)) {
goto error_cleanup;
}
}
/* Disable vsync */
set_vertical_sync_enabled(cap_nvfbc->params.egl, 0);
@@ -385,9 +402,7 @@ static int gsr_capture_nvfbc_start(gsr_capture *cap, AVCodecContext *video_codec
return 0;
error_cleanup:
gsr_capture_nvfbc_destroy_session_and_handle(cap_nvfbc);
gsr_capture_base_stop(&cap_nvfbc->base);
gsr_cuda_unload(&cap_nvfbc->cuda);
gsr_capture_nvfbc_stop(cap_nvfbc);
return -1;
}
@@ -443,49 +458,52 @@ static int gsr_capture_nvfbc_capture(gsr_capture *cap, AVFrame *frame) {
(vec2i){0, 0}, (vec2i){frame->width, frame->height},
0.0f, false);
cap_nvfbc->params.egl->glXSwapBuffers(cap_nvfbc->params.egl->x11.dpy, cap_nvfbc->params.egl->x11.window);
if(cap_nvfbc->params.use_software_video_encoder) {
// TODO: Hdr?
const unsigned int formats[2] = { GL_RED, GL_RG };
for(int i = 0; i < 2; ++i) {
cap_nvfbc->params.egl->glBindTexture(GL_TEXTURE_2D, cap_nvfbc->base.target_textures[i]);
cap_nvfbc->params.egl->glGetTexImage(GL_TEXTURE_2D, 0, formats[i], GL_UNSIGNED_BYTE, frame->data[i]);
}
cap_nvfbc->params.egl->glBindTexture(GL_TEXTURE_2D, 0);
// TODO: HDR is broken
const int div[2] = {1, 2}; // divide UV texture size by 2 because chroma is half size
for(int i = 0; i < 2; ++i) {
CUDA_MEMCPY2D memcpy_struct;
memcpy_struct.srcXInBytes = 0;
memcpy_struct.srcY = 0;
memcpy_struct.srcMemoryType = CU_MEMORYTYPE_ARRAY;
cap_nvfbc->params.egl->glXSwapBuffers(cap_nvfbc->params.egl->x11.dpy, cap_nvfbc->params.egl->x11.window);
} else {
cap_nvfbc->params.egl->glXSwapBuffers(cap_nvfbc->params.egl->x11.dpy, cap_nvfbc->params.egl->x11.window);
memcpy_struct.dstXInBytes = 0;
memcpy_struct.dstY = 0;
memcpy_struct.dstMemoryType = CU_MEMORYTYPE_DEVICE;
// TODO: HDR is broken
const int div[2] = {1, 2}; // divide UV texture size by 2 because chroma is half size
for(int i = 0; i < 2; ++i) {
CUDA_MEMCPY2D memcpy_struct;
memcpy_struct.srcXInBytes = 0;
memcpy_struct.srcY = 0;
memcpy_struct.srcMemoryType = CU_MEMORYTYPE_ARRAY;
memcpy_struct.srcArray = cap_nvfbc->mapped_arrays[i];
memcpy_struct.srcPitch = frame->width / div[i];
memcpy_struct.dstDevice = (CUdeviceptr)frame->data[i];
memcpy_struct.dstPitch = frame->linesize[i];
memcpy_struct.WidthInBytes = frame->width * (cap_nvfbc->params.hdr ? 2 : 1);
memcpy_struct.Height = frame->height / div[i];
// TODO: Remove this copy if possible
cap_nvfbc->cuda.cuMemcpy2DAsync_v2(&memcpy_struct, cap_nvfbc->cuda_stream);
memcpy_struct.dstXInBytes = 0;
memcpy_struct.dstY = 0;
memcpy_struct.dstMemoryType = CU_MEMORYTYPE_DEVICE;
memcpy_struct.srcArray = cap_nvfbc->mapped_arrays[i];
memcpy_struct.srcPitch = frame->width / div[i];
memcpy_struct.dstDevice = (CUdeviceptr)frame->data[i];
memcpy_struct.dstPitch = frame->linesize[i];
memcpy_struct.WidthInBytes = frame->width * (cap_nvfbc->params.hdr ? 2 : 1);
memcpy_struct.Height = frame->height / div[i];
// TODO: Remove this copy if possible
cap_nvfbc->cuda.cuMemcpy2DAsync_v2(&memcpy_struct, cap_nvfbc->cuda_stream);
}
// TODO: needed?
cap_nvfbc->cuda.cuStreamSynchronize(cap_nvfbc->cuda_stream);
}
// TODO: needed?
cap_nvfbc->cuda.cuStreamSynchronize(cap_nvfbc->cuda_stream);
return 0;
}
static void gsr_capture_nvfbc_destroy(gsr_capture *cap, AVCodecContext *video_codec_context) {
(void)video_codec_context;
gsr_capture_nvfbc *cap_nvfbc = cap->priv;
gsr_capture_nvfbc_destroy_session_and_handle(cap_nvfbc);
if(cap_nvfbc) {
gsr_capture_base_stop(&cap_nvfbc->base);
gsr_cuda_unload(&cap_nvfbc->cuda);
dlclose(cap_nvfbc->library);
free((void*)cap_nvfbc->params.display_to_capture);
cap_nvfbc->params.display_to_capture = NULL;
free(cap->priv);
cap->priv = NULL;
}
gsr_capture_nvfbc_stop(cap_nvfbc);
free(cap);
}