Tune av1 quality

This commit is contained in:
dec05eba
2025-04-15 03:34:23 +02:00
parent 4e866a18fc
commit 1f3d28fc5c
9 changed files with 183 additions and 177 deletions

View File

@@ -39,7 +39,7 @@ For you as a user this only means that if you installed GPU Screen Recorder as a
On a system with an i5 4690k CPU and a GTX 1080 GPU:\ On a system with an i5 4690k CPU and a GTX 1080 GPU:\
When recording Legend of Zelda Breath of the Wild at 4k, fps drops from 30 to 7 when using OBS Studio + nvenc, however when using this screen recorder the fps remains at 30.\ When recording Legend of Zelda Breath of the Wild at 4k, fps drops from 30 to 7 when using OBS Studio + nvenc, however when using this screen recorder the fps remains at 30.\
When recording GTA V at 4k on highest settings, fps drops from 60 to 23 when using obs-nvfbc + nvenc, however when using this screen recorder the fps only drops to 58.\ When recording GTA V at 4k on highest settings, fps drops from 60 to 23 when using obs-nvfbc + nvenc, however when using this screen recorder the fps only drops to 58.\
GPU Screen Recorder also produces much smoother videos than OBS when GPU utilization is close to 100%, see comparison here: [https://www.youtube.com/watch?v=zfj4sNVLLLg](https://www.youtube.com/watch?v=zfj4sNVLLLg).\ GPU Screen Recorder also produces much smoother videos than OBS when GPU utilization is close to 100%, see comparison here: [https://www.youtube.com/watch?v=zfj4sNVLLLg](https://www.youtube.com/watch?v=zfj4sNVLLLg) and [https://www.youtube.com/watch?v=aK67RSZw2ZQ](https://www.youtube.com/watch?v=aK67RSZw2ZQ).\
GPU Screen Recorder has much better performance than OBS Studio even with version 30.2 that does "zero-copy" recording and encoding, see: [https://www.youtube.com/watch?v=jdroRjibsDw](https://www.youtube.com/watch?v=jdroRjibsDw).\ GPU Screen Recorder has much better performance than OBS Studio even with version 30.2 that does "zero-copy" recording and encoding, see: [https://www.youtube.com/watch?v=jdroRjibsDw](https://www.youtube.com/watch?v=jdroRjibsDw).\
It is recommended to save the video to an SSD because of the large file size, which a slow HDD might not be fast enough to handle. Using variable framerate mode (-fm vfr), which is the default, is also recommended as this reduces encoding load. Ultra quality is also overkill most of the time; very high (the default) or lower quality is usually enough.\ It is recommended to save the video to an SSD because of the large file size, which a slow HDD might not be fast enough to handle. Using variable framerate mode (-fm vfr), which is the default, is also recommended as this reduces encoding load. Ultra quality is also overkill most of the time; very high (the default) or lower quality is usually enough.\
Note that for best performance you should close other screen recorders such as OBS Studio when using GPU Screen Recorder even if they are not recording, since they can affect performance even when idle. This is the case with OBS Studio. Note that for best performance you should close other screen recorders such as OBS Studio when using GPU Screen Recorder even if they are not recording, since they can affect performance even when idle. This is the case with OBS Studio.

4
TODO
View File

@@ -265,3 +265,7 @@ nvfbc capture cursor with cursor.h instead and composite that on top. This allow
Maybe remove external shader code and make a simple external to internal texture converter (compute shader), to reduce texture sampling. Maybe this is faster? Maybe remove external shader code and make a simple external to internal texture converter (compute shader), to reduce texture sampling. Maybe this is faster?
Fix opengl context broken after suspend on nvidia by using this: https://registry.khronos.org/OpenGL/extensions/NV/NV_robustness_video_memory_purge.txt requires glx context creation flags and GetGraphicsResetStatusARB() == PURGED_CONTEXT_RESET_NV check to recreate all graphics. Fix opengl context broken after suspend on nvidia by using this: https://registry.khronos.org/OpenGL/extensions/NV/NV_robustness_video_memory_purge.txt requires glx context creation flags and GetGraphicsResetStatusARB() == PURGED_CONTEXT_RESET_NV check to recreate all graphics.
HDR looks incorrect, brightest point gets cut off.
Make "screen" capture the preferred monitor.

View File

@@ -14,11 +14,6 @@ typedef struct {
gsr_gpu_vendor vendor; gsr_gpu_vendor vendor;
int gpu_version; /* 0 if unknown */ int gpu_version; /* 0 if unknown */
bool is_steam_deck; bool is_steam_deck;
/* Only currently set for Mesa. 0 if unknown format */
int driver_major;
int driver_minor;
int driver_patch;
} gsr_gpu_info; } gsr_gpu_info;
typedef enum { typedef enum {

View File

@@ -169,6 +169,13 @@ typedef void (*GLDEBUGPROC)(unsigned int source, unsigned int type, unsigned int
typedef int (*FUNC_eglQueryDisplayAttribEXT)(EGLDisplay dpy, int32_t attribute, intptr_t *value); typedef int (*FUNC_eglQueryDisplayAttribEXT)(EGLDisplay dpy, int32_t attribute, intptr_t *value);
typedef const char* (*FUNC_eglQueryDeviceStringEXT)(void *device, int32_t name); typedef const char* (*FUNC_eglQueryDeviceStringEXT)(void *device, int32_t name);
typedef int (*FUNC_eglQueryDmaBufModifiersEXT)(EGLDisplay dpy, int32_t format, int32_t max_modifiers, uint64_t *modifiers, int *external_only, int32_t *num_modifiers); typedef int (*FUNC_eglQueryDmaBufModifiersEXT)(EGLDisplay dpy, int32_t format, int32_t max_modifiers, uint64_t *modifiers, int *external_only, int32_t *num_modifiers);
/*
 * Function pointer types for the OpenGL memory-object entry points
 * (glCreateMemoryObjectsEXT, glImportMemoryFdEXT, ...).
 * Parameters are declared with plain C types (int, unsigned int, uint64_t, ssize_t)
 * rather than the GL typedef names.
 * NOTE(review): these names presumably come from GL_EXT_memory_object and
 * GL_EXT_memory_object_fd - confirm against the Khronos registry.
 */
typedef void (*FUNC_glCreateMemoryObjectsEXT)(int n, unsigned int *memoryObjects);
typedef void (*FUNC_glImportMemoryFdEXT)(unsigned int memory, uint64_t size, unsigned int handleType, int fd);
typedef unsigned char (*FUNC_glIsMemoryObjectEXT)(unsigned int memoryObject);
typedef void (*FUNC_glTexStorageMem2DEXT)(unsigned int target, int levels, unsigned int internalFormat, int width, int height, unsigned int memory, uint64_t offset);
typedef void (*FUNC_glBufferStorageMemEXT)(unsigned int target, ssize_t size, unsigned int memory, uint64_t offset);
typedef void (*FUNC_glNamedBufferStorageMemEXT)(unsigned int buffer, ssize_t size, unsigned int memory, uint64_t offset);
typedef void (*FUNC_glMemoryObjectParameterivEXT)(unsigned int memoryObject, unsigned int pname, const int *params);
typedef enum { typedef enum {
GSR_GL_CONTEXT_TYPE_EGL, GSR_GL_CONTEXT_TYPE_EGL,
@@ -219,6 +226,13 @@ struct gsr_egl {
FUNC_eglQueryDisplayAttribEXT eglQueryDisplayAttribEXT; FUNC_eglQueryDisplayAttribEXT eglQueryDisplayAttribEXT;
FUNC_eglQueryDeviceStringEXT eglQueryDeviceStringEXT; FUNC_eglQueryDeviceStringEXT eglQueryDeviceStringEXT;
FUNC_eglQueryDmaBufModifiersEXT eglQueryDmaBufModifiersEXT; FUNC_eglQueryDmaBufModifiersEXT eglQueryDmaBufModifiersEXT;
FUNC_glCreateMemoryObjectsEXT glCreateMemoryObjectsEXT;
FUNC_glImportMemoryFdEXT glImportMemoryFdEXT;
FUNC_glIsMemoryObjectEXT glIsMemoryObjectEXT;
FUNC_glTexStorageMem2DEXT glTexStorageMem2DEXT;
FUNC_glBufferStorageMemEXT glBufferStorageMemEXT;
FUNC_glNamedBufferStorageMemEXT glNamedBufferStorageMemEXT;
FUNC_glMemoryObjectParameterivEXT glMemoryObjectParameterivEXT;
__GLXextFuncPtr (*glXGetProcAddress)(const unsigned char *procName); __GLXextFuncPtr (*glXGetProcAddress)(const unsigned char *procName);
GLXFBConfig* (*glXChooseFBConfig)(Display *dpy, int screen, const int *attribList, int *nitems); GLXFBConfig* (*glXChooseFBConfig)(Display *dpy, int screen, const int *attribList, int *nitems);
@@ -298,6 +312,7 @@ struct gsr_egl {
void (*glUniformMatrix2fv)(int location, int count, unsigned char transpose, const float *value); void (*glUniformMatrix2fv)(int location, int count, unsigned char transpose, const float *value);
void (*glDebugMessageCallback)(GLDEBUGPROC callback, const void *userParam); void (*glDebugMessageCallback)(GLDEBUGPROC callback, const void *userParam);
void (*glScissor)(int x, int y, int width, int height); void (*glScissor)(int x, int y, int width, int height);
void (*glCreateBuffers)(int n, unsigned int *buffers);
void (*glReadPixels)(int x, int y, int width, int height, unsigned int format, unsigned int type, void *pixels); void (*glReadPixels)(int x, int y, int width, int height, unsigned int format, unsigned int type, void *pixels);
void* (*glMapBuffer)(unsigned int target, unsigned int access); void* (*glMapBuffer)(unsigned int target, unsigned int access);
unsigned char (*glUnmapBuffer)(unsigned int target); unsigned char (*glUnmapBuffer)(unsigned int target);

View File

@@ -33,4 +33,4 @@ wayland-client = ">=1"
dbus-1 = ">=1" dbus-1 = ">=1"
libpipewire-0.3 = ">=1" libpipewire-0.3 = ">=1"
libspa-0.2 = ">=0" libspa-0.2 = ">=0"
#vulkan = ">=1" vulkan = ">=1"

View File

@@ -225,6 +225,14 @@ static bool gsr_egl_proc_load_egl(gsr_egl *self) {
self->eglQueryDeviceStringEXT = (FUNC_eglQueryDeviceStringEXT)self->eglGetProcAddress("eglQueryDeviceStringEXT"); self->eglQueryDeviceStringEXT = (FUNC_eglQueryDeviceStringEXT)self->eglGetProcAddress("eglQueryDeviceStringEXT");
self->eglQueryDmaBufModifiersEXT = (FUNC_eglQueryDmaBufModifiersEXT)self->eglGetProcAddress("eglQueryDmaBufModifiersEXT"); self->eglQueryDmaBufModifiersEXT = (FUNC_eglQueryDmaBufModifiersEXT)self->eglGetProcAddress("eglQueryDmaBufModifiersEXT");
self->glCreateMemoryObjectsEXT = (FUNC_glCreateMemoryObjectsEXT)self->eglGetProcAddress("glCreateMemoryObjectsEXT");
self->glImportMemoryFdEXT = (FUNC_glImportMemoryFdEXT)self->eglGetProcAddress("glImportMemoryFdEXT");
self->glIsMemoryObjectEXT = (FUNC_glIsMemoryObjectEXT)self->eglGetProcAddress("glIsMemoryObjectEXT");
self->glTexStorageMem2DEXT = (FUNC_glTexStorageMem2DEXT)self->eglGetProcAddress("glTexStorageMem2DEXT");
self->glBufferStorageMemEXT = (FUNC_glBufferStorageMemEXT)self->eglGetProcAddress("glBufferStorageMemEXT");
self->glNamedBufferStorageMemEXT = (FUNC_glNamedBufferStorageMemEXT)self->eglGetProcAddress("glNamedBufferStorageMemEXT");
self->glMemoryObjectParameterivEXT = (FUNC_glMemoryObjectParameterivEXT)self->eglGetProcAddress("glMemoryObjectParameterivEXT");
if(!self->eglExportDMABUFImageQueryMESA) { if(!self->eglExportDMABUFImageQueryMESA) {
fprintf(stderr, "gsr error: gsr_egl_load failed: could not find eglExportDMABUFImageQueryMESA\n"); fprintf(stderr, "gsr error: gsr_egl_load failed: could not find eglExportDMABUFImageQueryMESA\n");
return false; return false;

View File

@@ -8,26 +8,15 @@
//#include <vulkan/vulkan_core.h> //#include <vulkan/vulkan_core.h>
#define GL_HANDLE_TYPE_OPAQUE_FD_EXT 0x9586
#define GL_TEXTURE_TILING_EXT 0x9580 #define GL_TEXTURE_TILING_EXT 0x9580
#define GL_OPTIMAL_TILING_EXT 0x9584 #define GL_OPTIMAL_TILING_EXT 0x9584
#define GL_LINEAR_TILING_EXT 0x9585 #define GL_LINEAR_TILING_EXT 0x9585
#define GL_PIXEL_PACK_BUFFER 0x88EB
#define GL_PIXEL_UNPACK_BUFFER 0x88EC
#define GL_STREAM_READ 0x88E1
#define GL_STREAM_DRAW 0x88E0
#define GL_READ_ONLY 0x88B8
#define GL_WRITE_ONLY 0x88B9
#define GL_READ_FRAMEBUFFER 0x8CA8
typedef struct { typedef struct {
gsr_video_encoder_vulkan_params params; gsr_video_encoder_vulkan_params params;
unsigned int target_textures[2]; unsigned int target_textures[2];
AVBufferRef *device_ctx; AVBufferRef *device_ctx;
AVVulkanDeviceContext* vv;
unsigned int pbo_y[2];
unsigned int pbo_uv[2];
AVFrame *sw_frame;
} gsr_video_encoder_vulkan; } gsr_video_encoder_vulkan;
static bool gsr_video_encoder_vulkan_setup_context(gsr_video_encoder_vulkan *self, AVCodecContext *video_codec_context) { static bool gsr_video_encoder_vulkan_setup_context(gsr_video_encoder_vulkan *self, AVCodecContext *video_codec_context) {
@@ -84,6 +73,24 @@ static AVVulkanDeviceContext* video_codec_context_get_vulkan_data(AVCodecContext
return (AVVulkanDeviceContext*)device_context->hwctx; return (AVVulkanDeviceContext*)device_context->hwctx;
} }
/*
 * Returns the index of the first memory type of |pdev| that is both permitted by
 * mem_reqs->memoryTypeBits and has every property bit in |prop_flags| set.
 * Returns UINT32_MAX when no memory type qualifies.
 *
 * The memory properties are queried through the caller-supplied
 * |vkGetPhysicalDeviceMemoryProperties| function pointer.
 */
static uint32_t get_memory_type_idx(VkPhysicalDevice pdev, const VkMemoryRequirements *mem_reqs, VkMemoryPropertyFlagBits prop_flags, PFN_vkGetPhysicalDeviceMemoryProperties vkGetPhysicalDeviceMemoryProperties) {
    VkPhysicalDeviceMemoryProperties pdev_mem_props;
    vkGetPhysicalDeviceMemoryProperties(pdev, &pdev_mem_props);

    const VkMemoryPropertyFlags wanted_flags = (VkMemoryPropertyFlags)prop_flags;
    for(uint32_t i = 0; i < pdev_mem_props.memoryTypeCount; i++) {
        const VkMemoryType *type = &pdev_mem_props.memoryTypes[i];
        /* The mask bit is built from an unsigned 1: shifting a signed int by 31 is undefined behaviour */
        const uint32_t type_bit = (uint32_t)1 << i;
        if((mem_reqs->memoryTypeBits & type_bit) && (type->propertyFlags & wanted_flags) == wanted_flags) {
            return i;
        }
    }

    return UINT32_MAX;
}
static bool gsr_video_encoder_vulkan_setup_textures(gsr_video_encoder_vulkan *self, AVCodecContext *video_codec_context, AVFrame *frame) { static bool gsr_video_encoder_vulkan_setup_textures(gsr_video_encoder_vulkan *self, AVCodecContext *video_codec_context, AVFrame *frame) {
const int res = av_hwframe_get_buffer(video_codec_context->hw_frames_ctx, frame, 0); const int res = av_hwframe_get_buffer(video_codec_context->hw_frames_ctx, frame, 0);
if(res < 0) { if(res < 0) {
@@ -91,56 +98,133 @@ static bool gsr_video_encoder_vulkan_setup_textures(gsr_video_encoder_vulkan *se
return false; return false;
} }
//AVVkFrame *target_surface_id = (AVVkFrame*)frame->data[0]; while(self->params.egl->glGetError()) {}
self->vv = video_codec_context_get_vulkan_data(video_codec_context);
const unsigned int internal_formats_nv12[2] = { GL_RGBA8, GL_RGBA8 }; // TODO: GL_R8, GL_R16 AVVkFrame *target_surface_id = (AVVkFrame*)frame->data[0];
const unsigned int internal_formats_p010[2] = { GL_R16, GL_RG16 }; AVVulkanDeviceContext* vv = video_codec_context_get_vulkan_data(video_codec_context);
const unsigned int formats[2] = { GL_RED, GL_RG }; const size_t luma_size = frame->width * frame->height;
const int div[2] = {1, 2}; // divide UV texture size by 2 because chroma is half size if(vv) {
PFN_vkGetImageMemoryRequirements vkGetImageMemoryRequirements = (PFN_vkGetImageMemoryRequirements)vv->get_proc_addr(vv->inst, "vkGetImageMemoryRequirements");
PFN_vkAllocateMemory vkAllocateMemory = (PFN_vkAllocateMemory)vv->get_proc_addr(vv->inst, "vkAllocateMemory");
PFN_vkGetPhysicalDeviceMemoryProperties vkGetPhysicalDeviceMemoryProperties = (PFN_vkGetPhysicalDeviceMemoryProperties)vv->get_proc_addr(vv->inst, "vkGetPhysicalDeviceMemoryProperties");
PFN_vkGetMemoryFdKHR vkGetMemoryFdKHR = (PFN_vkGetMemoryFdKHR)vv->get_proc_addr(vv->inst, "vkGetMemoryFdKHR");
for(int i = 0; i < 2; ++i) { VkMemoryRequirements mem_reqs = {0};
self->target_textures[i] = gl_create_texture(self->params.egl, video_codec_context->width / div[i], video_codec_context->height / div[i], self->params.color_depth == GSR_COLOR_DEPTH_8_BITS ? internal_formats_nv12[i] : internal_formats_p010[i], formats[i], GL_NEAREST); vkGetImageMemoryRequirements(vv->act_dev, target_surface_id->img[0], &mem_reqs);
if(self->target_textures[i] == 0) {
fprintf(stderr, "gsr error: gsr_video_encoder_cuda_setup_textures: failed to create opengl texture\n"); fprintf(stderr, "size: %lu, alignment: %lu, memory bits: 0x%08x\n", mem_reqs.size, mem_reqs.alignment, mem_reqs.memoryTypeBits);
return false; VkDeviceMemory mem;
} {
VkExportMemoryAllocateInfo exp_mem_info;
VkMemoryAllocateInfo mem_alloc_info;
VkMemoryDedicatedAllocateInfoKHR ded_info;
memset(&exp_mem_info, 0, sizeof(exp_mem_info));
exp_mem_info.sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO;
exp_mem_info.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT;
memset(&ded_info, 0, sizeof(ded_info));
ded_info.sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO;
ded_info.image = target_surface_id->img[0];
exp_mem_info.pNext = &ded_info;
memset(&mem_alloc_info, 0, sizeof(mem_alloc_info));
mem_alloc_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
mem_alloc_info.pNext = &exp_mem_info;
mem_alloc_info.allocationSize = target_surface_id->size[0];
mem_alloc_info.memoryTypeIndex = get_memory_type_idx(vv->phys_dev, &mem_reqs, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, vkGetPhysicalDeviceMemoryProperties);
if (mem_alloc_info.memoryTypeIndex == UINT32_MAX) {
fprintf(stderr, "No suitable memory type index found.\n");
return VK_NULL_HANDLE;
} }
self->params.egl->glGenBuffers(2, self->pbo_y); if (vkAllocateMemory(vv->act_dev, &mem_alloc_info, 0, &mem) !=
VK_SUCCESS)
return VK_NULL_HANDLE;
self->params.egl->glBindBuffer(GL_PIXEL_PACK_BUFFER, self->pbo_y[0]); fprintf(stderr, "memory: %p\n", (void*)mem);
self->params.egl->glBufferData(GL_PIXEL_PACK_BUFFER, frame->width * frame->height, 0, GL_STREAM_READ);
self->params.egl->glBindBuffer(GL_PIXEL_PACK_BUFFER, self->pbo_y[1]);
self->params.egl->glBufferData(GL_PIXEL_PACK_BUFFER, frame->width * frame->height, 0, GL_STREAM_READ);
self->params.egl->glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
self->params.egl->glGenBuffers(2, self->pbo_uv);
self->params.egl->glBindBuffer(GL_PIXEL_PACK_BUFFER, self->pbo_uv[0]);
self->params.egl->glBufferData(GL_PIXEL_PACK_BUFFER, (frame->width/2 * frame->height/2) * 2, 0, GL_STREAM_READ);
self->params.egl->glBindBuffer(GL_PIXEL_PACK_BUFFER, self->pbo_uv[1]);
self->params.egl->glBufferData(GL_PIXEL_PACK_BUFFER, (frame->width/2 * frame->height/2) * 2, 0, GL_STREAM_READ);
self->params.egl->glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
self->sw_frame = av_frame_alloc();
self->sw_frame->format = AV_PIX_FMT_NV12;
self->sw_frame->width = frame->width;
self->sw_frame->height = frame->height;
// TODO: Remove
if(av_frame_get_buffer(self->sw_frame, 0) < 0) {
fprintf(stderr, "failed to allocate sw frame\n");
} }
// TODO: Remove fprintf(stderr, "target surface id: %p, %zu, %zu\n", (void*)target_surface_id->mem[0], target_surface_id->offset[0], target_surface_id->offset[1]);
if(av_frame_make_writable(self->sw_frame) < 0) { fprintf(stderr, "vkGetMemoryFdKHR: %p\n", (void*)vkGetMemoryFdKHR);
fprintf(stderr, "failed to make writable\n");
int fd = 0;
VkMemoryGetFdInfoKHR fd_info;
memset(&fd_info, 0, sizeof(fd_info));
fd_info.sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR;
fd_info.memory = target_surface_id->mem[0];
fd_info.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT;
if(vkGetMemoryFdKHR(vv->act_dev, &fd_info, &fd) != VK_SUCCESS) {
fprintf(stderr, "failed!\n");
} else {
fprintf(stderr, "fd: %d\n", fd);
} }
fprintf(stderr, "glImportMemoryFdEXT: %p, size: %zu\n", (void*)self->params.egl->glImportMemoryFdEXT, target_surface_id->size[0]);
const int tiling = target_surface_id->tiling == VK_IMAGE_TILING_LINEAR ? GL_LINEAR_TILING_EXT : GL_OPTIMAL_TILING_EXT;
if(tiling != GL_OPTIMAL_TILING_EXT) {
fprintf(stderr, "tiling %d is not supported, only GL_OPTIMAL_TILING_EXT (%d) is supported\n", tiling, GL_OPTIMAL_TILING_EXT);
}
unsigned int gl_memory_obj = 0;
self->params.egl->glCreateMemoryObjectsEXT(1, &gl_memory_obj);
//const int dedicated = GL_TRUE;
//self->params.egl->glMemoryObjectParameterivEXT(gl_memory_obj, GL_DEDICATED_MEMORY_OBJECT_EXT, &dedicated);
self->params.egl->glImportMemoryFdEXT(gl_memory_obj, target_surface_id->size[0], GL_HANDLE_TYPE_OPAQUE_FD_EXT, fd);
if(!self->params.egl->glIsMemoryObjectEXT(gl_memory_obj))
fprintf(stderr, "failed to create object!\n");
fprintf(stderr, "gl memory obj: %u, error: %d\n", gl_memory_obj, self->params.egl->glGetError());
// fprintf(stderr, "0 gl error: %d\n", self->params.egl->glGetError());
// unsigned int vertex_buffer = 0;
// self->params.egl->glGenBuffers(1, &vertex_buffer);
// self->params.egl->glBindBuffer(GL_ARRAY_BUFFER, vertex_buffer);
// self->params.egl->glBufferStorageMemEXT(GL_ARRAY_BUFFER, target_surface_id->size[0], gl_memory_obj, target_surface_id->offset[0]);
// fprintf(stderr, "1 gl error: %d\n", self->params.egl->glGetError());
// fprintf(stderr, "0 gl error: %d\n", self->params.egl->glGetError());
// unsigned int buffer = 0;
// self->params.egl->glCreateBuffers(1, &buffer);
// self->params.egl->glNamedBufferStorageMemEXT(buffer, target_surface_id->size[0], gl_memory_obj, target_surface_id->offset[0]);
// fprintf(stderr, "1 gl error: %d\n", self->params.egl->glGetError());
self->params.egl->glGenTextures(1, &self->target_textures[0]);
self->params.egl->glBindTexture(GL_TEXTURE_2D, self->target_textures[0]);
fprintf(stderr, "1 gl error: %d\n", self->params.egl->glGetError());
self->params.egl->glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_TILING_EXT, tiling);
fprintf(stderr, "tiling: %d\n", tiling);
fprintf(stderr, "2 gl error: %d\n", self->params.egl->glGetError());
self->params.egl->glTexStorageMem2DEXT(GL_TEXTURE_2D, 1, GL_R8, frame->width, frame->height, gl_memory_obj, target_surface_id->offset[0]);
fprintf(stderr, "3 gl error: %d\n", self->params.egl->glGetError());
self->params.egl->glBindTexture(GL_TEXTURE_2D, 0);
self->params.egl->glGenTextures(1, &self->target_textures[1]);
self->params.egl->glBindTexture(GL_TEXTURE_2D, self->target_textures[1]);
fprintf(stderr, "1 gl error: %d\n", self->params.egl->glGetError());
self->params.egl->glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_TILING_EXT, tiling);
fprintf(stderr, "tiling: %d\n", tiling);
fprintf(stderr, "2 gl error: %d\n", self->params.egl->glGetError());
self->params.egl->glTexStorageMem2DEXT(GL_TEXTURE_2D, 1, GL_RG8, frame->width/2, frame->height/2, gl_memory_obj, target_surface_id->offset[0] + luma_size);
fprintf(stderr, "3 gl error: %d\n", self->params.egl->glGetError());
self->params.egl->glBindTexture(GL_TEXTURE_2D, 0);
}
return true; return true;
} }
@@ -185,91 +269,6 @@ void gsr_video_encoder_vulkan_stop(gsr_video_encoder_vulkan *self, AVCodecContex
av_buffer_unref(&self->device_ctx); av_buffer_unref(&self->device_ctx);
} }
/* Free callback with the (opaque, data) signature that intentionally releases nothing. */
static void nop_free(void *opaque, uint8_t *data) {
    (void)opaque;
    (void)data;
}
/*
 * Reads the Y plane back from color_conversion->framebuffers[0] and the UV plane
 * from color_conversion->framebuffers[1] through two alternating pixel-pack
 * buffers per plane, points self->sw_frame at the mapped memory and uploads it
 * into |frame| with av_hwframe_transfer_data.
 *
 * On every call glReadPixels targets buffer (counter % 2) while buffer
 * ((counter + 1) % 2) is the one that gets mapped.
 *
 * If either plane can't be mapped the transfer is skipped, since the plane
 * pointers would be NULL. Only buffers that were actually mapped are unmapped.
 */
static void gsr_video_encoder_vulkan_copy_textures_to_frame(gsr_video_encoder *encoder, AVFrame *frame, gsr_color_conversion *color_conversion) {
    gsr_video_encoder_vulkan *self = encoder->priv;

    static int counter = 0;
    ++counter;

    const int read_index = counter % 2;
    const int map_index = (counter + 1) % 2;
    const int y_size = frame->width * frame->height;
    const int uv_size = (frame->width/2 * frame->height/2) * 2;

    /* Luma plane */
    while(self->params.egl->glGetError()) {}
    self->params.egl->glBindFramebuffer(GL_READ_FRAMEBUFFER, color_conversion->framebuffers[0]);

    self->params.egl->glBindBuffer(GL_PIXEL_PACK_BUFFER, self->pbo_y[read_index]);
    self->params.egl->glBufferData(GL_PIXEL_PACK_BUFFER, y_size, 0, GL_STREAM_READ);
    self->params.egl->glReadPixels(0, 0, frame->width, frame->height, GL_RED, GL_UNSIGNED_BYTE, 0);

    /* NOTE(review): glBufferData re-specifies the store of the buffer that is about to be mapped - verify that the previously read pixels are still what gets mapped */
    self->params.egl->glBindBuffer(GL_PIXEL_PACK_BUFFER, self->pbo_y[map_index]);
    self->params.egl->glBufferData(GL_PIXEL_PACK_BUFFER, y_size, 0, GL_STREAM_READ);
    uint8_t *ptr_y = (uint8_t*)self->params.egl->glMapBuffer(GL_PIXEL_PACK_BUFFER, GL_READ_ONLY);
    if(!ptr_y) {
        fprintf(stderr, "failed to map buffer y!\n");
    }

    /* Chroma plane */
    while(self->params.egl->glGetError()) {}
    self->params.egl->glBindFramebuffer(GL_READ_FRAMEBUFFER, color_conversion->framebuffers[1]);

    self->params.egl->glBindBuffer(GL_PIXEL_PACK_BUFFER, self->pbo_uv[read_index]);
    self->params.egl->glBufferData(GL_PIXEL_PACK_BUFFER, uv_size, 0, GL_STREAM_READ);
    self->params.egl->glReadPixels(0, 0, frame->width/2, frame->height/2, GL_RG, GL_UNSIGNED_BYTE, 0);

    self->params.egl->glBindBuffer(GL_PIXEL_PACK_BUFFER, self->pbo_uv[map_index]);
    self->params.egl->glBufferData(GL_PIXEL_PACK_BUFFER, uv_size, 0, GL_STREAM_READ);
    uint8_t *ptr_uv = (uint8_t*)self->params.egl->glMapBuffer(GL_PIXEL_PACK_BUFFER, GL_READ_ONLY);
    if(!ptr_uv) {
        fprintf(stderr, "failed to map buffer uv!\n");
    }

    self->params.egl->glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
    self->params.egl->glBindFramebuffer(GL_READ_FRAMEBUFFER, 0);

    /* Only hand the planes to ffmpeg when both mappings succeeded */
    if(ptr_y && ptr_uv) {
        self->sw_frame->data[0] = ptr_y;
        self->sw_frame->data[1] = ptr_uv;

        const int ret = av_hwframe_transfer_data(frame, self->sw_frame, 0);
        if(ret < 0) {
            fprintf(stderr, "transfer data failed, error: %s\n", av_err2str(ret));
        }
    }

    if(ptr_y) {
        self->params.egl->glBindBuffer(GL_PIXEL_PACK_BUFFER, self->pbo_y[map_index]);
        self->params.egl->glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
    }

    /* The UV mapping lives in pbo_uv, not pbo_y */
    if(ptr_uv) {
        self->params.egl->glBindBuffer(GL_PIXEL_PACK_BUFFER, self->pbo_uv[map_index]);
        self->params.egl->glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
    }

    self->params.egl->glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
}
static void gsr_video_encoder_vulkan_get_textures(gsr_video_encoder *encoder, unsigned int *textures, int *num_textures, gsr_destination_color *destination_color) { static void gsr_video_encoder_vulkan_get_textures(gsr_video_encoder *encoder, unsigned int *textures, int *num_textures, gsr_destination_color *destination_color) {
gsr_video_encoder_vulkan *self = encoder->priv; gsr_video_encoder_vulkan *self = encoder->priv;
textures[0] = self->target_textures[0]; textures[0] = self->target_textures[0];
@@ -299,7 +298,7 @@ gsr_video_encoder* gsr_video_encoder_vulkan_create(const gsr_video_encoder_vulka
*encoder = (gsr_video_encoder) { *encoder = (gsr_video_encoder) {
.start = gsr_video_encoder_vulkan_start, .start = gsr_video_encoder_vulkan_start,
.copy_textures_to_frame = gsr_video_encoder_vulkan_copy_textures_to_frame, .copy_textures_to_frame = NULL,
.get_textures = gsr_video_encoder_vulkan_get_textures, .get_textures = gsr_video_encoder_vulkan_get_textures,
.destroy = gsr_video_encoder_vulkan_destroy, .destroy = gsr_video_encoder_vulkan_destroy,
.priv = encoder_vulkan .priv = encoder_vulkan

View File

@@ -634,16 +634,16 @@ static AVCodecContext *create_video_codec_context(AVPixelFormat pix_fmt,
if(codec_context->codec_id == AV_CODEC_ID_AV1 || codec_context->codec_id == AV_CODEC_ID_H264 || codec_context->codec_id == AV_CODEC_ID_HEVC) { if(codec_context->codec_id == AV_CODEC_ID_AV1 || codec_context->codec_id == AV_CODEC_ID_H264 || codec_context->codec_id == AV_CODEC_ID_HEVC) {
switch(video_quality) { switch(video_quality) {
case VideoQuality::MEDIUM: case VideoQuality::MEDIUM:
codec_context->global_quality = 150 * quality_multiply; codec_context->global_quality = 130 * quality_multiply;
break; break;
case VideoQuality::HIGH: case VideoQuality::HIGH:
codec_context->global_quality = 120 * quality_multiply; codec_context->global_quality = 110 * quality_multiply;
break; break;
case VideoQuality::VERY_HIGH: case VideoQuality::VERY_HIGH:
codec_context->global_quality = 115 * quality_multiply; codec_context->global_quality = 95 * quality_multiply;
break; break;
case VideoQuality::ULTRA: case VideoQuality::ULTRA:
codec_context->global_quality = 90 * quality_multiply; codec_context->global_quality = 85 * quality_multiply;
break; break;
} }
} else if(codec_context->codec_id == AV_CODEC_ID_VP8) { } else if(codec_context->codec_id == AV_CODEC_ID_VP8) {
@@ -763,7 +763,7 @@ static AVFrame* create_audio_frame(AVCodecContext *audio_codec_context) {
return frame; return frame;
} }
static void dict_set_profile(AVCodecContext *codec_context, gsr_gpu_vendor vendor, gsr_color_depth color_depth, AVDictionary **options) { static void dict_set_profile(AVCodecContext *codec_context, gsr_gpu_vendor vendor, gsr_color_depth color_depth, VideoCodec video_codec, AVDictionary **options) {
#if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(61, 17, 100) #if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(61, 17, 100)
if(codec_context->codec_id == AV_CODEC_ID_H264) { if(codec_context->codec_id == AV_CODEC_ID_H264) {
// TODO: Only for vaapi // TODO: Only for vaapi
@@ -785,14 +785,15 @@ static void dict_set_profile(AVCodecContext *codec_context, gsr_gpu_vendor vendo
av_dict_set(options, "profile", "main", 0); av_dict_set(options, "profile", "main", 0);
} }
#else #else
const bool use_nvidia_values = vendor == GSR_GPU_VENDOR_NVIDIA && !video_codec_is_vulkan(video_codec);
if(codec_context->codec_id == AV_CODEC_ID_H264) { if(codec_context->codec_id == AV_CODEC_ID_H264) {
// TODO: Only for vaapi // TODO: Only for vaapi
//if(color_depth == GSR_COLOR_DEPTH_10_BITS) //if(color_depth == GSR_COLOR_DEPTH_10_BITS)
// av_dict_set_int(options, "profile", AV_PROFILE_H264_HIGH_10, 0); // av_dict_set_int(options, "profile", AV_PROFILE_H264_HIGH_10, 0);
//else //else
av_dict_set_int(options, "profile", vendor == GSR_GPU_VENDOR_NVIDIA ? 2 : AV_PROFILE_H264_HIGH, 0); av_dict_set_int(options, "profile", use_nvidia_values ? 2 : AV_PROFILE_H264_HIGH, 0);
} else if(codec_context->codec_id == AV_CODEC_ID_AV1) { } else if(codec_context->codec_id == AV_CODEC_ID_AV1) {
if(vendor == GSR_GPU_VENDOR_NVIDIA) { if(use_nvidia_values) {
if(color_depth == GSR_COLOR_DEPTH_10_BITS) if(color_depth == GSR_COLOR_DEPTH_10_BITS)
av_dict_set_int(options, "highbitdepth", 1, 0); av_dict_set_int(options, "highbitdepth", 1, 0);
} else { } else {
@@ -800,9 +801,9 @@ static void dict_set_profile(AVCodecContext *codec_context, gsr_gpu_vendor vendo
} }
} else if(codec_context->codec_id == AV_CODEC_ID_HEVC) { } else if(codec_context->codec_id == AV_CODEC_ID_HEVC) {
if(color_depth == GSR_COLOR_DEPTH_10_BITS) if(color_depth == GSR_COLOR_DEPTH_10_BITS)
av_dict_set_int(options, "profile", vendor == GSR_GPU_VENDOR_NVIDIA ? 1 : AV_PROFILE_HEVC_MAIN_10, 0); av_dict_set_int(options, "profile", use_nvidia_values ? 1 : AV_PROFILE_HEVC_MAIN_10, 0);
else else
av_dict_set_int(options, "profile", vendor == GSR_GPU_VENDOR_NVIDIA ? 0 : AV_PROFILE_HEVC_MAIN, 0); av_dict_set_int(options, "profile", use_nvidia_values ? 0 : AV_PROFILE_HEVC_MAIN, 0);
} }
#endif #endif
} }
@@ -867,7 +868,7 @@ static void open_video_software(AVCodecContext *codec_context, VideoQuality vide
av_dict_set(&options, "preset", "veryfast", 0); av_dict_set(&options, "preset", "veryfast", 0);
av_dict_set(&options, "tune", "film", 0); av_dict_set(&options, "tune", "film", 0);
dict_set_profile(codec_context, GSR_GPU_VENDOR_INTEL, color_depth, &options); dict_set_profile(codec_context, GSR_GPU_VENDOR_INTEL, color_depth, VideoCodec::H264, &options);
if(codec_context->codec_id == AV_CODEC_ID_H264) { if(codec_context->codec_id == AV_CODEC_ID_H264) {
av_dict_set(&options, "coder", "cabac", 0); // TODO: cavlc is faster than cabac but worse compression. Which to use? av_dict_set(&options, "coder", "cabac", 0); // TODO: cavlc is faster than cabac but worse compression. Which to use?
@@ -1043,9 +1044,14 @@ static void open_video_hardware(AVCodecContext *codec_context, VideoQuality vide
// TODO: Enable multipass // TODO: Enable multipass
// TODO: Set "usage" option to "record"/"stream" and "content" option to "rendered" for vulkan encoding dict_set_profile(codec_context, vendor, color_depth, video_codec, &options);
if(vendor == GSR_GPU_VENDOR_NVIDIA) { if(video_codec_is_vulkan(video_codec)) {
av_dict_set_int(&options, "async_depth", 3, 0);
av_dict_set(&options, "tune", "hq", 0);
av_dict_set(&options, "usage", "record", 0); // TODO: Set to stream when streaming
av_dict_set(&options, "content", "rendered", 0);
} else if(vendor == GSR_GPU_VENDOR_NVIDIA) {
// TODO: These dont seem to be necessary // TODO: These dont seem to be necessary
// av_dict_set_int(&options, "zerolatency", 1, 0); // av_dict_set_int(&options, "zerolatency", 1, 0);
// if(codec_context->codec_id == AV_CODEC_ID_AV1) { // if(codec_context->codec_id == AV_CODEC_ID_AV1) {
@@ -1067,8 +1073,6 @@ static void open_video_hardware(AVCodecContext *codec_context, VideoQuality vide
break; break;
} }
dict_set_profile(codec_context, vendor, color_depth, &options);
if(codec_context->codec_id == AV_CODEC_ID_H264) { if(codec_context->codec_id == AV_CODEC_ID_H264) {
// TODO: h264 10bit? // TODO: h264 10bit?
// TODO: // TODO:

View File

@@ -362,13 +362,9 @@ bool gl_get_gpu_info(gsr_egl *egl, gsr_gpu_info *info) {
bool supported = true; bool supported = true;
const unsigned char *gl_vendor = egl->glGetString(GL_VENDOR); const unsigned char *gl_vendor = egl->glGetString(GL_VENDOR);
const unsigned char *gl_renderer = egl->glGetString(GL_RENDERER); const unsigned char *gl_renderer = egl->glGetString(GL_RENDERER);
const unsigned char *gl_version = egl->glGetString(GL_VERSION);
info->gpu_version = 0; info->gpu_version = 0;
info->is_steam_deck = false; info->is_steam_deck = false;
info->driver_major = 0;
info->driver_minor = 0;
info->driver_patch = 0;
if(!gl_vendor) { if(!gl_vendor) {
fprintf(stderr, "gsr error: failed to get gpu vendor\n"); fprintf(stderr, "gsr error: failed to get gpu vendor\n");
@@ -408,21 +404,6 @@ bool gl_get_gpu_info(gsr_egl *egl, gsr_gpu_info *info) {
info->is_steam_deck = strstr((const char*)gl_renderer, "vangogh") != NULL; info->is_steam_deck = strstr((const char*)gl_renderer, "vangogh") != NULL;
} }
if(gl_version) {
const char *mesa_p = strstr((const char*)gl_version, "Mesa ");
if(mesa_p) {
mesa_p += 5;
int major = 0;
int minor = 0;
int patch = 0;
if(sscanf(mesa_p, "%d.%d.%d", &major, &minor, &patch) == 3) {
info->driver_major = major;
info->driver_minor = minor;
info->driver_patch = patch;
}
}
}
end: end:
return supported; return supported;
} }