Revert "WIP: use compute shader instead of graphics shader for better performance (especially on amd)"

This reverts commit a41a32cb90.
This commit is contained in:
dec05eba
2025-03-29 15:37:53 +01:00
parent a41a32cb90
commit f85a7ab205
18 changed files with 746 additions and 331 deletions

View File

@@ -2,12 +2,9 @@
#define GSR_COLOR_CONVERSION_H
#include "shader.h"
#include "defs.h"
#include "vec2.h"
#include <stdbool.h>
#define GSR_COLOR_CONVERSION_MAX_SHADERS 3
typedef enum {
GSR_COLOR_RANGE_LIMITED,
GSR_COLOR_RANGE_FULL
@@ -29,18 +26,9 @@ typedef enum {
GSR_DESTINATION_COLOR_RGB8
} gsr_destination_color;
/* Rotation expressed in steps of 90 degrees, as the enumerator names indicate (0, 90, 180, 270). */
/* NOTE(review): the rotation direction (clockwise or counter-clockwise) is not visible here - confirm against the shader code. */
typedef enum {
GSR_ROT_0,
GSR_ROT_90,
GSR_ROT_180,
GSR_ROT_270
} gsr_rotation;
/* Uniform locations for one color conversion shader program. */
/* Each field stores the value returned by glGetUniformLocation for the shader uniform with the same name. */
typedef struct {
int rotation_matrix;
int source_position;
int target_position;
int scale;
int offset;
int rotation;
} gsr_color_uniforms;
typedef struct {
@@ -57,23 +45,19 @@ typedef struct {
typedef struct {
gsr_color_conversion_params params;
gsr_color_uniforms uniforms[GSR_COLOR_CONVERSION_MAX_SHADERS];
gsr_shader shaders[GSR_COLOR_CONVERSION_MAX_SHADERS];
gsr_color_uniforms uniforms[4];
gsr_shader shaders[4];
unsigned int framebuffers[2];
unsigned int vertex_array_object_id;
unsigned int vertex_buffer_object_id;
int max_local_size_dim;
} gsr_color_conversion;
int gsr_color_conversion_init(gsr_color_conversion *self, const gsr_color_conversion_params *params);
void gsr_color_conversion_deinit(gsr_color_conversion *self);
void gsr_color_conversion_draw(gsr_color_conversion *self, unsigned int texture_id, vec2i destination_pos, vec2i destination_size, vec2i texture_pos, vec2i texture_size, gsr_rotation rotation, bool external_texture, gsr_source_color source_color);
void gsr_color_conversion_draw(gsr_color_conversion *self, unsigned int texture_id, vec2i source_pos, vec2i source_size, vec2i texture_pos, vec2i texture_size, float rotation, bool external_texture, gsr_source_color source_color);
void gsr_color_conversion_clear(gsr_color_conversion *self);
gsr_rotation gsr_monitor_rotation_to_rotation(gsr_monitor_rotation monitor_rotation);
#endif /* GSR_COLOR_CONVERSION_H */

View File

@@ -98,7 +98,7 @@ typedef void(*__GLXextFuncPtr)(void);
#define GL_TEXTURE_EXTERNAL_OES 0x8D65
#define GL_RED 0x1903
#define GL_GREEN 0x1904
#define GL_BLUE 0x1905
#define GL_BLUE 0x1905
#define GL_ALPHA 0x1906
#define GL_TEXTURE_SWIZZLE_RGBA 0x8E46
#define GL_RG 0x8227
@@ -111,7 +111,6 @@ typedef void(*__GLXextFuncPtr)(void);
#define GL_R16 0x822A
#define GL_RG16 0x822C
#define GL_RGB16 0x8054
#define GL_RGBA32F 0x8814
#define GL_UNSIGNED_BYTE 0x1401
#define GL_COLOR_BUFFER_BIT 0x00004000
#define GL_TEXTURE_WRAP_S 0x2802
@@ -135,10 +134,6 @@ typedef void(*__GLXextFuncPtr)(void);
#define GL_SCISSOR_TEST 0x0C11
#define GL_PACK_ALIGNMENT 0x0D05
#define GL_UNPACK_ALIGNMENT 0x0CF5
#define GL_READ_ONLY 0x88B8
#define GL_WRITE_ONLY 0x88B9
#define GL_READ_WRITE 0x88BA
#define GL_MAX_COMPUTE_FIXED_GROUP_INVOCATIONS 0x90EB
#define GL_VENDOR 0x1F00
#define GL_RENDERER 0x1F01
@@ -148,7 +143,6 @@ typedef void(*__GLXextFuncPtr)(void);
#define GL_INFO_LOG_LENGTH 0x8B84
#define GL_FRAGMENT_SHADER 0x8B30
#define GL_VERTEX_SHADER 0x8B31
#define GL_COMPUTE_SHADER 0x91B9
#define GL_COMPILE_STATUS 0x8B81
#define GL_LINK_STATUS 0x8B82
@@ -237,7 +231,6 @@ struct gsr_egl {
void (*glGenTextures)(int n, unsigned int *textures);
void (*glDeleteTextures)(int n, const unsigned int *texture);
void (*glBindTexture)(unsigned int target, unsigned int texture);
void (*glBindImageTexture)(unsigned int unit, unsigned int texture, int level, unsigned char layered, int layer, unsigned int access, unsigned int format);
void (*glTexParameteri)(unsigned int target, unsigned int pname, int param);
void (*glTexParameteriv)(unsigned int target, unsigned int pname, const int *params);
void (*glGetTexLevelParameteriv)(unsigned int target, int level, unsigned int pname, int *params);
@@ -247,8 +240,6 @@ struct gsr_egl {
void (*glGenFramebuffers)(int n, unsigned int *framebuffers);
void (*glBindFramebuffer)(unsigned int target, unsigned int framebuffer);
void (*glDeleteFramebuffers)(int n, const unsigned int *framebuffers);
void (*glDispatchCompute)(unsigned int num_groups_x, unsigned int num_groups_y, unsigned int num_groups_z);
void (*glMemoryBarrier)(unsigned int barriers);
void (*glViewport)(int x, int y, int width, int height);
void (*glFramebufferTexture2D)(unsigned int target, unsigned int attachment, unsigned int textarget, unsigned int texture, int level);
void (*glDrawBuffers)(int n, const unsigned int *bufs);
@@ -285,14 +276,11 @@ struct gsr_egl {
int (*glGetUniformLocation)(unsigned int program, const char *name);
void (*glUniform1f)(int location, float v0);
void (*glUniform2f)(int location, float v0, float v1);
void (*glUniform2i)(int location, int v0, int v1);
void (*glUniformMatrix2fv)(int location, int count, unsigned char transpose, const float *value);
void (*glDebugMessageCallback)(GLDEBUGPROC callback, const void *userParam);
void (*glScissor)(int x, int y, int width, int height);
void (*glReadPixels)(int x, int y, int width, int height, unsigned int format, unsigned int type, void *pixels);
void* (*glMapBuffer)(unsigned int target, unsigned int access);
unsigned char (*glUnmapBuffer)(unsigned int target);
void (*glGetIntegerv)(unsigned int pname, int *params);
};
bool gsr_egl_load(gsr_egl *self, gsr_window *window, bool is_monitor_capture, bool enable_debug);

View File

@@ -25,7 +25,7 @@ typedef struct {
} gsr_image_writer;
bool gsr_image_writer_init_opengl(gsr_image_writer *self, gsr_egl *egl, int width, int height);
/* |memory| is taken as a reference. The data is expected to be in rgba8 format (8 bit rgba) */
/* |memory| is taken as a reference */
bool gsr_image_writer_init_memory(gsr_image_writer *self, const void *memory, int width, int height);
void gsr_image_writer_deinit(gsr_image_writer *self);

View File

@@ -9,7 +9,7 @@ typedef struct {
} gsr_shader;
/* |vertex_shader| or |fragment_shader| may be NULL */
int gsr_shader_init(gsr_shader *self, gsr_egl *egl, const char *vertex_shader, const char *fragment_shader, const char *compute_shader);
int gsr_shader_init(gsr_shader *self, gsr_egl *egl, const char *vertex_shader, const char *fragment_shader);
void gsr_shader_deinit(gsr_shader *self);
int gsr_shader_bind_attribute_location(gsr_shader *self, const char *attribute, int location);

View File

@@ -64,6 +64,8 @@ int create_directory_recursive(char *path);
/* |img_attr| needs to be at least 44 in size */
void setup_dma_buf_attrs(intptr_t *img_attr, uint32_t format, uint32_t width, uint32_t height, const int *fds, const uint32_t *offsets, const uint32_t *pitches, const uint64_t *modifiers, int num_planes, bool use_modifier);
bool video_codec_context_is_vaapi(AVCodecContext *video_codec_context);
bool vaapi_copy_drm_planes_to_video_surface(AVCodecContext *video_codec_context, AVFrame *video_frame, vec2i source_pos, vec2i source_size, vec2i dest_pos, vec2i dest_size, uint32_t format, vec2i size, const int *fds, const uint32_t *offsets, const uint32_t *pitches, const uint64_t *modifiers, int num_planes);
bool vaapi_copy_egl_image_to_video_surface(gsr_egl *egl, EGLImage image, vec2i source_pos, vec2i source_size, vec2i dest_pos, vec2i dest_size, AVCodecContext *video_codec_context, AVFrame *video_frame);
vec2i scale_keep_aspect_ratio(vec2i from, vec2i to);

View File

@@ -53,6 +53,10 @@ typedef struct {
bool is_x11;
gsr_cursor x11_cursor;
bool performance_error_shown;
bool fast_path_failed;
bool mesa_supports_compute_only_vaapi_copy;
//int drm_fd;
//uint64_t prev_sequence;
//bool damaged;
@@ -225,6 +229,17 @@ static int gsr_capture_kms_start(gsr_capture *cap, gsr_capture_metadata *capture
capture_metadata->height = self->capture_size.y;
}
self->fast_path_failed = self->params.egl->gpu_info.vendor == GSR_GPU_VENDOR_AMD && !gl_driver_version_greater_than(&self->params.egl->gpu_info, 24, 0, 9);
if(self->fast_path_failed)
fprintf(stderr, "gsr warning: gsr_capture_kms_start: your amd driver (mesa) version is known to be buggy (<= version 24.0.9), falling back to opengl copy\n");
//if(self->params.hdr) {
// self->fast_path_failed = true;
// fprintf(stderr, "gsr warning: gsr_capture_kms_start: recording with hdr requires shader color conversion which might be slow. If this is an issue record with -w portal instead (which converts HDR to SDR)\n");
//}
self->mesa_supports_compute_only_vaapi_copy = self->params.egl->gpu_info.vendor == GSR_GPU_VENDOR_AMD && gl_driver_version_greater_than(&self->params.egl->gpu_info, 24, 3, 6);
self->last_time_monitor_check = clock_get_monotonic_seconds();
return 0;
}
@@ -259,6 +274,16 @@ static void gsr_capture_kms_on_event(gsr_capture *cap, gsr_egl *egl) {
// }
// }
/*
    Maps a monitor rotation value to the matching angle in radians:
    0, pi/2, pi and pi + pi/2 for the 0, 90, 180 and 270 enumerators.
    Any value not handled by the switch yields 0.
*/
static float monitor_rotation_to_radians(gsr_monitor_rotation rot) {
    float radians = 0.0f;
    switch(rot) {
        case GSR_MONITOR_ROT_0:
            radians = 0.0f;
            break;
        case GSR_MONITOR_ROT_90:
            radians = M_PI_2;
            break;
        case GSR_MONITOR_ROT_180:
            radians = M_PI;
            break;
        case GSR_MONITOR_ROT_270:
            radians = M_PI + M_PI_2;
            break;
    }
    return radians;
}
static gsr_kms_response_item* find_drm_by_connector_id(gsr_kms_response *kms_response, uint32_t connector_id) {
for(int i = 0; i < kms_response->num_items; ++i) {
if(kms_response->items[i].connector_id == connector_id && !kms_response->items[i].is_cursor)
@@ -424,7 +449,7 @@ static gsr_kms_response_item* find_cursor_drm_if_on_monitor(gsr_capture_kms *sel
return cursor_drm_fd;
}
static void render_drm_cursor(gsr_capture_kms *self, gsr_color_conversion *color_conversion, const gsr_kms_response_item *cursor_drm_fd, vec2i target_pos, vec2i output_size, vec2i framebuffer_size) {
static void render_drm_cursor(gsr_capture_kms *self, gsr_color_conversion *color_conversion, const gsr_kms_response_item *cursor_drm_fd, vec2i target_pos, float texture_rotation, vec2i output_size, vec2i framebuffer_size) {
const vec2d scale = {
self->capture_size.x == 0 ? 0 : (double)output_size.x / (double)self->capture_size.x,
self->capture_size.y == 0 ? 0 : (double)output_size.y / (double)self->capture_size.y
@@ -498,7 +523,7 @@ static void render_drm_cursor(gsr_capture_kms *self, gsr_color_conversion *color
gsr_color_conversion_draw(color_conversion, self->cursor_texture_id,
cursor_pos, (vec2i){cursor_size.x * scale.x, cursor_size.y * scale.y},
(vec2i){0, 0}, cursor_size,
gsr_monitor_rotation_to_rotation(self->monitor_rotation), cursor_texture_id_is_external, GSR_SOURCE_COLOR_RGB);
texture_rotation, cursor_texture_id_is_external, GSR_SOURCE_COLOR_RGB);
self->params.egl->glDisable(GL_SCISSOR_TEST);
}
@@ -526,7 +551,7 @@ static void render_x11_cursor(gsr_capture_kms *self, gsr_color_conversion *color
gsr_color_conversion_draw(color_conversion, self->x11_cursor.texture_id,
cursor_pos, (vec2i){self->x11_cursor.size.x * scale.x, self->x11_cursor.size.y * scale.y},
(vec2i){0, 0}, self->x11_cursor.size,
GSR_ROT_0, false, GSR_SOURCE_COLOR_RGB);
0.0f, false, GSR_SOURCE_COLOR_RGB);
self->params.egl->glDisable(GL_SCISSOR_TEST);
}
@@ -579,6 +604,16 @@ static void gsr_capture_kms_update_connector_ids(gsr_capture_kms *self) {
self->capture_size = rotate_capture_size_if_rotated(self, monitor.size);
}
/*
    Disables the fast path (sets self->fast_path_failed) and prints a one-time warning when all of these hold:
    the fast path has not already failed, the gpu vendor is AMD, self->mesa_supports_compute_only_vaapi_copy is false,
    and the third byte of |pixel_format| (bits 16..23) is the character '3' or '4'.
    The warning text describes that last case as a 10/12-bit color format.
*/
static void gsr_capture_kms_fail_fast_path_if_not_fast(gsr_capture_kms *self, uint32_t pixel_format) {
    /* Already on the slow path, nothing to decide (this also keeps the warning from being printed more than once) */
    if(self->fast_path_failed)
        return;

    if(self->params.egl->gpu_info.vendor != GSR_GPU_VENDOR_AMD)
        return;

    if(self->mesa_supports_compute_only_vaapi_copy)
        return;

    const uint8_t depth_char = (pixel_format >> 16) & 0xFF;
    if(depth_char != '3' && depth_char != '4')
        return;

    self->fast_path_failed = true;
    fprintf(stderr, "gsr warning: gsr_capture_kms_capture: the monitor you are recording is in 10/12-bit color format and your mesa version is <= 24.3.6, composition will be used."
        " If you experience performance problems in the video then record on a single window on X11 or use portal capture option instead or disable 10/12-bit color option in your desktop environment settings,"
        " or try to record the monitor on X11 instead (if you aren't already doing that) or update your mesa version.\n");
}
static int gsr_capture_kms_capture(gsr_capture *cap, gsr_capture_metadata *capture_metadata, gsr_color_conversion *color_conversion) {
gsr_capture_kms *self = cap->priv;
@@ -610,6 +645,15 @@ static int gsr_capture_kms_capture(gsr_capture *cap, gsr_capture_metadata *captu
if(drm_fd->has_hdr_metadata && self->params.hdr && hdr_metadata_is_supported_format(&drm_fd->hdr_metadata))
gsr_kms_set_hdr_metadata(self, drm_fd);
if(!self->performance_error_shown && self->monitor_rotation != GSR_MONITOR_ROT_0 && video_codec_context_is_vaapi(capture_metadata->video_codec_context) && self->params.egl->gpu_info.vendor == GSR_GPU_VENDOR_AMD) {
self->performance_error_shown = true;
self->fast_path_failed = true;
fprintf(stderr, "gsr warning: gsr_capture_kms_capture: the monitor you are recording is rotated, composition will have to be used."
" If you experience performance problems in the video then record a single window on X11 or use portal capture option instead\n");
}
gsr_capture_kms_fail_fast_path_if_not_fast(self, drm_fd->pixel_format);
self->capture_size = rotate_capture_size_if_rotated(self, (vec2i){ drm_fd->src_w, drm_fd->src_h });
if(self->params.region_size.x > 0 && self->params.region_size.y > 0)
self->capture_size = self->params.region_size;
@@ -618,6 +662,7 @@ static int gsr_capture_kms_capture(gsr_capture *cap, gsr_capture_metadata *captu
vec2i output_size = is_scaled ? self->params.output_resolution : self->capture_size;
output_size = scale_keep_aspect_ratio(self->capture_size, output_size);
const float texture_rotation = monitor_rotation_to_radians(self->monitor_rotation);
const vec2i target_pos = { max_int(0, capture_metadata->width / 2 - output_size.x / 2), max_int(0, capture_metadata->height / 2 - output_size.y / 2) };
gsr_capture_kms_update_capture_size_change(self, color_conversion, target_pos, drm_fd);
@@ -628,19 +673,41 @@ static int gsr_capture_kms_capture(gsr_capture *cap, gsr_capture_metadata *captu
capture_pos.x += self->params.region_position.x;
capture_pos.y += self->params.region_position.y;
//self->params.egl->glFlush();
//self->params.egl->glFinish();
self->params.egl->glFlush();
self->params.egl->glFinish();
EGLImage image = gsr_capture_kms_create_egl_image_with_fallback(self, drm_fd);
if(image) {
gsr_capture_kms_bind_image_to_input_texture_with_fallback(self, image);
self->params.egl->eglDestroyImage(self->params.egl->egl_display, image);
/* Fast opengl free path */
if(!self->fast_path_failed && self->monitor_rotation == GSR_MONITOR_ROT_0 && video_codec_context_is_vaapi(capture_metadata->video_codec_context) && self->params.egl->gpu_info.vendor == GSR_GPU_VENDOR_AMD) {
int fds[4];
uint32_t offsets[4];
uint32_t pitches[4];
uint64_t modifiers[4];
for(int i = 0; i < drm_fd->num_dma_bufs; ++i) {
fds[i] = drm_fd->dma_buf[i].fd;
offsets[i] = drm_fd->dma_buf[i].offset;
pitches[i] = drm_fd->dma_buf[i].pitch;
modifiers[i] = drm_fd->modifier;
}
if(!vaapi_copy_drm_planes_to_video_surface(capture_metadata->video_codec_context, capture_metadata->frame, (vec2i){capture_pos.x, capture_pos.y}, self->capture_size, target_pos, output_size, drm_fd->pixel_format, (vec2i){drm_fd->width, drm_fd->height}, fds, offsets, pitches, modifiers, drm_fd->num_dma_bufs)) {
fprintf(stderr, "gsr error: gsr_capture_kms_capture: vaapi_copy_drm_planes_to_video_surface failed, falling back to opengl copy. Please report this as an issue at https://github.com/dec05eba/gpu-screen-recorder-issues\n");
self->fast_path_failed = true;
}
} else {
self->fast_path_failed = true;
}
gsr_color_conversion_draw(color_conversion, self->external_texture_fallback ? self->external_input_texture_id : self->input_texture_id,
target_pos, output_size,
capture_pos, self->capture_size,
gsr_monitor_rotation_to_rotation(self->monitor_rotation), self->external_texture_fallback, GSR_SOURCE_COLOR_RGB);
if(self->fast_path_failed) {
EGLImage image = gsr_capture_kms_create_egl_image_with_fallback(self, drm_fd);
if(image) {
gsr_capture_kms_bind_image_to_input_texture_with_fallback(self, image);
self->params.egl->eglDestroyImage(self->params.egl->egl_display, image);
}
gsr_color_conversion_draw(color_conversion, self->external_texture_fallback ? self->external_input_texture_id : self->input_texture_id,
target_pos, output_size,
capture_pos, self->capture_size,
texture_rotation, self->external_texture_fallback, GSR_SOURCE_COLOR_RGB);
}
if(self->params.record_cursor) {
gsr_kms_response_item *cursor_drm_fd = find_cursor_drm_if_on_monitor(self, drm_fd->connector_id, capture_is_combined_plane);
@@ -655,12 +722,12 @@ static int gsr_capture_kms_capture(gsr_capture *cap, gsr_capture_metadata *captu
render_x11_cursor(self, color_conversion, cursor_monitor_offset, target_pos, output_size);
} else if(cursor_drm_fd) {
const vec2i framebuffer_size = rotate_capture_size_if_rotated(self, (vec2i){ drm_fd->src_w, drm_fd->src_h });
render_drm_cursor(self, color_conversion, cursor_drm_fd, target_pos, output_size, framebuffer_size);
render_drm_cursor(self, color_conversion, cursor_drm_fd, target_pos, texture_rotation, output_size, framebuffer_size);
}
}
//self->params.egl->glFlush();
//self->params.egl->glFinish();
self->params.egl->glFlush();
self->params.egl->glFinish();
gsr_capture_kms_cleanup_kms_fds(self);

View File

@@ -390,16 +390,16 @@ static int gsr_capture_nvfbc_capture(gsr_capture *cap, gsr_capture_metadata *cap
return 0;
}
//self->params.egl->glFlush();
//self->params.egl->glFinish();
self->params.egl->glFlush();
self->params.egl->glFinish();
gsr_color_conversion_draw(color_conversion, self->setup_params.dwTextures[grab_params.dwTextureIndex],
target_pos, (vec2i){output_size.x, output_size.y},
self->params.region_position, frame_size,
GSR_ROT_0, false, GSR_SOURCE_COLOR_BGR);
0.0f, false, GSR_SOURCE_COLOR_BGR);
//self->params.egl->glFlush();
//self->params.egl->glFinish();
self->params.egl->glFlush();
self->params.egl->glFinish();
return 0;
}

View File

@@ -23,6 +23,9 @@ typedef struct {
vec2i capture_size;
gsr_pipewire_video_dmabuf_data dmabuf_data[GSR_PIPEWIRE_VIDEO_DMABUF_MAX_PLANES];
int num_dmabuf_data;
bool fast_path_failed;
bool mesa_supports_compute_only_vaapi_copy;
} gsr_capture_portal;
static void gsr_capture_portal_cleanup_plane_fds(gsr_capture_portal *self) {
@@ -302,6 +305,12 @@ static int gsr_capture_portal_start(gsr_capture *cap, gsr_capture_metadata *capt
capture_metadata->height = self->params.output_resolution.y;
}
self->fast_path_failed = self->params.egl->gpu_info.vendor == GSR_GPU_VENDOR_AMD && !gl_driver_version_greater_than(&self->params.egl->gpu_info, 24, 0, 9);
if(self->fast_path_failed)
fprintf(stderr, "gsr warning: gsr_capture_kms_start: your amd driver (mesa) version is known to be buggy (<= version 24.0.9), falling back to opengl copy\n");
self->mesa_supports_compute_only_vaapi_copy = self->params.egl->gpu_info.vendor == GSR_GPU_VENDOR_AMD && gl_driver_version_greater_than(&self->params.egl->gpu_info, 24, 3, 6);
return 0;
}
@@ -309,6 +318,16 @@ static int max_int(int a, int b) {
return a > b ? a : b;
}
/*
    Disables the fast path (sets self->fast_path_failed) and prints a one-time warning when all of these hold:
    the fast path has not already failed, the gpu vendor is AMD, self->mesa_supports_compute_only_vaapi_copy is false,
    and the third byte of |pixel_format| (bits 16..23) is the character '3' or '4'.
    The warning text describes that last case as a 10/12-bit color format.
*/
static void gsr_capture_portal_fail_fast_path_if_not_fast(gsr_capture_portal *self, uint32_t pixel_format) {
    const uint8_t pixel_format_color_depth_1 = (pixel_format >> 16) & 0xFF;
    const bool is_amd = self->params.egl->gpu_info.vendor == GSR_GPU_VENDOR_AMD;
    const bool is_high_bit_depth = pixel_format_color_depth_1 == '3' || pixel_format_color_depth_1 == '4';
    if(!self->fast_path_failed && is_amd && !self->mesa_supports_compute_only_vaapi_copy && is_high_bit_depth) {
        self->fast_path_failed = true;
        /* The log prefix names the portal capture function; it previously named the kms one (copy-paste from the kms backend) */
        fprintf(stderr, "gsr warning: gsr_capture_portal_capture: the monitor you are recording is in 10/12-bit color format and your mesa version is <= 24.3.6, composition will be used."
            " If you experience performance problems in the video then record on a single window on X11 instead or disable 10/12-bit color option in your desktop environment settings,"
            " or try to record the monitor on X11 instead (if you aren't already doing that) or update your mesa version.\n");
    }
}
static int gsr_capture_portal_capture(gsr_capture *cap, gsr_capture_metadata *capture_metadata, gsr_color_conversion *color_conversion) {
(void)color_conversion;
gsr_capture_portal *self = cap->priv;
@@ -329,21 +348,45 @@ static int gsr_capture_portal_capture(gsr_capture *cap, gsr_capture_metadata *ca
return 0;
}
gsr_capture_portal_fail_fast_path_if_not_fast(self, pipewire_fourcc);
const bool is_scaled = self->params.output_resolution.x > 0 && self->params.output_resolution.y > 0;
vec2i output_size = is_scaled ? self->params.output_resolution : self->capture_size;
output_size = scale_keep_aspect_ratio(self->capture_size, output_size);
const vec2i target_pos = { max_int(0, capture_metadata->width / 2 - output_size.x / 2), max_int(0, capture_metadata->height / 2 - output_size.y / 2) };
//self->params.egl->glFlush();
//self->params.egl->glFinish();
self->params.egl->glFlush();
self->params.egl->glFinish();
// TODO: Handle region crop
gsr_color_conversion_draw(color_conversion, using_external_image ? self->texture_map.external_texture_id : self->texture_map.texture_id,
target_pos, output_size,
(vec2i){region.x, region.y}, self->capture_size,
GSR_ROT_0, using_external_image, GSR_SOURCE_COLOR_RGB);
/* Fast opengl free path */
if(!self->fast_path_failed && video_codec_context_is_vaapi(capture_metadata->video_codec_context) && self->params.egl->gpu_info.vendor == GSR_GPU_VENDOR_AMD) {
int fds[4];
uint32_t offsets[4];
uint32_t pitches[4];
uint64_t modifiers[4];
for(int i = 0; i < self->num_dmabuf_data; ++i) {
fds[i] = self->dmabuf_data[i].fd;
offsets[i] = self->dmabuf_data[i].offset;
pitches[i] = self->dmabuf_data[i].stride;
modifiers[i] = pipewire_modifiers;
}
if(!vaapi_copy_drm_planes_to_video_surface(capture_metadata->video_codec_context, capture_metadata->frame, (vec2i){region.x, region.y}, self->capture_size, target_pos, output_size, pipewire_fourcc, self->capture_size, fds, offsets, pitches, modifiers, self->num_dmabuf_data)) {
fprintf(stderr, "gsr error: gsr_capture_portal_capture: vaapi_copy_drm_planes_to_video_surface failed, falling back to opengl copy. Please report this as an issue at https://github.com/dec05eba/gpu-screen-recorder-issues\n");
self->fast_path_failed = true;
}
} else {
self->fast_path_failed = true;
}
if(self->fast_path_failed) {
gsr_color_conversion_draw(color_conversion, using_external_image ? self->texture_map.external_texture_id : self->texture_map.texture_id,
target_pos, output_size,
(vec2i){region.x, region.y}, self->capture_size,
0.0f, using_external_image, GSR_SOURCE_COLOR_RGB);
}
if(self->params.record_cursor && self->texture_map.cursor_texture_id > 0 && cursor_region.width > 0) {
const vec2d scale = {
@@ -361,12 +404,12 @@ static int gsr_capture_portal_capture(gsr_capture *cap, gsr_capture_metadata *ca
gsr_color_conversion_draw(color_conversion, self->texture_map.cursor_texture_id,
(vec2i){cursor_pos.x, cursor_pos.y}, (vec2i){cursor_region.width * scale.x, cursor_region.height * scale.y},
(vec2i){0, 0}, (vec2i){cursor_region.width, cursor_region.height},
GSR_ROT_0, false, GSR_SOURCE_COLOR_RGB);
0.0f, false, GSR_SOURCE_COLOR_RGB);
self->params.egl->glDisable(GL_SCISSOR_TEST);
}
//self->params.egl->glFlush();
//self->params.egl->glFinish();
self->params.egl->glFlush();
self->params.egl->glFinish();
gsr_capture_portal_cleanup_plane_fds(self);

View File

@@ -34,6 +34,7 @@ typedef struct {
gsr_cursor cursor;
bool clear_background;
bool fast_path_failed;
} gsr_capture_xcomposite;
static void gsr_capture_xcomposite_stop(gsr_capture_xcomposite *self) {
@@ -116,6 +117,10 @@ static int gsr_capture_xcomposite_start(gsr_capture *cap, gsr_capture_metadata *
capture_metadata->height = self->params.output_resolution.y;
}
self->fast_path_failed = self->params.egl->gpu_info.vendor == GSR_GPU_VENDOR_AMD && !gl_driver_version_greater_than(&self->params.egl->gpu_info, 24, 0, 9);
if(self->fast_path_failed)
fprintf(stderr, "gsr warning: gsr_capture_kms_start: your amd driver (mesa) version is known to be buggy (<= version 24.0.9), falling back to opengl copy\n");
self->window_resize_timer = clock_get_monotonic_seconds();
return 0;
}
@@ -253,13 +258,25 @@ static int gsr_capture_xcomposite_capture(gsr_capture *cap, gsr_capture_metadata
const vec2i target_pos = { max_int(0, capture_metdata->width / 2 - output_size.x / 2), max_int(0, capture_metdata->height / 2 - output_size.y / 2) };
//self->params.egl->glFlush();
//self->params.egl->glFinish();
self->params.egl->glFlush();
self->params.egl->glFinish();
gsr_color_conversion_draw(color_conversion, window_texture_get_opengl_texture_id(&self->window_texture),
target_pos, output_size,
(vec2i){0, 0}, self->texture_size,
GSR_ROT_0, false, GSR_SOURCE_COLOR_RGB);
/* Fast opengl free path */
if(!self->fast_path_failed && video_codec_context_is_vaapi(capture_metdata->video_codec_context) && self->params.egl->gpu_info.vendor == GSR_GPU_VENDOR_AMD) {
if(!vaapi_copy_egl_image_to_video_surface(self->params.egl, self->window_texture.image, (vec2i){0, 0}, self->texture_size, target_pos, output_size, capture_metdata->video_codec_context, capture_metdata->frame)) {
fprintf(stderr, "gsr error: gsr_capture_xcomposite_capture: vaapi_copy_egl_image_to_video_surface failed, falling back to opengl copy. Please report this as an issue at https://github.com/dec05eba/gpu-screen-recorder-issues\n");
self->fast_path_failed = true;
}
} else {
self->fast_path_failed = true;
}
if(self->fast_path_failed) {
gsr_color_conversion_draw(color_conversion, window_texture_get_opengl_texture_id(&self->window_texture),
target_pos, output_size,
(vec2i){0, 0}, self->texture_size,
0.0f, false, GSR_SOURCE_COLOR_RGB);
}
if(self->params.record_cursor && self->cursor.visible) {
const vec2d scale = {
@@ -280,13 +297,13 @@ static int gsr_capture_xcomposite_capture(gsr_capture *cap, gsr_capture_metadata
gsr_color_conversion_draw(color_conversion, self->cursor.texture_id,
cursor_pos, (vec2i){self->cursor.size.x * scale.x, self->cursor.size.y * scale.y},
(vec2i){0, 0}, self->cursor.size,
GSR_ROT_0, false, GSR_SOURCE_COLOR_RGB);
0.0f, false, GSR_SOURCE_COLOR_RGB);
self->params.egl->glDisable(GL_SCISSOR_TEST);
}
//self->params.egl->glFlush();
//self->params.egl->glFinish();
self->params.egl->glFlush();
self->params.egl->glFinish();
return 0;
}

View File

@@ -160,7 +160,7 @@ static int gsr_capture_ximage_capture(gsr_capture *cap, gsr_capture_metadata *ca
gsr_color_conversion_draw(color_conversion, self->texture_id,
target_pos, output_size,
(vec2i){0, 0}, self->capture_size,
GSR_ROT_0, false, GSR_SOURCE_COLOR_RGB);
0.0f, false, GSR_SOURCE_COLOR_RGB);
if(self->params.record_cursor && self->cursor.visible) {
const vec2d scale = {
@@ -181,7 +181,7 @@ static int gsr_capture_ximage_capture(gsr_capture *cap, gsr_capture_metadata *ca
gsr_color_conversion_draw(color_conversion, self->cursor.texture_id,
cursor_pos, (vec2i){self->cursor.size.x * scale.x, self->cursor.size.y * scale.y},
(vec2i){0, 0}, self->cursor.size,
GSR_ROT_0, false, GSR_SOURCE_COLOR_RGB);
0.0f, false, GSR_SOURCE_COLOR_RGB);
self->params.egl->glDisable(GL_SCISSOR_TEST);
}

View File

@@ -5,17 +5,21 @@
#include <math.h>
#include <assert.h>
// TODO: external texture
// TODO: Scissor doesn't work with compute shader. In the compute shader this can be implemented with two step calls, and using the result
// with a call to mix to choose source/output color.
#define GL_SHADER_IMAGE_ACCESS_BARRIER_BIT 0x00000020
// TODO: Use the minimal barrier required and move this to egl.h
#define GL_ALL_BARRIER_BITS 0xFFFFFFFF
#define MAX_SHADERS 4
#define MAX_FRAMEBUFFERS 2
#define EXTERNAL_TEXTURE_SHADER_OFFSET 2
/* Returns |v| unchanged when it compares greater than or equal to zero, otherwise returns the negated value */
static float abs_f(float v) {
    if(v >= 0.0f)
        return v;
    return -v;
}
/* GLSL source text defining rotate_z(angle), which returns a mat4 built from cos(angle) and sin(angle). */
/* Meant to be concatenated into a shader source string literal so the shader's main() can call rotate_z. */
#define ROTATE_Z "mat4 rotate_z(in float angle) {\n" \
" return mat4(cos(angle), -sin(angle), 0.0, 0.0,\n" \
" sin(angle), cos(angle), 0.0, 0.0,\n" \
" 0.0, 0.0, 1.0, 0.0,\n" \
" 0.0, 0.0, 0.0, 1.0);\n" \
"}\n"
/* https://en.wikipedia.org/wiki/YCbCr, see study/color_space_transform_matrix.png */
/* ITU-R BT2020, full */
@@ -44,10 +48,6 @@
" 0.060118, 0.429412, -0.038049, 0.000000,\n" \
" 0.062745, 0.500000, 0.500000, 1.000000);\n"
/* Returns the larger of |a| and |b| (|b| when they are equal) */
static int max_int(int a, int b) {
    if(a > b)
        return a;
    return b;
}
static const char* color_format_range_get_transform_matrix(gsr_destination_color color_format, gsr_color_range color_range) {
switch(color_format) {
case GSR_DESTINATION_COLOR_NV12: {
@@ -76,111 +76,187 @@ static const char* color_format_range_get_transform_matrix(gsr_destination_color
return NULL;
}
// TODO: Make alpha blending optional
// TODO: Optimize these shaders.
static int load_compute_shader_y(gsr_shader *shader, gsr_egl *egl, gsr_color_uniforms *uniforms, int max_local_size_dim, gsr_destination_color color_format, gsr_color_range color_range) {
static int load_shader_y(gsr_shader *shader, gsr_egl *egl, gsr_color_uniforms *uniforms, gsr_destination_color color_format, gsr_color_range color_range, bool external_texture) {
const char *color_transform_matrix = color_format_range_get_transform_matrix(color_format, color_range);
const bool use_16bit_colors = color_format == GSR_DESTINATION_COLOR_P010;
char compute_shader[2048];
snprintf(compute_shader, sizeof(compute_shader),
"#version 430 core\n"
"layout (local_size_x = %d, local_size_y = %d, local_size_z = 1) in;\n"
"uniform sampler2D imgInput;\n"
"uniform ivec2 source_position;\n"
"uniform ivec2 target_position;\n"
"uniform vec2 scale;\n"
"uniform mat2 rotation_matrix;\n"
"layout(%s, binding = 0) uniform image2D imgOutput;\n"
"%s"
"void main() {\n"
" ivec2 texelCoord = ivec2(gl_GlobalInvocationID.xy);\n"
" ivec2 size = ivec2(vec2(textureSize(imgInput, 0)) * scale + 0.5);\n"
" vec2 rotated_texel_coord = vec2(texelCoord - source_position - size/2) * rotation_matrix + vec2(size/2) + 0.5;\n"
" vec2 texCoord = vec2(rotated_texel_coord)/vec2(size);\n"
" vec4 source_color = texture(imgInput, texCoord);\n"
" vec4 source_color_yuv = RGBtoYUV * vec4(source_color.rgb, 1.0);\n"
" vec4 output_color_yuv = imageLoad(imgOutput, ivec2(rotated_texel_coord) + target_position);\n"
" float y_color = mix(output_color_yuv.r, source_color_yuv.r, source_color.a);\n"
" imageStore(imgOutput, texelCoord + target_position, vec4(y_color, 1.0, 1.0, 1.0));\n"
"}\n", max_local_size_dim, max_local_size_dim, use_16bit_colors ? "r16" : "r8", color_transform_matrix);
char vertex_shader[2048];
snprintf(vertex_shader, sizeof(vertex_shader),
"#version 300 es \n"
"in vec2 pos; \n"
"in vec2 texcoords; \n"
"out vec2 texcoords_out; \n"
"uniform vec2 offset; \n"
"uniform float rotation; \n"
ROTATE_Z
"void main() \n"
"{ \n"
" texcoords_out = (vec4(texcoords.x - 0.5, texcoords.y - 0.5, 0.0, 0.0) * rotate_z(rotation)).xy + vec2(0.5, 0.5); \n"
" gl_Position = vec4(offset.x, offset.y, 0.0, 0.0) + vec4(pos.x, pos.y, 0.0, 1.0); \n"
"} \n");
if(gsr_shader_init(shader, egl, NULL, NULL, compute_shader) != 0)
const char *main_code =
main_code =
" vec4 pixel = texture(tex1, texcoords_out); \n"
" FragColor.x = (RGBtoYUV * vec4(pixel.rgb, 1.0)).x; \n"
" FragColor.w = pixel.a; \n";
char fragment_shader[2048];
if(external_texture) {
snprintf(fragment_shader, sizeof(fragment_shader),
"#version 300 es \n"
"#extension GL_OES_EGL_image_external : enable \n"
"#extension GL_OES_EGL_image_external_essl3 : require \n"
"precision highp float; \n"
"in vec2 texcoords_out; \n"
"uniform samplerExternalOES tex1; \n"
"out vec4 FragColor; \n"
"%s"
"void main() \n"
"{ \n"
"%s"
"} \n", color_transform_matrix, main_code);
} else {
snprintf(fragment_shader, sizeof(fragment_shader),
"#version 300 es \n"
"precision highp float; \n"
"in vec2 texcoords_out; \n"
"uniform sampler2D tex1; \n"
"out vec4 FragColor; \n"
"%s"
"void main() \n"
"{ \n"
"%s"
"} \n", color_transform_matrix, main_code);
}
if(gsr_shader_init(shader, egl, vertex_shader, fragment_shader) != 0)
return -1;
uniforms->source_position = egl->glGetUniformLocation(shader->program_id, "source_position");
uniforms->target_position = egl->glGetUniformLocation(shader->program_id, "target_position");
uniforms->rotation_matrix = egl->glGetUniformLocation(shader->program_id, "rotation_matrix");
uniforms->scale = egl->glGetUniformLocation(shader->program_id, "scale");
gsr_shader_bind_attribute_location(shader, "pos", 0);
gsr_shader_bind_attribute_location(shader, "texcoords", 1);
uniforms->offset = egl->glGetUniformLocation(shader->program_id, "offset");
uniforms->rotation = egl->glGetUniformLocation(shader->program_id, "rotation");
return 0;
}
static int load_compute_shader_uv(gsr_shader *shader, gsr_egl *egl, gsr_color_uniforms *uniforms, int max_local_size_dim, gsr_destination_color color_format, gsr_color_range color_range) {
static unsigned int load_shader_uv(gsr_shader *shader, gsr_egl *egl, gsr_color_uniforms *uniforms, gsr_destination_color color_format, gsr_color_range color_range, bool external_texture) {
const char *color_transform_matrix = color_format_range_get_transform_matrix(color_format, color_range);
const bool use_16bit_colors = color_format == GSR_DESTINATION_COLOR_P010;
char compute_shader[2048];
snprintf(compute_shader, sizeof(compute_shader),
"#version 430 core\n"
"layout (local_size_x = %d, local_size_y = %d, local_size_z = 1) in;\n"
"uniform sampler2D imgInput;\n"
"uniform ivec2 source_position;\n"
"uniform ivec2 target_position;\n"
"uniform vec2 scale;\n"
"uniform mat2 rotation_matrix;\n"
"layout(%s, binding = 0) uniform image2D imgOutput;\n"
"%s"
"void main() {\n"
" ivec2 texelCoord = ivec2(gl_GlobalInvocationID.xy);\n"
" ivec2 size = ivec2(vec2(textureSize(imgInput, 0)) * scale + 0.5);\n"
" vec2 rotated_texel_coord = vec2(texelCoord - source_position/2 - size/4) * rotation_matrix + vec2(size/4) + 0.5;\n"
" vec2 texCoord = vec2(rotated_texel_coord)/vec2(size);\n"
" vec4 source_color = texture(imgInput, texCoord * 2.0);\n"
" vec4 source_color_yuv = RGBtoYUV * vec4(source_color.rgb, 1.0);\n"
" vec4 output_color_yuv = imageLoad(imgOutput, ivec2(rotated_texel_coord) + target_position/2);\n"
" vec2 uv_color = mix(output_color_yuv.rg, source_color_yuv.gb, source_color.a);\n"
" imageStore(imgOutput, texelCoord + target_position/2, vec4(uv_color, 1.0, 1.0));\n"
"}\n", max_local_size_dim, max_local_size_dim, use_16bit_colors ? "rg16" : "rg8", color_transform_matrix);
char vertex_shader[2048];
snprintf(vertex_shader, sizeof(vertex_shader),
"#version 300 es \n"
"in vec2 pos; \n"
"in vec2 texcoords; \n"
"out vec2 texcoords_out; \n"
"uniform vec2 offset; \n"
"uniform float rotation; \n"
ROTATE_Z
"void main() \n"
"{ \n"
" texcoords_out = (vec4(texcoords.x - 0.5, texcoords.y - 0.5, 0.0, 0.0) * rotate_z(rotation)).xy + vec2(0.5, 0.5); \n"
" gl_Position = (vec4(offset.x, offset.y, 0.0, 0.0) + vec4(pos.x, pos.y, 0.0, 1.0)) * vec4(0.5, 0.5, 1.0, 1.0) - vec4(0.5, 0.5, 0.0, 0.0); \n"
"} \n");
if(gsr_shader_init(shader, egl, NULL, NULL, compute_shader) != 0)
const char *main_code =
main_code =
" vec4 pixel = texture(tex1, texcoords_out); \n"
" FragColor.xy = (RGBtoYUV * vec4(pixel.rgb, 1.0)).yz; \n"
" FragColor.w = pixel.a; \n";
char fragment_shader[2048];
if(external_texture) {
snprintf(fragment_shader, sizeof(fragment_shader),
"#version 300 es \n"
"#extension GL_OES_EGL_image_external : enable \n"
"#extension GL_OES_EGL_image_external_essl3 : require \n"
"precision highp float; \n"
"in vec2 texcoords_out; \n"
"uniform samplerExternalOES tex1; \n"
"out vec4 FragColor; \n"
"%s"
"void main() \n"
"{ \n"
"%s"
"} \n", color_transform_matrix, main_code);
} else {
snprintf(fragment_shader, sizeof(fragment_shader),
"#version 300 es \n"
"precision highp float; \n"
"in vec2 texcoords_out; \n"
"uniform sampler2D tex1; \n"
"out vec4 FragColor; \n"
"%s"
"void main() \n"
"{ \n"
"%s"
"} \n", color_transform_matrix, main_code);
}
if(gsr_shader_init(shader, egl, vertex_shader, fragment_shader) != 0)
return -1;
uniforms->source_position = egl->glGetUniformLocation(shader->program_id, "source_position");
uniforms->target_position = egl->glGetUniformLocation(shader->program_id, "target_position");
uniforms->rotation_matrix = egl->glGetUniformLocation(shader->program_id, "rotation_matrix");
uniforms->scale = egl->glGetUniformLocation(shader->program_id, "scale");
gsr_shader_bind_attribute_location(shader, "pos", 0);
gsr_shader_bind_attribute_location(shader, "texcoords", 1);
uniforms->offset = egl->glGetUniformLocation(shader->program_id, "offset");
uniforms->rotation = egl->glGetUniformLocation(shader->program_id, "rotation");
return 0;
}
static int load_compute_shader_rgb(gsr_shader *shader, gsr_egl *egl, gsr_color_uniforms *uniforms, int max_local_size_dim) {
char compute_shader[2048];
snprintf(compute_shader, sizeof(compute_shader),
"#version 430 core\n"
"layout (local_size_x = %d, local_size_y = %d, local_size_z = 1) in;\n"
"uniform sampler2D imgInput;\n"
"uniform ivec2 source_position;\n"
"uniform ivec2 target_position;\n"
"uniform vec2 scale;\n"
"uniform mat2 rotation_matrix;\n"
"layout(rgba8, binding = 0) uniform image2D imgOutput;\n"
"void main() {\n"
" ivec2 texelCoord = ivec2(gl_GlobalInvocationID.xy);\n"
" ivec2 size = ivec2(vec2(textureSize(imgInput, 0)) * scale + 0.5);\n"
" vec2 rotated_texel_coord = vec2(texelCoord - source_position - size/2) * rotation_matrix + vec2(size/2) + 0.5;\n"
" vec2 texCoord = vec2(rotated_texel_coord)/vec2(size);\n"
" vec4 source_color = texture(imgInput, texCoord);\n"
//" vec4 output_color = imageLoad(imgOutput, ivec2(rotated_texel_coord) + target_position);\n"
//" vec3 color = mix(output_color.rgb, source_color.rgb, source_color.a);\n"
" imageStore(imgOutput, texelCoord + target_position, source_color);\n"
"}\n", max_local_size_dim, max_local_size_dim);
static unsigned int load_shader_rgb(gsr_shader *shader, gsr_egl *egl, gsr_color_uniforms *uniforms, bool external_texture) {
char vertex_shader[2048];
snprintf(vertex_shader, sizeof(vertex_shader),
"#version 300 es \n"
"in vec2 pos; \n"
"in vec2 texcoords; \n"
"out vec2 texcoords_out; \n"
"uniform vec2 offset; \n"
"uniform float rotation; \n"
ROTATE_Z
"void main() \n"
"{ \n"
" texcoords_out = (vec4(texcoords.x - 0.5, texcoords.y - 0.5, 0.0, 0.0) * rotate_z(rotation)).xy + vec2(0.5, 0.5); \n"
" gl_Position = vec4(offset.x, offset.y, 0.0, 0.0) + vec4(pos.x, pos.y, 0.0, 1.0); \n"
"} \n");
if(gsr_shader_init(shader, egl, NULL, NULL, compute_shader) != 0)
const char *main_code =
main_code =
" vec4 pixel = texture(tex1, texcoords_out); \n"
" FragColor = pixel; \n";
char fragment_shader[2048];
if(external_texture) {
snprintf(fragment_shader, sizeof(fragment_shader),
"#version 300 es \n"
"#extension GL_OES_EGL_image_external : enable \n"
"#extension GL_OES_EGL_image_external_essl3 : require \n"
"precision highp float; \n"
"in vec2 texcoords_out; \n"
"uniform samplerExternalOES tex1; \n"
"out vec4 FragColor; \n"
"void main() \n"
"{ \n"
"%s"
"} \n", main_code);
} else {
snprintf(fragment_shader, sizeof(fragment_shader),
"#version 300 es \n"
"precision highp float; \n"
"in vec2 texcoords_out; \n"
"uniform sampler2D tex1; \n"
"out vec4 FragColor; \n"
"void main() \n"
"{ \n"
"%s"
"} \n", main_code);
}
if(gsr_shader_init(shader, egl, vertex_shader, fragment_shader) != 0)
return -1;
uniforms->source_position = egl->glGetUniformLocation(shader->program_id, "source_position");
uniforms->target_position = egl->glGetUniformLocation(shader->program_id, "target_position");
uniforms->rotation_matrix = egl->glGetUniformLocation(shader->program_id, "rotation_matrix");
uniforms->scale = egl->glGetUniformLocation(shader->program_id, "scale");
gsr_shader_bind_attribute_location(shader, "pos", 0);
gsr_shader_bind_attribute_location(shader, "texcoords", 1);
uniforms->offset = egl->glGetUniformLocation(shader->program_id, "offset");
uniforms->rotation = egl->glGetUniformLocation(shader->program_id, "rotation");
return 0;
}
@@ -239,11 +315,6 @@ int gsr_color_conversion_init(gsr_color_conversion *self, const gsr_color_conver
memset(self, 0, sizeof(*self));
self->params.egl = params->egl;
self->params = *params;
int max_compute_work_group_invocations = 256;
self->params.egl->glGetIntegerv(GL_MAX_COMPUTE_FIXED_GROUP_INVOCATIONS, &max_compute_work_group_invocations);
self->max_local_size_dim = sqrt(max_compute_work_group_invocations);
fprintf(stderr, "max local size: %d, max_local_size_dim: %d\n", max_compute_work_group_invocations, self->max_local_size_dim);
switch(params->destination_color) {
case GSR_DESTINATION_COLOR_NV12:
@@ -253,15 +324,27 @@ int gsr_color_conversion_init(gsr_color_conversion *self, const gsr_color_conver
return -1;
}
if(load_compute_shader_y(&self->shaders[0], self->params.egl, &self->uniforms[0], self->max_local_size_dim, params->destination_color, params->color_range) != 0) {
fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load Y compute shader\n");
if(load_shader_y(&self->shaders[0], self->params.egl, &self->uniforms[0], params->destination_color, params->color_range, false) != 0) {
fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load Y shader\n");
goto err;
}
if(load_compute_shader_uv(&self->shaders[1], self->params.egl, &self->uniforms[1], self->max_local_size_dim, params->destination_color, params->color_range) != 0) {
fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load UV compute shader\n");
if(load_shader_uv(&self->shaders[1], self->params.egl, &self->uniforms[1], params->destination_color, params->color_range, false) != 0) {
fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load UV shader\n");
goto err;
}
if(self->params.load_external_image_shader) {
if(load_shader_y(&self->shaders[EXTERNAL_TEXTURE_SHADER_OFFSET], self->params.egl, &self->uniforms[EXTERNAL_TEXTURE_SHADER_OFFSET], params->destination_color, params->color_range, true) != 0) {
fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load Y shader\n");
goto err;
}
if(load_shader_uv(&self->shaders[EXTERNAL_TEXTURE_SHADER_OFFSET + 1], self->params.egl, &self->uniforms[EXTERNAL_TEXTURE_SHADER_OFFSET + 1], params->destination_color, params->color_range, true) != 0) {
fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load UV shader\n");
goto err;
}
}
break;
}
case GSR_DESTINATION_COLOR_RGB8: {
@@ -270,10 +353,17 @@ int gsr_color_conversion_init(gsr_color_conversion *self, const gsr_color_conver
return -1;
}
if(load_compute_shader_rgb(&self->shaders[2], self->params.egl, &self->uniforms[2], self->max_local_size_dim) != 0) {
fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load Y compute shader\n");
if(load_shader_rgb(&self->shaders[0], self->params.egl, &self->uniforms[0], false) != 0) {
fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load Y shader\n");
goto err;
}
if(self->params.load_external_image_shader) {
if(load_shader_rgb(&self->shaders[EXTERNAL_TEXTURE_SHADER_OFFSET], self->params.egl, &self->uniforms[EXTERNAL_TEXTURE_SHADER_OFFSET], true) != 0) {
fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load Y shader\n");
goto err;
}
}
break;
}
}
@@ -310,119 +400,127 @@ void gsr_color_conversion_deinit(gsr_color_conversion *self) {
self->framebuffers[i] = 0;
}
for(int i = 0; i < GSR_COLOR_CONVERSION_MAX_SHADERS; ++i) {
for(int i = 0; i < MAX_SHADERS; ++i) {
gsr_shader_deinit(&self->shaders[i]);
}
self->params.egl = NULL;
}
static void gsr_color_conversion_apply_rotation(gsr_rotation rotation, float rotation_matrix[2][2], vec2i *source_position, vec2i texture_size, vec2f scale) {
/*
rotation_matrix[0][0] = cos(angle);
rotation_matrix[0][1] = -sin(angle);
rotation_matrix[1][0] = sin(angle);
rotation_matrix[1][1] = cos(angle);
The manual matrix code below is the same as this code above, but without floating-point errors.
This is done to remove any blurring caused by these floating-point errors.
*/
switch(rotation) {
case GSR_ROT_0:
rotation_matrix[0][0] = 1.0f;
rotation_matrix[0][1] = 0.0f;
rotation_matrix[1][0] = 0.0f;
rotation_matrix[1][1] = 1.0f;
break;
case GSR_ROT_90:
rotation_matrix[0][0] = 0.0f;
rotation_matrix[0][1] = -1.0f;
rotation_matrix[1][0] = 1.0f;
rotation_matrix[1][1] = 0.0f;
source_position->x += (((double)texture_size.x*0.5 - (double)texture_size.y*0.5) * scale.x + 0.5);
source_position->y += (((double)texture_size.y*0.5 - (double)texture_size.x*0.5) * scale.y + 0.5);
break;
case GSR_ROT_180:
rotation_matrix[0][0] = -1.0f;
rotation_matrix[0][1] = 0.0f;
rotation_matrix[1][0] = 0.0f;
rotation_matrix[1][1] = -1.0f;
break;
case GSR_ROT_270:
rotation_matrix[0][0] = 0.0f;
rotation_matrix[0][1] = 1.0f;
rotation_matrix[1][0] = -1.0f;
rotation_matrix[1][1] = 0.0f;
source_position->x += (((double)texture_size.x*0.5 - (double)texture_size.y*0.5) * scale.x + 0.5);
source_position->y += (((double)texture_size.y*0.5 - (double)texture_size.x*0.5) * scale.y + 0.5);
break;
/*
    Installs a red<->blue channel swizzle on the GL_TEXTURE_2D binding when |source_color| is
    GSR_SOURCE_COLOR_BGR, so that sampling the texture yields the channels in RGB order.
    The alpha slot is set to the literal 1 (presumably GL_ONE, i.e. constant alpha - confirm).
    For every other |source_color| this is a no-op.
    NOTE(review): the parameter is always set on GL_TEXTURE_2D; confirm this is intended when
    the source texture is bound to another target.
*/
static void gsr_color_conversion_swizzle_texture_source(gsr_color_conversion *self, gsr_source_color source_color) {
    if(source_color != GSR_SOURCE_COLOR_BGR)
        return;

    const int bgr_as_rgb_swizzle[4] = { GL_BLUE, GL_GREEN, GL_RED, 1 };
    self->params.egl->glTexParameteriv(GL_TEXTURE_2D, GL_TEXTURE_SWIZZLE_RGBA, bgr_as_rgb_swizzle);
}
// TODO: Handle source_color
void gsr_color_conversion_draw(gsr_color_conversion *self, unsigned int texture_id, vec2i destination_pos, vec2i destination_size, vec2i texture_pos, vec2i texture_size, gsr_rotation rotation, bool external_texture, gsr_source_color source_color) {
vec2f scale = {0.0f, 0.0f};
if(texture_size.x > 0 && texture_size.y > 0)
scale = (vec2f){ (double)destination_size.x/(double)texture_size.x, (double)destination_size.y/(double)texture_size.y };
/*
    Restores the identity swizzle (red, green, blue, alpha) on the GL_TEXTURE_2D binding.
    Only acts when |source_color| is GSR_SOURCE_COLOR_BGR, which is the only case in which
    gsr_color_conversion_swizzle_texture_source changes the swizzle.
*/
static void gsr_color_conversion_swizzle_reset(gsr_color_conversion *self, gsr_source_color source_color) {
    if(source_color != GSR_SOURCE_COLOR_BGR)
        return;

    const int identity_swizzle[4] = { GL_RED, GL_GREEN, GL_BLUE, GL_ALPHA };
    self->params.egl->glTexParameteriv(GL_TEXTURE_2D, GL_TEXTURE_SWIZZLE_RGBA, identity_swizzle);
}
vec2i source_position = {0, 0};
float rotation_matrix[2][2] = {{0, 0}, {0, 0}};
gsr_color_conversion_apply_rotation(rotation, rotation_matrix, &source_position, texture_size, scale);
/* |source_pos| and |source_size| are in pixel coordinates */
void gsr_color_conversion_draw(gsr_color_conversion *self, unsigned int texture_id, vec2i source_pos, vec2i source_size, vec2i texture_pos, vec2i texture_size, float rotation, bool external_texture, gsr_source_color source_color) {
// TODO: Remove this crap
rotation = M_PI*2.0f - rotation;
source_position.x += texture_pos.x;
source_position.y += texture_pos.y;
/* TODO: Do not call this every frame? */
vec2i dest_texture_size = {0, 0};
self->params.egl->glBindTexture(GL_TEXTURE_2D, self->params.destination_textures[0]);
self->params.egl->glGetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_WIDTH, &dest_texture_size.x);
self->params.egl->glGetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_HEIGHT, &dest_texture_size.y);
self->params.egl->glBindTexture(GL_TEXTURE_2D, 0);
const int texture_target = external_texture ? GL_TEXTURE_EXTERNAL_OES : GL_TEXTURE_2D;
self->params.egl->glBindTexture(texture_target, texture_id);
switch(self->params.destination_color) {
case GSR_DESTINATION_COLOR_NV12:
case GSR_DESTINATION_COLOR_P010: {
const bool use_16bit_colors = self->params.destination_color == GSR_DESTINATION_COLOR_P010;
// Y
{
gsr_shader_use(&self->shaders[0]);
self->params.egl->glUniformMatrix2fv(self->uniforms[0].rotation_matrix, 1, GL_TRUE, (const float*)rotation_matrix);
self->params.egl->glUniform2i(self->uniforms[0].source_position, source_position.x, source_position.y);
self->params.egl->glUniform2i(self->uniforms[0].target_position, destination_pos.x, destination_pos.y);
self->params.egl->glUniform2f(self->uniforms[0].scale, scale.x, scale.y);
self->params.egl->glBindImageTexture(0, self->params.destination_textures[0], 0, GL_FALSE, 0, GL_READ_WRITE, use_16bit_colors ? GL_R16 : GL_R8);
const double num_groups_x = (double)texture_size.x/(double)self->max_local_size_dim + 0.5;
const double num_groups_y = (double)texture_size.y/(double)self->max_local_size_dim + 0.5;
self->params.egl->glDispatchCompute(max_int(1, num_groups_x), max_int(1, num_groups_y), 1);
}
// UV
{
gsr_shader_use(&self->shaders[1]);
self->params.egl->glUniformMatrix2fv(self->uniforms[1].rotation_matrix, 1, GL_TRUE, (const float*)rotation_matrix);
self->params.egl->glUniform2i(self->uniforms[1].source_position, source_position.x, source_position.y);
self->params.egl->glUniform2i(self->uniforms[1].target_position, destination_pos.x, destination_pos.y);
self->params.egl->glUniform2f(self->uniforms[1].scale, scale.x, scale.y);
self->params.egl->glBindImageTexture(0, self->params.destination_textures[1], 0, GL_FALSE, 0, GL_READ_WRITE, use_16bit_colors ? GL_RG16 : GL_RG8);
const double num_groups_x = (double)texture_size.x*0.5/(double)self->max_local_size_dim + 0.5;
const double num_groups_y = (double)texture_size.y*0.5/(double)self->max_local_size_dim + 0.5;
self->params.egl->glDispatchCompute(max_int(1, num_groups_x), max_int(1, num_groups_y), 1);
}
break;
}
case GSR_DESTINATION_COLOR_RGB8: {
gsr_shader_use(&self->shaders[2]);
self->params.egl->glUniformMatrix2fv(self->uniforms[2].rotation_matrix, 1, GL_TRUE, (const float*)rotation_matrix);
self->params.egl->glUniform2i(self->uniforms[2].source_position, source_position.x, source_position.y);
self->params.egl->glUniform2i(self->uniforms[2].target_position, destination_pos.x, destination_pos.y);
self->params.egl->glUniform2f(self->uniforms[2].scale, scale.x, scale.y);
self->params.egl->glBindImageTexture(0, self->params.destination_textures[0], 0, GL_FALSE, 0, GL_READ_WRITE, GL_RGBA8);
const double num_groups_x = (double)texture_size.x/(double)self->max_local_size_dim + 0.5;
const double num_groups_y = (double)texture_size.y/(double)self->max_local_size_dim + 0.5;
self->params.egl->glDispatchCompute(max_int(1, num_groups_x), max_int(1, num_groups_y), 1);
break;
}
vec2i source_texture_size = {0, 0};
if(external_texture) {
assert(self->params.load_external_image_shader);
source_texture_size = source_size;
} else {
/* TODO: Do not call this every frame? */
self->params.egl->glGetTexLevelParameteriv(texture_target, 0, GL_TEXTURE_WIDTH, &source_texture_size.x);
self->params.egl->glGetTexLevelParameteriv(texture_target, 0, GL_TEXTURE_HEIGHT, &source_texture_size.y);
}
self->params.egl->glMemoryBarrier(GL_ALL_BARRIER_BITS); // GL_SHADER_IMAGE_ACCESS_BARRIER_BIT
self->params.egl->glUseProgram(0);
// TODO: Remove this crap
if(abs_f(M_PI * 0.5f - rotation) <= 0.001f || abs_f(M_PI * 1.5f - rotation) <= 0.001f) {
float tmp = source_texture_size.x;
source_texture_size.x = source_texture_size.y;
source_texture_size.y = tmp;
}
const vec2f pos_norm = {
((float)source_pos.x / (dest_texture_size.x == 0 ? 1.0f : (float)dest_texture_size.x)) * 2.0f,
((float)source_pos.y / (dest_texture_size.y == 0 ? 1.0f : (float)dest_texture_size.y)) * 2.0f,
};
const vec2f size_norm = {
((float)source_size.x / (dest_texture_size.x == 0 ? 1.0f : (float)dest_texture_size.x)) * 2.0f,
((float)source_size.y / (dest_texture_size.y == 0 ? 1.0f : (float)dest_texture_size.y)) * 2.0f,
};
const vec2f texture_pos_norm = {
(float)texture_pos.x / (source_texture_size.x == 0 ? 1.0f : (float)source_texture_size.x),
(float)texture_pos.y / (source_texture_size.y == 0 ? 1.0f : (float)source_texture_size.y),
};
const vec2f texture_size_norm = {
(float)texture_size.x / (source_texture_size.x == 0 ? 1.0f : (float)source_texture_size.x),
(float)texture_size.y / (source_texture_size.y == 0 ? 1.0f : (float)source_texture_size.y),
};
const float vertices[] = {
-1.0f + 0.0f, -1.0f + 0.0f + size_norm.y, texture_pos_norm.x, texture_pos_norm.y + texture_size_norm.y,
-1.0f + 0.0f, -1.0f + 0.0f, texture_pos_norm.x, texture_pos_norm.y,
-1.0f + 0.0f + size_norm.x, -1.0f + 0.0f, texture_pos_norm.x + texture_size_norm.x, texture_pos_norm.y,
-1.0f + 0.0f, -1.0f + 0.0f + size_norm.y, texture_pos_norm.x, texture_pos_norm.y + texture_size_norm.y,
-1.0f + 0.0f + size_norm.x, -1.0f + 0.0f, texture_pos_norm.x + texture_size_norm.x, texture_pos_norm.y,
-1.0f + 0.0f + size_norm.x, -1.0f + 0.0f + size_norm.y, texture_pos_norm.x + texture_size_norm.x, texture_pos_norm.y + texture_size_norm.y
};
gsr_color_conversion_swizzle_texture_source(self, source_color);
self->params.egl->glBindVertexArray(self->vertex_array_object_id);
self->params.egl->glViewport(0, 0, dest_texture_size.x, dest_texture_size.y);
/* TODO: this, also cleanup */
//self->params.egl->glBindBuffer(GL_ARRAY_BUFFER, self->vertex_buffer_object_id);
self->params.egl->glBufferSubData(GL_ARRAY_BUFFER, 0, 24 * sizeof(float), vertices);
{
self->params.egl->glBindFramebuffer(GL_FRAMEBUFFER, self->framebuffers[0]);
//cap_xcomp->params.egl->glClear(GL_COLOR_BUFFER_BIT); // TODO: Do this in a separate clear_ function. We want to do that when using multiple drm to create the final image (multiple monitors for example)
const int shader_index = external_texture ? EXTERNAL_TEXTURE_SHADER_OFFSET : 0;
gsr_shader_use(&self->shaders[shader_index]);
self->params.egl->glUniform1f(self->uniforms[shader_index].rotation, rotation);
self->params.egl->glUniform2f(self->uniforms[shader_index].offset, pos_norm.x, pos_norm.y);
self->params.egl->glDrawArrays(GL_TRIANGLES, 0, 6);
}
if(self->params.num_destination_textures > 1) {
self->params.egl->glBindFramebuffer(GL_FRAMEBUFFER, self->framebuffers[1]);
//cap_xcomp->params.egl->glClear(GL_COLOR_BUFFER_BIT);
const int shader_index = external_texture ? EXTERNAL_TEXTURE_SHADER_OFFSET + 1 : 1;
gsr_shader_use(&self->shaders[shader_index]);
self->params.egl->glUniform1f(self->uniforms[shader_index].rotation, rotation);
self->params.egl->glUniform2f(self->uniforms[shader_index].offset, pos_norm.x, pos_norm.y);
self->params.egl->glDrawArrays(GL_TRIANGLES, 0, 6);
}
self->params.egl->glBindVertexArray(0);
gsr_shader_use_none(&self->shaders[0]);
self->params.egl->glBindTexture(texture_target, 0);
self->params.egl->glBindFramebuffer(GL_FRAMEBUFFER, 0);
gsr_color_conversion_swizzle_reset(self, source_color);
}
void gsr_color_conversion_clear(gsr_color_conversion *self) {
@@ -459,7 +557,3 @@ void gsr_color_conversion_clear(gsr_color_conversion *self) {
self->params.egl->glBindFramebuffer(GL_FRAMEBUFFER, 0);
}
gsr_rotation gsr_monitor_rotation_to_rotation(gsr_monitor_rotation monitor_rotation) {
return (gsr_rotation)monitor_rotation;
}

View File

@@ -284,7 +284,6 @@ static bool gsr_egl_load_gl(gsr_egl *self, void *library) {
{ (void**)&self->glGenTextures, "glGenTextures" },
{ (void**)&self->glDeleteTextures, "glDeleteTextures" },
{ (void**)&self->glBindTexture, "glBindTexture" },
{ (void**)&self->glBindImageTexture, "glBindImageTexture" },
{ (void**)&self->glTexParameteri, "glTexParameteri" },
{ (void**)&self->glTexParameteriv, "glTexParameteriv" },
{ (void**)&self->glGetTexLevelParameteriv, "glGetTexLevelParameteriv" },
@@ -294,8 +293,6 @@ static bool gsr_egl_load_gl(gsr_egl *self, void *library) {
{ (void**)&self->glGenFramebuffers, "glGenFramebuffers" },
{ (void**)&self->glBindFramebuffer, "glBindFramebuffer" },
{ (void**)&self->glDeleteFramebuffers, "glDeleteFramebuffers" },
{ (void**)&self->glDispatchCompute, "glDispatchCompute" },
{ (void**)&self->glMemoryBarrier, "glMemoryBarrier" },
{ (void**)&self->glViewport, "glViewport" },
{ (void**)&self->glFramebufferTexture2D, "glFramebufferTexture2D" },
{ (void**)&self->glDrawBuffers, "glDrawBuffers" },
@@ -332,14 +329,11 @@ static bool gsr_egl_load_gl(gsr_egl *self, void *library) {
{ (void**)&self->glGetUniformLocation, "glGetUniformLocation" },
{ (void**)&self->glUniform1f, "glUniform1f" },
{ (void**)&self->glUniform2f, "glUniform2f" },
{ (void**)&self->glUniform2i, "glUniform2i" },
{ (void**)&self->glUniformMatrix2fv, "glUniformMatrix2fv" },
{ (void**)&self->glDebugMessageCallback, "glDebugMessageCallback" },
{ (void**)&self->glScissor, "glScissor" },
{ (void**)&self->glReadPixels, "glReadPixels" },
{ (void**)&self->glMapBuffer, "glMapBuffer" },
{ (void**)&self->glUnmapBuffer, "glUnmapBuffer" },
{ (void**)&self->glGetIntegerv, "glGetIntegerv" },
{ NULL, NULL }
};

View File

@@ -83,8 +83,8 @@ static void gsr_video_encoder_software_copy_textures_to_frame(gsr_video_encoder
self->params.egl->glBindTexture(GL_TEXTURE_2D, 0);
// cap_kms->kms.base.egl->eglSwapBuffers(cap_kms->kms.base.egl->egl_display, cap_kms->kms.base.egl->egl_surface);
//self->params.egl->glFlush();
//self->params.egl->glFinish();
self->params.egl->glFlush();
self->params.egl->glFinish();
}
static void gsr_video_encoder_software_get_textures(gsr_video_encoder *encoder, unsigned int *textures, int *num_textures, gsr_destination_color *destination_color) {

View File

@@ -123,8 +123,8 @@ static bool gsr_video_encoder_vaapi_setup_textures(gsr_video_encoder_vaapi *self
self->params.egl->glBindTexture(GL_TEXTURE_2D, self->target_textures[i]);
self->params.egl->glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
self->params.egl->glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
self->params.egl->glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
self->params.egl->glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
self->params.egl->glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
self->params.egl->glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
while(self->params.egl->glGetError()) {}
while(self->params.egl->eglGetError() != EGL_SUCCESS){}

View File

@@ -17,7 +17,7 @@ bool gsr_image_writer_init_opengl(gsr_image_writer *self, gsr_egl *egl, int widt
self->egl = egl;
self->width = width;
self->height = height;
self->texture = gl_create_texture(self->egl, self->width, self->height, GL_RGBA8, GL_RGBA, GL_NEAREST); /* TODO: use GL_RGB16 instead of GL_RGB8 for hdr/10-bit */
self->texture = gl_create_texture(self->egl, self->width, self->height, GL_RGB8, GL_RGB, GL_NEAREST); /* TODO: use GL_RGB16 instead of GL_RGB8 for hdr/10-bit */
if(self->texture == 0) {
fprintf(stderr, "gsr error: gsr_image_writer_init: failed to create texture\n");
return false;
@@ -50,10 +50,10 @@ static bool gsr_image_writer_write_memory_to_file(gsr_image_writer *self, const
bool success = false;
switch(image_format) {
case GSR_IMAGE_FORMAT_JPEG:
success = stbi_write_jpg(filepath, self->width, self->height, 4, data, quality);
success = stbi_write_jpg(filepath, self->width, self->height, 3, data, quality);
break;
case GSR_IMAGE_FORMAT_PNG:
success = stbi_write_png(filepath, self->width, self->height, 4, data, 0);
success = stbi_write_png(filepath, self->width, self->height, 3, data, 0);
break;
}
@@ -65,7 +65,7 @@ static bool gsr_image_writer_write_memory_to_file(gsr_image_writer *self, const
static bool gsr_image_writer_write_opengl_texture_to_file(gsr_image_writer *self, const char *filepath, gsr_image_format image_format, int quality) {
assert(self->source == GSR_IMAGE_WRITER_SOURCE_OPENGL);
uint8_t *frame_data = malloc(self->width * self->height * 4);
uint8_t *frame_data = malloc(self->width * self->height * 3);
if(!frame_data) {
fprintf(stderr, "gsr error: gsr_image_writer_write_to_file: failed to allocate memory for image frame\n");
return false;
@@ -74,7 +74,7 @@ static bool gsr_image_writer_write_opengl_texture_to_file(gsr_image_writer *self
// TODO: hdr support
self->egl->glBindTexture(GL_TEXTURE_2D, self->texture);
// We could use glGetTexSubImage, but it's only available starting from opengl 4.5
self->egl->glGetTexImage(GL_TEXTURE_2D, 0, GL_RGBA, GL_UNSIGNED_BYTE, frame_data);
self->egl->glGetTexImage(GL_TEXTURE_2D, 0, GL_RGB, GL_UNSIGNED_BYTE, frame_data);
self->egl->glBindTexture(GL_TEXTURE_2D, 0);
self->egl->glFlush();

View File

@@ -1072,9 +1072,8 @@ static void open_video_hardware(AVCodecContext *codec_context, VideoQuality vide
// TODO: More quality options
if(low_power)
av_dict_set_int(&options, "low_power", 1, 0);
// Improves performance but increases vram.
// TODO: Might need a different async_depth for optimal performance on different amd/intel gpus
//av_dict_set_int(&options, "async_depth", 3, 0);
// Improves performance but increases vram
//av_dict_set_int(&options, "async_depth", 8, 0);
if(codec_context->codec_id == AV_CODEC_ID_H264) {
// Removed because it causes stutter in games for some people

View File

@@ -36,36 +36,28 @@ static unsigned int loader_shader(gsr_egl *egl, unsigned int type, const char *s
return shader_id;
}
static unsigned int load_program(gsr_egl *egl, const char *vertex_shader, const char *fragment_shader, const char *compute_shader) {
static unsigned int load_program(gsr_egl *egl, const char *vertex_shader, const char *fragment_shader) {
unsigned int vertex_shader_id = 0;
unsigned int fragment_shader_id = 0;
unsigned int compute_shader_id = 0;
unsigned int program_id = 0;
int linked = 0;
bool success = false;
if(vertex_shader) {
vertex_shader_id = loader_shader(egl, GL_VERTEX_SHADER, vertex_shader);
if(vertex_shader_id == 0)
goto done;
goto err;
}
if(fragment_shader) {
fragment_shader_id = loader_shader(egl, GL_FRAGMENT_SHADER, fragment_shader);
if(fragment_shader_id == 0)
goto done;
}
if(compute_shader) {
compute_shader_id = loader_shader(egl, GL_COMPUTE_SHADER, compute_shader);
if(compute_shader_id == 0)
goto done;
goto err;
}
program_id = egl->glCreateProgram();
if(program_id == 0) {
fprintf(stderr, "gsr error: load_program: failed to create shader program, error: %d\n", egl->glGetError());
goto done;
goto err;
}
if(vertex_shader_id)
@@ -74,9 +66,6 @@ static unsigned int load_program(gsr_egl *egl, const char *vertex_shader, const
if(fragment_shader_id)
egl->glAttachShader(program_id, fragment_shader_id);
if(compute_shader_id)
egl->glAttachShader(program_id, compute_shader_id);
egl->glLinkProgram(program_id);
egl->glGetProgramiv(program_id, GL_LINK_STATUS, &linked);
@@ -90,36 +79,37 @@ static unsigned int load_program(gsr_egl *egl, const char *vertex_shader, const
fprintf(stderr, "gsr error: load program: linking shader program failed, error:\n%s\n", info_log);
}
goto done;
goto err;
}
success = true;
done:
if(!success) {
if(program_id)
egl->glDeleteProgram(program_id);
}
if(compute_shader_id)
egl->glDeleteShader(compute_shader_id);
if(fragment_shader_id)
egl->glDeleteShader(fragment_shader_id);
if(vertex_shader_id)
egl->glDeleteShader(vertex_shader_id);
return program_id;
err:
if(program_id)
egl->glDeleteProgram(program_id);
if(fragment_shader_id)
egl->glDeleteShader(fragment_shader_id);
if(vertex_shader_id)
egl->glDeleteShader(vertex_shader_id);
return 0;
}
int gsr_shader_init(gsr_shader *self, gsr_egl *egl, const char *vertex_shader, const char *fragment_shader, const char *compute_shader) {
int gsr_shader_init(gsr_shader *self, gsr_egl *egl, const char *vertex_shader, const char *fragment_shader) {
assert(egl);
self->egl = egl;
self->program_id = 0;
if(!vertex_shader && !fragment_shader && !compute_shader) {
fprintf(stderr, "gsr error: gsr_shader_init: vertex, fragment shader and compute shaders can't be NULL at the same time\n");
if(!vertex_shader && !fragment_shader) {
fprintf(stderr, "gsr error: gsr_shader_init: vertex shader and fragment shader can't be NULL at the same time\n");
return -1;
}
self->program_id = load_program(self->egl, vertex_shader, fragment_shader, compute_shader);
self->program_id = load_program(self->egl, vertex_shader, fragment_shader);
if(self->program_id == 0)
return -1;

View File

@@ -14,8 +14,10 @@
#include <xf86drmMode.h>
#include <xf86drm.h>
#include <libdrm/drm_fourcc.h>
#include <X11/Xatom.h>
#include <X11/extensions/Xrandr.h>
#include <va/va_drmcommon.h>
#include <libavcodec/avcodec.h>
#include <libavutil/hwcontext_vaapi.h>
@@ -661,6 +663,241 @@ bool video_codec_context_is_vaapi(AVCodecContext *video_codec_context) {
return device_context->type == AV_HWDEVICE_TYPE_VAAPI;
}
/*
    Maps a DRM fourcc for an 8-bit-per-channel RGB format (with or without alpha) to the
    matching VA fourcc. Any format that is not in the table is returned unchanged.
    NOTE(review): the channel names look reversed between the two columns, presumably because
    the two APIs name the channels in opposite byte order - confirm against drm_fourcc.h and va.h.
*/
static uint32_t drm_fourcc_to_va_fourcc(uint32_t drm_fourcc) {
    static const struct {
        uint32_t drm;
        uint32_t va;
    } rgb_formats[] = {
        { DRM_FORMAT_XRGB8888, VA_FOURCC_BGRX },
        { DRM_FORMAT_XBGR8888, VA_FOURCC_RGBX },
        { DRM_FORMAT_RGBX8888, VA_FOURCC_XBGR },
        { DRM_FORMAT_BGRX8888, VA_FOURCC_XRGB },
        { DRM_FORMAT_ARGB8888, VA_FOURCC_BGRA },
        { DRM_FORMAT_ABGR8888, VA_FOURCC_RGBA },
        { DRM_FORMAT_RGBA8888, VA_FOURCC_ABGR },
        { DRM_FORMAT_BGRA8888, VA_FOURCC_ARGB },
    };
    const unsigned int num_formats = sizeof(rgb_formats) / sizeof(rgb_formats[0]);

    for(unsigned int i = 0; i < num_formats; ++i) {
        if(rgb_formats[i].drm == drm_fourcc)
            return rgb_formats[i].va;
    }

    /* Not one of the known RGB layouts: pass the value through as-is */
    return drm_fourcc;
}
/*
 * Imports a DMA-BUF described by |fds|/|offsets|/|pitches|/|modifiers| as a temporary VA-API RGB
 * surface and runs it through the VA-API video processing pipeline into the surface that backs
 * |video_frame| (taken from video_frame->data[3]).
 *
 * source_pos/source_size: region of the imported surface to read from.
 * dest_pos/dest_size:     region of the output surface to write to. Scaling filters are enabled
 *                         when dest_size differs from source_size.
 * format:                 DRM fourcc of the imported buffer.
 * size:                   dimensions used for the imported surface and for the VA context.
 * num_planes:             number of valid entries in fds/offsets/pitches/modifiers. Must be at least 1
 *                         and must fit in the fixed-size arrays of VADRMPRIMESurfaceDescriptor.
 *
 * Returns true when every VA-API call succeeded, false otherwise (an error is printed to stderr).
 * All VA objects created here are destroyed before returning. The file descriptors are not closed here.
 */
bool vaapi_copy_drm_planes_to_video_surface(AVCodecContext *video_codec_context, AVFrame *video_frame, vec2i source_pos, vec2i source_size, vec2i dest_pos, vec2i dest_size, uint32_t format, vec2i size, const int *fds, const uint32_t *offsets, const uint32_t *pitches, const uint64_t *modifiers, int num_planes) {
    VAConfigID config_id = 0;
    VAContextID context_id = 0;
    VASurfaceID input_surface_id = 0;
    VABufferID buffer_id = 0;
    bool success = true;

    VADRMPRIMESurfaceDescriptor buf = {0};

    /*
     * The descriptor stores objects and per-layer plane data in fixed-size arrays.
     * Reject plane counts that would write past them instead of corrupting the stack.
     */
    const int max_objects = (int)(sizeof(buf.objects) / sizeof(buf.objects[0]));
    const int max_layer_planes = (int)(sizeof(buf.layers[0].pitch) / sizeof(buf.layers[0].pitch[0]));
    const int max_planes = max_objects < max_layer_planes ? max_objects : max_layer_planes;
    if(num_planes <= 0 || num_planes > max_planes) {
        fprintf(stderr, "gsr error: vaapi_copy_drm_planes_to_video_surface: expected planes size to be 0<planes<=%d, got %d planes\n", max_planes, num_planes);
        return false;
    }

    VADisplay va_dpy = video_codec_context_get_vaapi_display(video_codec_context);
    if(!va_dpy) {
        success = false;
        goto done;
    }

    VAStatus va_status = vaCreateConfig(va_dpy, VAProfileNone, VAEntrypointVideoProc, NULL, 0, &config_id);
    if(va_status != VA_STATUS_SUCCESS) {
        fprintf(stderr, "gsr error: vaapi_copy_drm_planes_to_video_surface: vaCreateConfig failed, error: %s\n", vaErrorStr(va_status));
        success = false;
        goto done;
    }

    /* The frame's data[3] pointer is reinterpreted as the id of the output surface */
    VASurfaceID output_surface_id = (uintptr_t)video_frame->data[3];

    va_status = vaCreateContext(va_dpy, config_id, size.x, size.y, VA_PROGRESSIVE, &output_surface_id, 1, &context_id);
    if(va_status != VA_STATUS_SUCCESS) {
        fprintf(stderr, "gsr error: vaapi_copy_drm_planes_to_video_surface: vaCreateContext failed, error: %s\n", vaErrorStr(va_status));
        success = false;
        goto done;
    }

    /* Describe the dma-buf as a single layer where plane i lives in object i */
    buf.fourcc = drm_fourcc_to_va_fourcc(format);//VA_FOURCC_BGRX; // TODO: VA_FOURCC_BGRA, VA_FOURCC_X2R10G10B10
    buf.width = size.x;
    buf.height = size.y;
    buf.num_objects = num_planes;
    buf.num_layers = 1;
    buf.layers[0].drm_format = format;
    buf.layers[0].num_planes = buf.num_objects;
    for(int i = 0; i < num_planes; ++i) {
        buf.objects[i].fd = fds[i];
        buf.objects[i].size = size.y * pitches[i]; // TODO:
        buf.objects[i].drm_format_modifier = modifiers[i];

        buf.layers[0].object_index[i] = i;
        buf.layers[0].offset[i] = offsets[i];
        buf.layers[0].pitch[i] = pitches[i];
    }

    VASurfaceAttrib attribs[2] = {0};
    attribs[0].type = VASurfaceAttribMemoryType;
    attribs[0].flags = VA_SURFACE_ATTRIB_SETTABLE;
    attribs[0].value.type = VAGenericValueTypeInteger;
    attribs[0].value.value.i = VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME_2;
    attribs[1].type = VASurfaceAttribExternalBufferDescriptor;
    attribs[1].flags = VA_SURFACE_ATTRIB_SETTABLE;
    attribs[1].value.type = VAGenericValueTypePointer;
    attribs[1].value.value.p = &buf;

    // TODO: RT_FORMAT with 10 bit/hdr, VA_RT_FORMAT_RGB32_10
    // TODO: Max size same as source_size
    va_status = vaCreateSurfaces(va_dpy, VA_RT_FORMAT_RGB32, size.x, size.y, &input_surface_id, 1, attribs, 2);
    if(va_status != VA_STATUS_SUCCESS) {
        fprintf(stderr, "gsr error: vaapi_copy_drm_planes_to_video_surface: vaCreateSurfaces failed, error: %s\n", vaErrorStr(va_status));
        success = false;
        goto done;
    }

    const VARectangle source_region = {
        .x = source_pos.x,
        .y = source_pos.y,
        .width = source_size.x,
        .height = source_size.y
    };

    const VARectangle output_region = {
        .x = dest_pos.x,
        .y = dest_pos.y,
        .width = dest_size.x,
        .height = dest_size.y
    };

    const bool scaled = dest_size.x != source_size.x || dest_size.y != source_size.y;

    // Copying a surface to another surface will automatically perform the color conversion. Thanks vaapi!
    VAProcPipelineParameterBuffer params = {0};
    params.surface = input_surface_id;
    params.surface_region = &source_region;
    params.output_region = &output_region;
    params.output_background_color = 0;
    params.filter_flags = scaled ? (VA_FILTER_SCALING_HQ | VA_FILTER_INTERPOLATION_BILINEAR) : 0;
    params.pipeline_flags = VA_PROC_PIPELINE_FAST;

    /* NOTE(review): the value 1 is presumably the BT.709 code point, matching the color standard set below - confirm */
    params.input_color_properties.colour_primaries = 1;
    params.input_color_properties.transfer_characteristics = 1;
    params.input_color_properties.matrix_coefficients = 1;
    params.surface_color_standard = VAProcColorStandardBT709; // TODO:
    params.input_color_properties.color_range = video_frame->color_range == AVCOL_RANGE_JPEG ? VA_SOURCE_RANGE_FULL : VA_SOURCE_RANGE_REDUCED;

    params.output_color_properties.colour_primaries = 1;
    params.output_color_properties.transfer_characteristics = 1;
    params.output_color_properties.matrix_coefficients = 1;
    params.output_color_standard = VAProcColorStandardBT709; // TODO:
    params.output_color_properties.color_range = video_frame->color_range == AVCOL_RANGE_JPEG ? VA_SOURCE_RANGE_FULL : VA_SOURCE_RANGE_REDUCED;

    params.processing_mode = VAProcPerformanceMode;

    // TODO: params.output_hdr_metadata

    // TODO:
    // if (first surface to render)
    //     pipeline_param->output_background_color = 0xff000000; // black

    va_status = vaCreateBuffer(va_dpy, context_id, VAProcPipelineParameterBufferType, sizeof(params), 1, &params, &buffer_id);
    if(va_status != VA_STATUS_SUCCESS) {
        fprintf(stderr, "gsr error: vaapi_copy_drm_planes_to_video_surface: vaCreateBuffer failed, error: %d\n", va_status);
        success = false;
        goto done;
    }

    va_status = vaBeginPicture(va_dpy, context_id, output_surface_id);
    if(va_status != VA_STATUS_SUCCESS) {
        fprintf(stderr, "gsr error: vaapi_copy_drm_planes_to_video_surface: vaBeginPicture failed, error: %d\n", va_status);
        success = false;
        goto done;
    }

    va_status = vaRenderPicture(va_dpy, context_id, &buffer_id, 1);
    if(va_status != VA_STATUS_SUCCESS) {
        /* A picture was begun above, so end it before bailing out */
        vaEndPicture(va_dpy, context_id);
        fprintf(stderr, "gsr error: vaapi_copy_drm_planes_to_video_surface: vaRenderPicture failed, error: %d\n", va_status);
        success = false;
        goto done;
    }

    va_status = vaEndPicture(va_dpy, context_id);
    if(va_status != VA_STATUS_SUCCESS) {
        fprintf(stderr, "gsr error: vaapi_copy_drm_planes_to_video_surface: vaEndPicture failed, error: %d\n", va_status);
        success = false;
        goto done;
    }

    // vaSyncBuffer(va_dpy, buffer_id, 1000 * 1000 * 1000);
    // vaSyncSurface(va_dpy, input_surface_id);
    // vaSyncSurface(va_dpy, output_surface_id);

    done:
    /* Ids start at 0 and are only destroyed once a create call has replaced them */
    if(buffer_id)
        vaDestroyBuffer(va_dpy, buffer_id);

    if(input_surface_id)
        vaDestroySurfaces(va_dpy, &input_surface_id, 1);

    if(context_id)
        vaDestroyContext(va_dpy, context_id);

    if(config_id)
        vaDestroyConfig(va_dpy, config_id);

    return success;
}
/*
 * Exports |image| as DMA-BUF planes through the MESA export entry points on |egl| and copies the
 * result into the VA-API surface behind |video_frame| with vaapi_copy_drm_planes_to_video_surface.
 *
 * source_pos/source_size and dest_pos/dest_size are forwarded unchanged; source_size is also passed
 * as the size of the imported surface.
 *
 * Returns false if |image| is NULL, if the export fails, if the reported plane count is outside 1..8,
 * if the first plane has no file descriptor, or if the copy itself fails.
 * Every file descriptor obtained from the export is closed before returning.
 */
bool vaapi_copy_egl_image_to_video_surface(gsr_egl *egl, EGLImage image, vec2i source_pos, vec2i source_size, vec2i dest_pos, vec2i dest_size, AVCodecContext *video_codec_context, AVFrame *video_frame) {
    enum { MAX_EXPORT_PLANES = 8 };

    if(!image)
        return false;

    int texture_fourcc = 0;
    int texture_num_planes = 0;
    uint64_t texture_modifiers = 0;
    if(!egl->eglExportDMABUFImageQueryMESA(egl->egl_display, image, &texture_fourcc, &texture_num_planes, &texture_modifiers)) {
        fprintf(stderr, "gsr error: gsr_capture_xcomposite_vaapi_tick: eglExportDMABUFImageQueryMESA failed\n");
        return false;
    }

    if(texture_num_planes <= 0 || texture_num_planes > MAX_EXPORT_PLANES) {
        fprintf(stderr, "gsr error: gsr_capture_xcomposite_vaapi_tick: expected planes size to be 0<planes<=8 for drm buf, got %d planes\n", texture_num_planes);
        return false;
    }

    /* Start every slot at -1 so the cleanup below never closes a descriptor the export did not hand out */
    int texture_fds[MAX_EXPORT_PLANES];
    int32_t texture_strides[MAX_EXPORT_PLANES] = {0};
    int32_t texture_offsets[MAX_EXPORT_PLANES] = {0};
    for(int i = 0; i < MAX_EXPORT_PLANES; ++i) {
        texture_fds[i] = -1;
    }

    /* Drain stale EGL errors so the error printed on failure belongs to the export call */
    while(egl->eglGetError() != EGL_SUCCESS){}
    if(!egl->eglExportDMABUFImageMESA(egl->egl_display, image, texture_fds, texture_strides, texture_offsets)) {
        fprintf(stderr, "gsr error: gsr_capture_xcomposite_vaapi_tick: eglExportDMABUFImageMESA failed, error: %d\n", egl->eglGetError());
        return false;
    }

    int fds[MAX_EXPORT_PLANES];
    uint32_t offsets[MAX_EXPORT_PLANES];
    uint32_t pitches[MAX_EXPORT_PLANES];
    uint64_t modifiers[MAX_EXPORT_PLANES];

    /* Only the leading planes that come with a file descriptor are forwarded to the copy */
    const int exported_planes = texture_num_planes;
    int usable_planes = exported_planes;
    for(int i = 0; i < exported_planes; ++i) {
        if(texture_fds[i] == -1) {
            usable_planes = i;
            break;
        }

        fds[i] = texture_fds[i];
        offsets[i] = texture_offsets[i];
        pitches[i] = texture_strides[i];
        /* The query reports a single modifier; it is used for every plane */
        modifiers[i] = texture_modifiers;
    }

    const bool success = usable_planes > 0 && vaapi_copy_drm_planes_to_video_surface(video_codec_context, video_frame, source_pos, source_size, dest_pos, dest_size, texture_fourcc, source_size, fds, offsets, pitches, modifiers, usable_planes);

    /* Close everything the export returned; 0 is a valid descriptor, only negative values mean "none" */
    for(int i = 0; i < exported_planes; ++i) {
        if(texture_fds[i] >= 0) {
            close(texture_fds[i]);
            texture_fds[i] = -1;
        }
    }

    return success;
}
vec2i scale_keep_aspect_ratio(vec2i from, vec2i to) {
if(from.x == 0 || from.y == 0)
return (vec2i){0, 0};