Use async_depth option on vaapi for much better encoding performance

This commit is contained in:
dec05eba
2024-09-18 00:11:23 +02:00
parent 702c681a95
commit 41bd6cee63
5 changed files with 30 additions and 14 deletions

7
TODO
View File

@@ -157,3 +157,10 @@ Add cbr option.
Restart replay/update video resolution if monitor resolution changes. Restart replay/update video resolution if monitor resolution changes.
Fix pure vaapi copy on intel. Fix pure vaapi copy on intel.
FFmpeg supports Vulkan encoding now (h264!). It doesn't work on AMD yet because Mesa is missing VK_KHR_video_maintenance1, see https://gitlab.freedesktop.org/mesa/mesa/-/issues/11857. Test on NVIDIA!
Test VAAPI low-latency mode (setenv("AMD_DEBUG", "lowlatencyenc", true);), added in Mesa 24.1.4, released on July 17, 2024. Note that this forces GPU power usage to max at all times, even when recording at 2 fps.
Use NVIDIA low-latency options for better encoding times.
Test to find the ideal async_depth value. Increasing async_depth also increased GPU memory usage a lot (from 100 MB to 500 MB when moving from async_depth 2 to 16) at 4k resolution. Setting it to 8 increases it by 200 MB, which might be ok.

View File

@@ -1,6 +1,7 @@
#ifndef GSR_DAMAGE_H #ifndef GSR_DAMAGE_H
#define GSR_DAMAGE_H #define GSR_DAMAGE_H
#include "cursor.h"
#include "utils.h" #include "utils.h"
#include <stdbool.h> #include <stdbool.h>
#include <stdint.h> #include <stdint.h>
@@ -31,7 +32,7 @@ typedef struct {
//vec2i window_pos; //vec2i window_pos;
vec2i window_size; vec2i window_size;
vec2i cursor_position; /* Relative to |window| */ gsr_cursor cursor; /* Relative to |window| */
gsr_monitor monitor; gsr_monitor monitor;
char monitor_name[32]; char monitor_name[32];
} gsr_damage; } gsr_damage;

View File

@@ -497,6 +497,9 @@ static void render_drm_cursor(gsr_capture_kms *self, gsr_color_conversion *color
} }
static void render_x11_cursor(gsr_capture_kms *self, gsr_color_conversion *color_conversion, vec2i capture_pos, vec2i target_pos) { static void render_x11_cursor(gsr_capture_kms *self, gsr_color_conversion *color_conversion, vec2i capture_pos, vec2i target_pos) {
if(!self->x11_cursor.visible)
return;
gsr_cursor_tick(&self->x11_cursor, DefaultRootWindow(self->params.egl->x11.dpy)); gsr_cursor_tick(&self->x11_cursor, DefaultRootWindow(self->params.egl->x11.dpy));
const vec2i cursor_pos = { const vec2i cursor_pos = {

View File

@@ -53,6 +53,9 @@ bool gsr_damage_init(gsr_damage *self, gsr_egl *egl, bool track_cursor) {
return false; return false;
} }
if(self->track_cursor)
self->track_cursor = gsr_cursor_init(&self->cursor, self->egl, self->egl->x11.dpy) == 0;
XRRSelectInput(self->egl->x11.dpy, DefaultRootWindow(self->egl->x11.dpy), RRScreenChangeNotifyMask | RRCrtcChangeNotifyMask | RROutputChangeNotifyMask); XRRSelectInput(self->egl->x11.dpy, DefaultRootWindow(self->egl->x11.dpy), RRScreenChangeNotifyMask | RRCrtcChangeNotifyMask | RROutputChangeNotifyMask);
self->damaged = true; self->damaged = true;
@@ -65,6 +68,8 @@ void gsr_damage_deinit(gsr_damage *self) {
self->damage = None; self->damage = None;
} }
gsr_cursor_deinit(&self->cursor);
self->damage_event = 0; self->damage_event = 0;
self->damage_error = 0; self->damage_error = 0;
@@ -245,16 +250,11 @@ static void gsr_damage_on_damage_event(gsr_damage *self, XEvent *xev) {
XFlush(self->egl->x11.dpy); XFlush(self->egl->x11.dpy);
} }
static void gsr_damage_on_event_cursor(gsr_damage *self) { static void gsr_damage_on_tick_cursor(gsr_damage *self) {
Window root_return = None; vec2i prev_cursor_pos = self->cursor.position;
Window child_return = None; gsr_cursor_tick(&self->cursor, self->window);
int dummy_i; if(self->cursor.position.x != prev_cursor_pos.x || self->cursor.position.y != prev_cursor_pos.y) {
unsigned int dummy_u; const gsr_rectangle cursor_region = { self->cursor.position, self->cursor.size };
vec2i cursor_position = {0, 0};
XQueryPointer(self->egl->x11.dpy, self->window, &root_return, &child_return, &dummy_i, &dummy_i, &cursor_position.x, &cursor_position.y, &dummy_u);
if(cursor_position.x != self->cursor_position.x || cursor_position.y != self->cursor_position.y) {
self->cursor_position = cursor_position;
const gsr_rectangle cursor_region = { self->cursor_position, {64, 64} }; // TODO: Track cursor size
switch(self->track_type) { switch(self->track_type) {
case GSR_DAMAGE_TRACK_NONE: { case GSR_DAMAGE_TRACK_NONE: {
self->damaged = true; self->damaged = true;
@@ -302,14 +302,17 @@ void gsr_damage_on_event(gsr_damage *self, XEvent *xev) {
if(self->damage_event && xev->type == self->damage_event + XDamageNotify) if(self->damage_event && xev->type == self->damage_event + XDamageNotify)
gsr_damage_on_damage_event(self, xev); gsr_damage_on_damage_event(self, xev);
if(self->track_cursor)
gsr_cursor_on_event(&self->cursor, xev);
} }
void gsr_damage_tick(gsr_damage *self) { void gsr_damage_tick(gsr_damage *self) {
if(self->damage_event == 0 || self->track_type == GSR_DAMAGE_TRACK_NONE) if(self->damage_event == 0 || self->track_type == GSR_DAMAGE_TRACK_NONE)
return; return;
if(self->track_cursor && !self->damaged) if(self->track_cursor && self->cursor.visible && !self->damaged)
gsr_damage_on_event_cursor(self); gsr_damage_on_tick_cursor(self);
} }
bool gsr_damage_is_damaged(gsr_damage *self) { bool gsr_damage_is_damaged(gsr_damage *self) {

View File

@@ -887,6 +887,8 @@ static void open_video_hardware(AVCodecContext *codec_context, VideoQuality vide
} else { } else {
// TODO: More quality options // TODO: More quality options
//av_dict_set_int(&options, "low_power", 1, 0); //av_dict_set_int(&options, "low_power", 1, 0);
// Improves performance but increases vram
av_dict_set_int(&options, "async_depth", 8, 0);
if(codec_context->codec_id == AV_CODEC_ID_H264) { if(codec_context->codec_id == AV_CODEC_ID_H264) {
// TODO: // TODO:
@@ -3389,7 +3391,7 @@ int main(int argc, char **argv) {
const int64_t expected_frames = std::round((this_video_frame_time - record_start_time) / target_fps); const int64_t expected_frames = std::round((this_video_frame_time - record_start_time) / target_fps);
int num_frames = std::max((int64_t)0LL, expected_frames - video_pts_counter); int num_frames = std::max((int64_t)0LL, expected_frames - video_pts_counter);
const double num_frames_seconds = num_frames * target_fps; const double num_frames_seconds = num_frames * target_fps;
if((damaged || num_frames_seconds >= damage_timeout_seconds) && !paused/* && fps_counter < fps + 100*/) { if((damaged || (framerate_mode == FramerateMode::CONSTANT && num_frames > 0) || (framerate_mode != FramerateMode::CONSTANT && num_frames_seconds >= damage_timeout_seconds)) && !paused) {
gsr_damage_clear(&damage); gsr_damage_clear(&damage);
if(capture->clear_damage) if(capture->clear_damage)
capture->clear_damage(capture); capture->clear_damage(capture);