Time based audio latency, test, might fix some shits
@@ -24,6 +24,7 @@
 typedef struct {
     void *handle;
     unsigned int frames;
+    double latency_seconds;
 } SoundDevice;
 
 struct AudioInput {
@@ -53,9 +54,9 @@ void sound_device_close(SoundDevice *device);
 
 /*
     Returns the next chunk of audio into @buffer.
-    Returns the number of frames read, or a negative value on failure.
+    Returns the number of bytes read, or a negative value on failure.
 */
-int sound_device_read_next_chunk(SoundDevice *device, void **buffer);
+int sound_device_read_next_chunk(SoundDevice *device, void **buffer, double timeout_sec);
 
 std::vector<AudioInput> get_pulseaudio_inputs();
 
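Note: a minimal sketch of how the changed header API above is meant to be called. This is not part of the commit; `device` is a placeholder, the 0.5 second timeout mirrors the value main.cpp passes below, and process_audio is a hypothetical consumer:

    void *buf = NULL;
    // Blocks for at most 0.5 seconds. Returns the number of bytes read, or a negative value on failure/timeout.
    int bytes = sound_device_read_next_chunk(&device, &buf, 0.5);
    if(bytes >= 0) {
        // device.latency_seconds now holds how long this read waited for the chunk (filled in by the PulseAudio read path below).
        process_audio(buf, bytes, device.latency_seconds);
    }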
src/main.cpp (109 lines changed)
@@ -315,7 +315,7 @@ static AVCodecContext* create_audio_codec_context(int fps, AudioCodec audio_code
 #endif
 
     codec_context->time_base.num = 1;
-    codec_context->time_base.den = codec_context->sample_rate;
+    codec_context->time_base.den = AV_TIME_BASE;
     codec_context->framerate.num = fps;
    codec_context->framerate.den = 1;
     codec_context->thread_count = 1;
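Note: AV_TIME_BASE is FFmpeg's 1,000,000 units-per-second constant, so with time_base = 1/AV_TIME_BASE the audio pts becomes a microsecond wall-clock timestamp instead of a sample counter. A minimal sketch of the conversion the later hunks apply (variable names follow main.cpp):

    // seconds since recording started, taken from the monotonic clock
    const double elapsed_seconds = this_audio_frame_time - record_start_time;
    // pts expressed in AV_TIME_BASE (microsecond) units
    const int64_t new_pts = elapsed_seconds * AV_TIME_BASE;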
@@ -1699,10 +1699,10 @@ int main(int argc, char **argv) {
         usage();
     }
 
-    AudioCodec audio_codec = AudioCodec::OPUS;
+    AudioCodec audio_codec = AudioCodec::AAC;
     const char *audio_codec_to_use = args["-ac"].value();
     if(!audio_codec_to_use)
-        audio_codec_to_use = "opus";
+        audio_codec_to_use = "aac";
 
     if(strcmp(audio_codec_to_use, "aac") == 0) {
         audio_codec = AudioCodec::AAC;
@@ -1715,10 +1715,10 @@ int main(int argc, char **argv) {
         usage();
     }
 
-    if(audio_codec == AudioCodec::FLAC) {
-        fprintf(stderr, "Warning: flac audio codec has been temporary disabled, using opus audio codec instead\n");
-        audio_codec_to_use = "opus";
-        audio_codec = AudioCodec::OPUS;
+    if(audio_codec == AudioCodec::OPUS || audio_codec == AudioCodec::FLAC) {
+        fprintf(stderr, "Warning: opus and flac audio codecs has been temporary disabled, using aac audio codec instead\n");
+        audio_codec_to_use = "aac";
+        audio_codec = AudioCodec::AAC;
     }
 
     bool overclock = false;
@@ -2397,58 +2397,21 @@ int main(int argc, char **argv) {
             swr_init(swr);
         }
 
-        const double target_audio_hz = 1.0 / (double)audio_track.codec_context->sample_rate;
-        double received_audio_time = clock_get_monotonic_seconds();
-        const int64_t timeout_ms = std::round((1000.0 / (double)audio_track.codec_context->sample_rate) * 1000.0);
-
-        // Remove this for now, it doesn't work well for everybody. The timing is different depending on system
-        #if 0
-        // Move audio forward by around 252 ms (for opus/aac), or 42ms for flac. This is just a shitty way to handle audio latency but pulseaudio latency calculation
-        // returns much lower value which isn't helpful.
-        if(needs_audio_conversion)
-            swr_convert(swr, &audio_device.frame->data[0], audio_track.codec_context->frame_size, (const uint8_t**)&empty_audio, audio_track.codec_context->frame_size);
-        else
-            audio_device.frame->data[0] = empty_audio;
-
-        int num_frames_to_delay = 12;
-        if(audio_codec == AudioCodec::FLAC)
-            num_frames_to_delay = 2;
-
-        for(int i = 0; i < num_frames_to_delay; ++i) {
-            if(audio_track.graph) {
-                std::lock_guard<std::mutex> lock(audio_filter_mutex);
-                // TODO: av_buffersrc_add_frame
-                if(av_buffersrc_write_frame(audio_device.src_filter_ctx, audio_device.frame) < 0) {
-                    fprintf(stderr, "Error: failed to add audio frame to filter\n");
-                }
-            } else {
-                int ret = avcodec_send_frame(audio_track.codec_context, audio_device.frame);
-                if(ret >= 0) {
-                    // TODO: Move to separate thread because this could write to network (for example when livestreaming)
-                    receive_frames(audio_track.codec_context, audio_track.stream_index, audio_track.stream, audio_device.frame->pts, av_format_context, record_start_time, frame_data_queue, replay_buffer_size_secs, frames_erased, write_output_mutex, paused_time_offset);
-                } else {
-                    fprintf(stderr, "Failed to encode audio!\n");
-                }
-            }
-            audio_device.frame->pts += audio_track.codec_context->frame_size;
-        }
-        #endif
+        const int64_t no_input_sleep_ms = 500;
 
         while(running) {
             void *sound_buffer;
             int sound_buffer_size = -1;
             if(audio_device.sound_device.handle)
-                sound_buffer_size = sound_device_read_next_chunk(&audio_device.sound_device, &sound_buffer);
+                sound_buffer_size = sound_device_read_next_chunk(&audio_device.sound_device, &sound_buffer, 0.5);
 
             const bool got_audio_data = sound_buffer_size >= 0;
 
             const double this_audio_frame_time = clock_get_monotonic_seconds() - paused_time_offset;
 
             if(paused) {
-                if(got_audio_data)
-                    received_audio_time = this_audio_frame_time;
-
                 if(!audio_device.sound_device.handle)
-                    usleep(timeout_ms * 1000);
+                    usleep(no_input_sleep_ms * 1000);
 
                 continue;
             }
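Note: after this hunk the capture loop paces itself with the 0.5 second timeout inside sound_device_read_next_chunk instead of a sample-rate derived timeout, and simply sleeps no_input_sleep_ms when there is no capture device. A condensed, hedged sketch of the resulting control flow (have_device stands in for audio_device.sound_device.handle; error handling omitted):

    while(running) {
        void *buf = NULL;
        int size = -1;
        if(have_device)
            size = sound_device_read_next_chunk(&audio_device.sound_device, &buf, 0.5); // blocks up to 0.5 s
        const bool got_audio_data = size >= 0;
        if(paused) {
            if(!have_device)
                usleep(no_input_sleep_ms * 1000); // 500 ms, nothing to wait on
            continue;
        }
        // ... encode either the captured chunk or silence, stamped with a wall-clock pts (see the next hunks)
    }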
@@ -2459,37 +2422,22 @@ int main(int argc, char **argv) {
                 break;
             }
 
-            // TODO: Is this |received_audio_time| really correct?
-            int64_t num_missing_frames = std::round((this_audio_frame_time - received_audio_time) / target_audio_hz / (int64_t)audio_track.codec_context->frame_size);
-            if(got_audio_data)
-                num_missing_frames = std::max((int64_t)0, num_missing_frames - 1);
-
-            if(!audio_device.sound_device.handle)
-                num_missing_frames = std::max((int64_t)1, num_missing_frames);
-
-            if(got_audio_data)
-                received_audio_time = this_audio_frame_time;
-
-            // Fucking hell is there a better way to do this? I JUST WANT TO KEEP VIDEO AND AUDIO SYNCED HOLY FUCK I WANT TO KILL MYSELF NOW.
-            // THIS PIECE OF SHIT WANTS EMPTY FRAMES OTHERWISE VIDEO PLAYS TOO FAST TO KEEP UP WITH AUDIO OR THE AUDIO PLAYS TOO EARLY.
-            // BUT WE CANT USE DELAYS TO GIVE DUMMY DATA BECAUSE PULSEAUDIO MIGHT GIVE AUDIO A BIG DELAYED!!!
-            // This garbage is needed because we want to produce constant frame rate videos instead of variable frame rate
-            // videos because bad software such as video editing software and VLC do not support variable frame rate software,
-            // despite nvidia shadowplay and xbox game bar producing variable frame rate videos.
-            // So we have to make sure we produce frames at the same relative rate as the video.
-            if(num_missing_frames >= 5 || !audio_device.sound_device.handle) {
+            if(!got_audio_data) {
                 // TODO:
                 //audio_track.frame->data[0] = empty_audio;
-                received_audio_time = this_audio_frame_time;
                 if(needs_audio_conversion)
                     swr_convert(swr, &audio_device.frame->data[0], audio_track.codec_context->frame_size, (const uint8_t**)&empty_audio, audio_track.codec_context->frame_size);
                 else
                     audio_device.frame->data[0] = empty_audio;
 
-                // TODO: Check if duplicate frame can be saved just by writing it with a different pts instead of sending it again
-                std::lock_guard<std::mutex> lock(audio_filter_mutex);
-                for(int i = 0; i < num_missing_frames; ++i) {
+                const int64_t new_pts = (this_audio_frame_time - record_start_time) * AV_TIME_BASE;
+                if(new_pts == audio_device.frame->pts)
+                    continue;
+                audio_device.frame->pts = new_pts;
+                //audio_device.frame->linesize[0] = sound_buffer_size / 2;
+
                 if(audio_track.graph) {
+                    std::lock_guard<std::mutex> lock(audio_filter_mutex);
                     // TODO: av_buffersrc_add_frame
                     if(av_buffersrc_write_frame(audio_device.src_filter_ctx, audio_device.frame) < 0) {
                         fprintf(stderr, "Error: failed to add audio frame to filter\n");
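Note: the silence path above and the captured-audio path below now share the same timestamping idea: derive the frame pts from the wall clock and skip a frame whose pts would repeat, instead of advancing pts by frame_size per encoded frame. Restated with comments (names exactly as in the diff):

    const int64_t new_pts = (this_audio_frame_time - record_start_time) * AV_TIME_BASE;
    if(new_pts == audio_device.frame->pts)
        continue;                          // same timestamp as the previous frame: drop it
    audio_device.frame->pts = new_pts;     // otherwise stamp the frame with "now" relative to recording start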
@@ -2503,12 +2451,10 @@ int main(int argc, char **argv) {
                         fprintf(stderr, "Failed to encode audio!\n");
                     }
                 }
-                audio_device.frame->pts += audio_track.codec_context->frame_size;
-                }
             }
 
             if(!audio_device.sound_device.handle)
-                usleep(timeout_ms * 1000);
+                usleep(no_input_sleep_ms * 1000);
 
             if(got_audio_data) {
                 // TODO: Instead of converting audio, get float audio from alsa. Or does alsa do conversion internally to get this format?
@@ -2517,6 +2463,12 @@ int main(int argc, char **argv) {
                 else
                     audio_device.frame->data[0] = (uint8_t*)sound_buffer;
 
+                const int64_t new_pts = (this_audio_frame_time - record_start_time) * AV_TIME_BASE;
+                if(new_pts == audio_device.frame->pts)
+                    continue;
+                audio_device.frame->pts = new_pts;
+                //audio_device.frame->linesize[0] = sound_buffer_size / 2;
+
                 if(audio_track.graph) {
                     std::lock_guard<std::mutex> lock(audio_filter_mutex);
                     // TODO: av_buffersrc_add_frame
@@ -2532,8 +2484,6 @@ int main(int argc, char **argv) {
                         fprintf(stderr, "Failed to encode audio!\n");
                     }
                 }
-
-                audio_device.frame->pts += audio_track.codec_context->frame_size;
             }
         }
 
@@ -2571,7 +2521,11 @@ int main(int argc, char **argv) {
 
         int err = 0;
         while ((err = av_buffersink_get_frame(audio_track.sink, aframe)) >= 0) {
-            aframe->pts = audio_track.pts;
+            const int64_t new_pts = ((clock_get_monotonic_seconds() - paused_time_offset) - record_start_time) * AV_TIME_BASE;
+            if(new_pts == aframe->pts)
+                continue;
+            aframe->pts = new_pts;
+            //aframe->linesize[0] = sound_buffer_size / 2;
             err = avcodec_send_frame(audio_track.codec_context, aframe);
             if(err >= 0){
                 // TODO: Move to separate thread because this could write to network (for example when livestreaming)
@@ -2580,7 +2534,6 @@ int main(int argc, char **argv) {
                 fprintf(stderr, "Failed to encode audio!\n");
             }
             av_frame_unref(aframe);
-            audio_track.pts += audio_track.codec_context->frame_size;
         }
     }
 }
@@ -41,6 +41,7 @@ struct pa_handle {
     size_t output_index, output_length;
 
     int operation_success;
+    double latency_seconds;
 };
 
 static void pa_sound_device_free(pa_handle *s) {
@@ -79,6 +80,7 @@ static pa_handle* pa_sound_device_new(const char *server,
     p->read_data = NULL;
     p->read_length = 0;
     p->read_index = 0;
+    p->latency_seconds = 0;
 
     const int buffer_size = attr->maxlength;
     void *buffer = malloc(buffer_size);
@@ -153,24 +155,19 @@ fail:
     return NULL;
 }
 
-// Returns a negative value on failure or if |p->output_length| data is not available within the time frame specified by the sample rate
-static int pa_sound_device_read(pa_handle *p) {
+static int pa_sound_device_read(pa_handle *p, double timeout_seconds) {
     assert(p);
 
-    const int64_t timeout_ms = std::round((1000.0 / (double)pa_stream_get_sample_spec(p->stream)->rate) * 1000.0);
     const double start_time = clock_get_monotonic_seconds();
 
-    bool success = false;
     int r = 0;
+    //pa_usec_t latency = 0;
+    //int negative = 0;
     int *rerror = &r;
     CHECK_DEAD_GOTO(p, rerror, fail);
 
-    while (p->output_index < p->output_length) {
-        if((clock_get_monotonic_seconds() - start_time) * 1000 >= timeout_ms)
-            return -1;
-
-        if(!p->read_data) {
-            pa_mainloop_prepare(p->mainloop, 1 * 1000); // 1 ms
+    while(clock_get_monotonic_seconds() - start_time < timeout_seconds) {
+        pa_mainloop_prepare(p->mainloop, 1 * 1000);
         pa_mainloop_poll(p->mainloop);
         pa_mainloop_dispatch(p->mainloop);
 
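Note: the rewritten reader is a timeout-bounded poll: it keeps iterating the PulseAudio mainloop in roughly 1 ms steps until data has been peeked or timeout_seconds has elapsed, then (next hunk) copies the chunk out, records how long it waited in latency_seconds and returns the byte count. A reduced sketch of that loop shape; the pa_stream_peek() call that fills p->read_data / p->read_length sits in unchanged lines outside this hunk:

    const double start_time = clock_get_monotonic_seconds();
    while(clock_get_monotonic_seconds() - start_time < timeout_seconds) {
        pa_mainloop_prepare(p->mainloop, 1 * 1000); // wake up after at most ~1 ms
        pa_mainloop_poll(p->mainloop);
        pa_mainloop_dispatch(p->mainloop);
        if(p->read_data && p->read_length > 0)
            break; // a chunk arrived within the timeout
    }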
@@ -180,51 +177,19 @@ static int pa_sound_device_read(pa_handle *p) {
         if(!p->read_data && p->read_length == 0)
             continue;
 
-        if(!p->read_data && p->read_length > 0) {
-            // There is a hole in the stream :( drop it. Maybe we should generate silence instead? TODO
-            if(pa_stream_drop(p->stream) != 0)
-                goto fail;
-            continue;
-        }
-
-        if(p->read_length <= 0) {
-            p->read_data = NULL;
-            if(pa_stream_drop(p->stream) != 0)
-                goto fail;
-
-            CHECK_DEAD_GOTO(p, rerror, fail);
-            continue;
+        // pa_operation_unref(pa_stream_update_timing_info(p->stream, NULL, NULL));
+        // if (pa_stream_get_latency(p->stream, &latency, &negative) >= 0) {
+        //     fprintf(stderr, "latency: %lu ms, negative: %d, extra delay: %f ms\n", latency / 1000, negative, (clock_get_monotonic_seconds() - start_time) * 1000.0);
+        // }
+
+        memcpy(p->output_data, p->read_data, p->read_length);
+        pa_stream_drop(p->stream);
+        p->latency_seconds = clock_get_monotonic_seconds() - start_time;
+        return p->read_length;
     }
-        }
-
-        const size_t space_free_in_output_buffer = p->output_length - p->output_index;
-        if(space_free_in_output_buffer < p->read_length) {
-            memcpy(p->output_data + p->output_index, (const uint8_t*)p->read_data + p->read_index, space_free_in_output_buffer);
-            p->output_index = 0;
-            p->read_index += space_free_in_output_buffer;
-            p->read_length -= space_free_in_output_buffer;
-            break;
-        } else {
-            memcpy(p->output_data + p->output_index, (const uint8_t*)p->read_data + p->read_index, p->read_length);
-            p->output_index += p->read_length;
-            p->read_data = NULL;
-            p->read_length = 0;
-            p->read_index = 0;
-
-            if(pa_stream_drop(p->stream) != 0)
-                goto fail;
-
-            if(p->output_index == p->output_length) {
-                p->output_index = 0;
-                break;
-            }
-        }
-    }
-
-    success = true;
 
 fail:
-    return success ? 0 : -1;
+    return -1;
 }
 
 static pa_sample_format_t audio_format_to_pulse_audio_format(AudioFormat audio_format) {
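Note: because the rewrite copies out exactly one peeked chunk and returns its size (instead of filling a fixed output_length buffer across iterations), callers now receive a byte count rather than a frame count, matching the header comment change above. If a frame count is needed it has to be derived from the sample format; a hypothetical helper, not present in the tree:

    // bytes -> frames for an interleaved layout; bytes_per_sample and channels depend on the negotiated format
    static int bytes_to_frames(int bytes, int bytes_per_sample, int channels) {
        return bytes / (bytes_per_sample * channels);
    }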
@@ -269,6 +234,7 @@ int sound_device_get_by_name(SoundDevice *device, const char *device_name, const
 
     device->handle = handle;
     device->frames = period_frame_size;
+    device->latency_seconds = 0.0;
     return 0;
 }
 
@@ -278,14 +244,16 @@ void sound_device_close(SoundDevice *device) {
     device->handle = NULL;
 }
 
-int sound_device_read_next_chunk(SoundDevice *device, void **buffer) {
+int sound_device_read_next_chunk(SoundDevice *device, void **buffer, double timeout_sec) {
     pa_handle *pa = (pa_handle*)device->handle;
-    if(pa_sound_device_read(pa) < 0) {
+    int size = pa_sound_device_read(pa, timeout_sec);
+    if(size < 0) {
        //fprintf(stderr, "pa_simple_read() failed: %s\n", pa_strerror(error));
         return -1;
     }
     *buffer = pa->output_data;
-    return device->frames;
+    device->latency_seconds = pa->latency_seconds;
+    return size;
 }
 
 static void pa_state_cb(pa_context *c, void *userdata) {