Merge pull request #1163 from FearlessTobi/add-audio-stretching

audio_core: Add audio stretching support
pull/8/head
bunnei 6 years ago committed by GitHub
commit 926dd41587
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

3
.gitmodules vendored

@ -31,3 +31,6 @@
[submodule "opus"]
path = externals/opus
url = https://github.com/ogniK5377/opus.git
[submodule "soundtouch"]
path = externals/soundtouch
url = https://github.com/citra-emu/ext-soundtouch.git

@ -50,6 +50,9 @@ add_subdirectory(open_source_archives EXCLUDE_FROM_ALL)
add_library(unicorn-headers INTERFACE)
target_include_directories(unicorn-headers INTERFACE ./unicorn/include)
# SoundTouch
add_subdirectory(soundtouch)
# Xbyak
if (ARCHITECTURE_x86_64)
# Defined before "dynarmic" above

@ -0,0 +1 @@
Subproject commit 060181eaf273180d3a7e87349895bd0cb6ccbf4a

@ -17,6 +17,8 @@ add_library(audio_core STATIC
sink_stream.h
stream.cpp
stream.h
time_stretch.cpp
time_stretch.h
$<$<BOOL:${ENABLE_CUBEB}>:cubeb_sink.cpp cubeb_sink.h>
)
@ -24,6 +26,7 @@ add_library(audio_core STATIC
create_target_directory_groups(audio_core)
target_link_libraries(audio_core PUBLIC common core)
target_link_libraries(audio_core PRIVATE SoundTouch)
if(ENABLE_CUBEB)
target_link_libraries(audio_core PRIVATE cubeb)

@ -3,27 +3,23 @@
// Refer to the license.txt file included.
#include <algorithm>
#include <atomic>
#include <cstring>
#include <mutex>
#include "audio_core/cubeb_sink.h"
#include "audio_core/stream.h"
#include "audio_core/time_stretch.h"
#include "common/logging/log.h"
#include "common/ring_buffer.h"
#include "core/settings.h"
namespace AudioCore {
class SinkStreamImpl final : public SinkStream {
class CubebSinkStream final : public SinkStream {
public:
SinkStreamImpl(cubeb* ctx, u32 sample_rate, u32 num_channels_, cubeb_devid output_device,
const std::string& name)
: ctx{ctx}, num_channels{num_channels_} {
if (num_channels == 6) {
// 6-channel audio does not seem to work with cubeb + SDL, so we downsample this to 2
// channel for now
is_6_channel = true;
num_channels = 2;
}
CubebSinkStream(cubeb* ctx, u32 sample_rate, u32 num_channels_, cubeb_devid output_device,
const std::string& name)
: ctx{ctx}, num_channels{std::min(num_channels_, 2u)}, time_stretch{sample_rate,
num_channels} {
cubeb_stream_params params{};
params.rate = sample_rate;
@ -38,7 +34,7 @@ public:
if (cubeb_stream_init(ctx, &stream_backend, name.c_str(), nullptr, nullptr, output_device,
&params, std::max(512u, minimum_latency),
&SinkStreamImpl::DataCallback, &SinkStreamImpl::StateCallback,
&CubebSinkStream::DataCallback, &CubebSinkStream::StateCallback,
this) != CUBEB_OK) {
LOG_CRITICAL(Audio_Sink, "Error initializing cubeb stream");
return;
@ -50,7 +46,7 @@ public:
}
}
~SinkStreamImpl() {
~CubebSinkStream() {
if (!ctx) {
return;
}
@ -62,27 +58,32 @@ public:
cubeb_stream_destroy(stream_backend);
}
void EnqueueSamples(u32 num_channels, const std::vector<s16>& samples) override {
if (!ctx) {
void EnqueueSamples(u32 source_num_channels, const std::vector<s16>& samples) override {
if (source_num_channels > num_channels) {
// Downsample 6 channels to 2
std::vector<s16> buf;
buf.reserve(samples.size() * num_channels / source_num_channels);
for (size_t i = 0; i < samples.size(); i += source_num_channels) {
for (size_t ch = 0; ch < num_channels; ch++) {
buf.push_back(samples[i + ch]);
}
}
queue.Push(buf);
return;
}
std::lock_guard lock{queue_mutex};
queue.Push(samples);
}
queue.reserve(queue.size() + samples.size() * GetNumChannels());
size_t SamplesInQueue(u32 num_channels) const override {
if (!ctx)
return 0;
if (is_6_channel) {
// Downsample 6 channels to 2
const size_t sample_count_copy_size = samples.size() * 2;
queue.reserve(sample_count_copy_size);
for (size_t i = 0; i < samples.size(); i += num_channels) {
queue.push_back(samples[i]);
queue.push_back(samples[i + 1]);
}
} else {
// Copy as-is
std::copy(samples.begin(), samples.end(), std::back_inserter(queue));
}
return queue.Size() / num_channels;
}
void Flush() override {
should_flush = true;
}
u32 GetNumChannels() const {
@ -95,10 +96,11 @@ private:
cubeb* ctx{};
cubeb_stream* stream_backend{};
u32 num_channels{};
bool is_6_channel{};
std::mutex queue_mutex;
std::vector<s16> queue;
Common::RingBuffer<s16, 0x10000> queue;
std::array<s16, 2> last_frame;
std::atomic<bool> should_flush{};
TimeStretcher time_stretch;
static long DataCallback(cubeb_stream* stream, void* user_data, const void* input_buffer,
void* output_buffer, long num_frames);
@ -144,38 +146,52 @@ CubebSink::~CubebSink() {
SinkStream& CubebSink::AcquireSinkStream(u32 sample_rate, u32 num_channels,
const std::string& name) {
sink_streams.push_back(
std::make_unique<SinkStreamImpl>(ctx, sample_rate, num_channels, output_device, name));
std::make_unique<CubebSinkStream>(ctx, sample_rate, num_channels, output_device, name));
return *sink_streams.back();
}
long SinkStreamImpl::DataCallback(cubeb_stream* stream, void* user_data, const void* input_buffer,
void* output_buffer, long num_frames) {
SinkStreamImpl* impl = static_cast<SinkStreamImpl*>(user_data);
long CubebSinkStream::DataCallback(cubeb_stream* stream, void* user_data, const void* input_buffer,
void* output_buffer, long num_frames) {
CubebSinkStream* impl = static_cast<CubebSinkStream*>(user_data);
u8* buffer = reinterpret_cast<u8*>(output_buffer);
if (!impl) {
return {};
}
std::lock_guard lock{impl->queue_mutex};
const size_t num_channels = impl->GetNumChannels();
const size_t samples_to_write = num_channels * num_frames;
size_t samples_written;
if (Settings::values.enable_audio_stretching) {
const std::vector<s16> in{impl->queue.Pop()};
const size_t num_in{in.size() / num_channels};
s16* const out{reinterpret_cast<s16*>(buffer)};
const size_t out_frames = impl->time_stretch.Process(in.data(), num_in, out, num_frames);
samples_written = out_frames * num_channels;
const size_t frames_to_write{
std::min(impl->queue.size() / impl->GetNumChannels(), static_cast<size_t>(num_frames))};
if (impl->should_flush) {
impl->time_stretch.Flush();
impl->should_flush = false;
}
} else {
samples_written = impl->queue.Pop(buffer, samples_to_write);
}
memcpy(buffer, impl->queue.data(), frames_to_write * sizeof(s16) * impl->GetNumChannels());
impl->queue.erase(impl->queue.begin(),
impl->queue.begin() + frames_to_write * impl->GetNumChannels());
if (samples_written >= num_channels) {
std::memcpy(&impl->last_frame[0], buffer + (samples_written - num_channels) * sizeof(s16),
num_channels * sizeof(s16));
}
if (frames_to_write < num_frames) {
// Fill the rest of the frames with silence
memset(buffer + frames_to_write * sizeof(s16) * impl->GetNumChannels(), 0,
(num_frames - frames_to_write) * sizeof(s16) * impl->GetNumChannels());
// Fill the rest of the frames with last_frame
for (size_t i = samples_written; i < samples_to_write; i += num_channels) {
std::memcpy(buffer + i * sizeof(s16), &impl->last_frame[0], num_channels * sizeof(s16));
}
return num_frames;
}
void SinkStreamImpl::StateCallback(cubeb_stream* stream, void* user_data, cubeb_state state) {}
void CubebSinkStream::StateCallback(cubeb_stream* stream, void* user_data, cubeb_state state) {}
std::vector<std::string> ListCubebSinkDevices() {
std::vector<std::string> device_list;

@ -21,6 +21,12 @@ public:
private:
struct NullSinkStreamImpl final : SinkStream {
void EnqueueSamples(u32 /*num_channels*/, const std::vector<s16>& /*samples*/) override {}
size_t SamplesInQueue(u32 /*num_channels*/) const override {
return 0;
}
void Flush() override {}
} null_sink_stream;
};

@ -25,6 +25,10 @@ public:
* @param samples Samples in interleaved stereo PCM16 format.
*/
virtual void EnqueueSamples(u32 num_channels, const std::vector<s16>& samples) = 0;
virtual std::size_t SamplesInQueue(u32 num_channels) const = 0;
virtual void Flush() = 0;
};
using SinkStreamPtr = std::unique_ptr<SinkStream>;

@ -73,6 +73,7 @@ static void VolumeAdjustSamples(std::vector<s16>& samples) {
void Stream::PlayNextBuffer() {
if (!IsPlaying()) {
// Ensure we are in playing state before playing the next buffer
sink_stream.Flush();
return;
}
@ -83,6 +84,7 @@ void Stream::PlayNextBuffer() {
if (queued_buffers.empty()) {
// No queued buffers - we are effectively paused
sink_stream.Flush();
return;
}
@ -90,6 +92,7 @@ void Stream::PlayNextBuffer() {
queued_buffers.pop();
VolumeAdjustSamples(active_buffer->Samples());
sink_stream.EnqueueSamples(GetNumChannels(), active_buffer->GetSamples());
CoreTiming::ScheduleEventThreadsafe(GetBufferReleaseCycles(*active_buffer), release_event, {});

@ -0,0 +1,68 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <cmath>
#include <cstddef>
#include "audio_core/time_stretch.h"
#include "common/logging/log.h"
namespace AudioCore {
TimeStretcher::TimeStretcher(u32 sample_rate, u32 channel_count)
: m_sample_rate(sample_rate), m_channel_count(channel_count) {
m_sound_touch.setChannels(channel_count);
m_sound_touch.setSampleRate(sample_rate);
m_sound_touch.setPitch(1.0);
m_sound_touch.setTempo(1.0);
}
void TimeStretcher::Clear() {
m_sound_touch.clear();
}
void TimeStretcher::Flush() {
m_sound_touch.flush();
}
size_t TimeStretcher::Process(const s16* in, size_t num_in, s16* out, size_t num_out) {
const double time_delta = static_cast<double>(num_out) / m_sample_rate; // seconds
// We were given actual_samples number of samples, and num_samples were requested from us.
double current_ratio = static_cast<double>(num_in) / static_cast<double>(num_out);
const double max_latency = 1.0; // seconds
const double max_backlog = m_sample_rate * max_latency;
const double backlog_fullness = m_sound_touch.numSamples() / max_backlog;
if (backlog_fullness > 5.0) {
// Too many samples in backlog: Don't push anymore on
num_in = 0;
}
// We ideally want the backlog to be about 50% full.
// This gives some headroom both ways to prevent underflow and overflow.
// We tweak current_ratio to encourage this.
constexpr double tweak_time_scale = 0.05; // seconds
const double tweak_correction = (backlog_fullness - 0.5) * (time_delta / tweak_time_scale);
current_ratio *= std::pow(1.0 + 2.0 * tweak_correction, tweak_correction < 0 ? 3.0 : 1.0);
// This low-pass filter smoothes out variance in the calculated stretch ratio.
// The time-scale determines how responsive this filter is.
constexpr double lpf_time_scale = 2.0; // seconds
const double lpf_gain = 1.0 - std::exp(-time_delta / lpf_time_scale);
m_stretch_ratio += lpf_gain * (current_ratio - m_stretch_ratio);
// Place a lower limit of 5% speed. When a game boots up, there will be
// many silence samples. These do not need to be timestretched.
m_stretch_ratio = std::max(m_stretch_ratio, 0.05);
m_sound_touch.setTempo(m_stretch_ratio);
LOG_DEBUG(Audio, "{:5}/{:5} ratio:{:0.6f} backlog:{:0.6f}", num_in, num_out, m_stretch_ratio,
backlog_fullness);
m_sound_touch.putSamples(in, num_in);
return m_sound_touch.receiveSamples(out, num_out);
}
} // namespace AudioCore

@ -0,0 +1,36 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <cstddef>
#include <SoundTouch.h>
#include "common/common_types.h"
namespace AudioCore {
class TimeStretcher {
public:
TimeStretcher(u32 sample_rate, u32 channel_count);
/// @param in Input sample buffer
/// @param num_in Number of input frames in `in`
/// @param out Output sample buffer
/// @param num_out Desired number of output frames in `out`
/// @returns Actual number of frames written to `out`
size_t Process(const s16* in, size_t num_in, s16* out, size_t num_out);
void Clear();
void Flush();
private:
u32 m_sample_rate;
u32 m_channel_count;
soundtouch::SoundTouch m_sound_touch;
double m_stretch_ratio = 1.0;
};
} // namespace AudioCore

@ -71,6 +71,7 @@ add_library(common STATIC
param_package.cpp
param_package.h
quaternion.h
ring_buffer.h
scm_rev.cpp
scm_rev.h
scope_exit.h

@ -0,0 +1,111 @@
// Copyright 2018 yuzu emulator team
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <algorithm>
#include <array>
#include <atomic>
#include <cstddef>
#include <cstring>
#include <type_traits>
#include <vector>
#include "common/common_types.h"
namespace Common {
/// SPSC ring buffer
/// @tparam T Element type
/// @tparam capacity Number of slots in ring buffer
/// @tparam granularity Slot size in terms of number of elements
template <typename T, size_t capacity, size_t granularity = 1>
class RingBuffer {
/// A "slot" is made of `granularity` elements of `T`.
static constexpr size_t slot_size = granularity * sizeof(T);
// T must be safely memcpy-able and have a trivial default constructor.
static_assert(std::is_trivial_v<T>);
// Ensure capacity is sensible.
static_assert(capacity < std::numeric_limits<size_t>::max() / 2 / granularity);
static_assert((capacity & (capacity - 1)) == 0, "capacity must be a power of two");
// Ensure lock-free.
static_assert(std::atomic<size_t>::is_always_lock_free);
public:
/// Pushes slots into the ring buffer
/// @param new_slots Pointer to the slots to push
/// @param slot_count Number of slots to push
/// @returns The number of slots actually pushed
size_t Push(const void* new_slots, size_t slot_count) {
const size_t write_index = m_write_index.load();
const size_t slots_free = capacity + m_read_index.load() - write_index;
const size_t push_count = std::min(slot_count, slots_free);
const size_t pos = write_index % capacity;
const size_t first_copy = std::min(capacity - pos, push_count);
const size_t second_copy = push_count - first_copy;
const char* in = static_cast<const char*>(new_slots);
std::memcpy(m_data.data() + pos * granularity, in, first_copy * slot_size);
in += first_copy * slot_size;
std::memcpy(m_data.data(), in, second_copy * slot_size);
m_write_index.store(write_index + push_count);
return push_count;
}
size_t Push(const std::vector<T>& input) {
return Push(input.data(), input.size());
}
/// Pops slots from the ring buffer
/// @param output Where to store the popped slots
/// @param max_slots Maximum number of slots to pop
/// @returns The number of slots actually popped
size_t Pop(void* output, size_t max_slots = ~size_t(0)) {
const size_t read_index = m_read_index.load();
const size_t slots_filled = m_write_index.load() - read_index;
const size_t pop_count = std::min(slots_filled, max_slots);
const size_t pos = read_index % capacity;
const size_t first_copy = std::min(capacity - pos, pop_count);
const size_t second_copy = pop_count - first_copy;
char* out = static_cast<char*>(output);
std::memcpy(out, m_data.data() + pos * granularity, first_copy * slot_size);
out += first_copy * slot_size;
std::memcpy(out, m_data.data(), second_copy * slot_size);
m_read_index.store(read_index + pop_count);
return pop_count;
}
std::vector<T> Pop(size_t max_slots = ~size_t(0)) {
std::vector<T> out(std::min(max_slots, capacity) * granularity);
const size_t count = Pop(out.data(), out.size() / granularity);
out.resize(count * granularity);
return out;
}
/// @returns Number of slots used
size_t Size() const {
return m_write_index.load() - m_read_index.load();
}
/// @returns Maximum size of ring buffer
constexpr size_t Capacity() const {
return capacity;
}
private:
// It is important to align the below variables for performance reasons:
// Having them on the same cache-line would result in false-sharing between them.
alignas(128) std::atomic<size_t> m_read_index{0};
alignas(128) std::atomic<size_t> m_write_index{0};
std::array<T, granularity * capacity> m_data;
};
} // namespace Common

@ -148,6 +148,7 @@ struct Values {
// Audio
std::string sink_id;
bool enable_audio_stretching;
std::string audio_device_id;
float volume;

@ -120,6 +120,9 @@ TelemetrySession::TelemetrySession() {
Telemetry::AppendOSInfo(field_collection);
// Log user configuration information
AddField(Telemetry::FieldType::UserConfig, "Audio_SinkId", Settings::values.sink_id);
AddField(Telemetry::FieldType::UserConfig, "Audio_EnableAudioStretching",
Settings::values.enable_audio_stretching);
AddField(Telemetry::FieldType::UserConfig, "Core_UseCpuJit", Settings::values.use_cpu_jit);
AddField(Telemetry::FieldType::UserConfig, "Core_UseMultiCore",
Settings::values.use_multi_core);

@ -1,5 +1,6 @@
add_executable(tests
common/param_package.cpp
common/ring_buffer.cpp
core/arm/arm_test_common.cpp
core/arm/arm_test_common.h
core/core_timing.cpp

@ -0,0 +1,130 @@
// Copyright 2018 yuzu emulator team
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <array>
#include <cstddef>
#include <numeric>
#include <thread>
#include <vector>
#include <catch2/catch.hpp>
#include "common/ring_buffer.h"
namespace Common {
TEST_CASE("RingBuffer: Basic Tests", "[common]") {
RingBuffer<char, 4, 1> buf;
// Pushing values into a ring buffer with space should succeed.
for (size_t i = 0; i < 4; i++) {
const char elem = static_cast<char>(i);
const size_t count = buf.Push(&elem, 1);
REQUIRE(count == 1);
}
REQUIRE(buf.Size() == 4);
// Pushing values into a full ring buffer should fail.
{
const char elem = static_cast<char>(42);
const size_t count = buf.Push(&elem, 1);
REQUIRE(count == 0);
}
REQUIRE(buf.Size() == 4);
// Popping multiple values from a ring buffer with values should succeed.
{
const std::vector<char> popped = buf.Pop(2);
REQUIRE(popped.size() == 2);
REQUIRE(popped[0] == 0);
REQUIRE(popped[1] == 1);
}
REQUIRE(buf.Size() == 2);
// Popping a single value from a ring buffer with values should succeed.
{
const std::vector<char> popped = buf.Pop(1);
REQUIRE(popped.size() == 1);
REQUIRE(popped[0] == 2);
}
REQUIRE(buf.Size() == 1);
// Pushing more values than space available should partially suceed.
{
std::vector<char> to_push(6);
std::iota(to_push.begin(), to_push.end(), 88);
const size_t count = buf.Push(to_push);
REQUIRE(count == 3);
}
REQUIRE(buf.Size() == 4);
// Doing an unlimited pop should pop all values.
{
const std::vector<char> popped = buf.Pop();
REQUIRE(popped.size() == 4);
REQUIRE(popped[0] == 3);
REQUIRE(popped[1] == 88);
REQUIRE(popped[2] == 89);
REQUIRE(popped[3] == 90);
}
REQUIRE(buf.Size() == 0);
}
TEST_CASE("RingBuffer: Threaded Test", "[common]") {
RingBuffer<char, 4, 2> buf;
const char seed = 42;
const size_t count = 1000000;
size_t full = 0;
size_t empty = 0;
const auto next_value = [](std::array<char, 2>& value) {
value[0] += 1;
value[1] += 2;
};
std::thread producer{[&] {
std::array<char, 2> value = {seed, seed};
size_t i = 0;
while (i < count) {
if (const size_t c = buf.Push(&value[0], 1); c > 0) {
REQUIRE(c == 1);
i++;
next_value(value);
} else {
full++;
std::this_thread::yield();
}
}
}};
std::thread consumer{[&] {
std::array<char, 2> value = {seed, seed};
size_t i = 0;
while (i < count) {
if (const std::vector<char> v = buf.Pop(1); v.size() > 0) {
REQUIRE(v.size() == 2);
REQUIRE(v[0] == value[0]);
REQUIRE(v[1] == value[1]);
i++;
next_value(value);
} else {
empty++;
std::this_thread::yield();
}
}
}};
producer.join();
consumer.join();
REQUIRE(buf.Size() == 0);
printf("RingBuffer: Threaded Test: full: %zu, empty: %zu\n", full, empty);
}
} // namespace Common

@ -95,6 +95,8 @@ void Config::ReadValues() {
qt_config->beginGroup("Audio");
Settings::values.sink_id = qt_config->value("output_engine", "auto").toString().toStdString();
Settings::values.enable_audio_stretching =
qt_config->value("enable_audio_stretching", true).toBool();
Settings::values.audio_device_id =
qt_config->value("output_device", "auto").toString().toStdString();
Settings::values.volume = qt_config->value("volume", 1).toFloat();
@ -230,6 +232,7 @@ void Config::SaveValues() {
qt_config->beginGroup("Audio");
qt_config->setValue("output_engine", QString::fromStdString(Settings::values.sink_id));
qt_config->setValue("enable_audio_stretching", Settings::values.enable_audio_stretching);
qt_config->setValue("output_device", QString::fromStdString(Settings::values.audio_device_id));
qt_config->setValue("volume", Settings::values.volume);
qt_config->endGroup();

@ -46,6 +46,8 @@ void ConfigureAudio::setConfiguration() {
}
ui->output_sink_combo_box->setCurrentIndex(new_sink_index);
ui->toggle_audio_stretching->setChecked(Settings::values.enable_audio_stretching);
// The device list cannot be pre-populated (nor listed) until the output sink is known.
updateAudioDevices(new_sink_index);
@ -67,6 +69,7 @@ void ConfigureAudio::applyConfiguration() {
Settings::values.sink_id =
ui->output_sink_combo_box->itemText(ui->output_sink_combo_box->currentIndex())
.toStdString();
Settings::values.enable_audio_stretching = ui->toggle_audio_stretching->isChecked();
Settings::values.audio_device_id =
ui->audio_device_combo_box->itemText(ui->audio_device_combo_box->currentIndex())
.toStdString();

@ -31,6 +31,16 @@
</item>
</layout>
</item>
<item>
<widget class="QCheckBox" name="toggle_audio_stretching">
<property name="toolTip">
<string>This post-processing effect adjusts audio speed to match emulation speed and helps prevent audio stutter. This however increases audio latency.</string>
</property>
<property name="text">
<string>Enable audio stretching</string>
</property>
</widget>
</item>
<item>
<layout class="QHBoxLayout">
<item>

@ -108,6 +108,8 @@ void Config::ReadValues() {
// Audio
Settings::values.sink_id = sdl2_config->Get("Audio", "output_engine", "auto");
Settings::values.enable_audio_stretching =
sdl2_config->GetBoolean("Audio", "enable_audio_stretching", true);
Settings::values.audio_device_id = sdl2_config->Get("Audio", "output_device", "auto");
Settings::values.volume = sdl2_config->GetReal("Audio", "volume", 1);

@ -150,6 +150,12 @@ swap_screen =
# auto (default): Auto-select, null: No audio output, cubeb: Cubeb audio engine (if available)
output_engine =
# Whether or not to enable the audio-stretching post-processing effect.
# This effect adjusts audio speed to match emulation speed and helps prevent audio stutter,
# at the cost of increasing audio latency.
# 0: No, 1 (default): Yes
enable_audio_stretching =
# Which audio device to use.
# auto (default): Auto-select
output_device =

Loading…
Cancel
Save