From 32acf700a113604592e0cef1832ab1e00a56fb04 Mon Sep 17 00:00:00 2001 From: Jacob Gelman <3182119+ladvoc@users.noreply.github.com> Date: Thu, 3 Apr 2025 10:42:11 +1100 Subject: [PATCH 01/13] Implement apm --- Runtime/Scripts/AudioProcessingModule.cs | 100 ++++++++++++++++++ Runtime/Scripts/AudioProcessingModule.cs.meta | 11 ++ 2 files changed, 111 insertions(+) create mode 100644 Runtime/Scripts/AudioProcessingModule.cs create mode 100644 Runtime/Scripts/AudioProcessingModule.cs.meta diff --git a/Runtime/Scripts/AudioProcessingModule.cs b/Runtime/Scripts/AudioProcessingModule.cs new file mode 100644 index 00000000..19d053e3 --- /dev/null +++ b/Runtime/Scripts/AudioProcessingModule.cs @@ -0,0 +1,100 @@ +using LiveKit.Proto; +using LiveKit.Internal.FFIClients.Requests; +using LiveKit.Internal; +using System; +using System.Runtime.CompilerServices; +[assembly: InternalsVisibleTo("Tests")] + +namespace LiveKit +{ + /// + /// Provides WebRTC audio processing capabilities including echo cancellation, noise suppression, + /// high-pass filtering, and gain control. + /// + public sealed class AudioProcessingModule + { + internal readonly FfiHandle Handle; + + /// + /// Initializes an instance with the specified audio processing features. + /// + /// Whether to enable echo cancellation. + /// Whether to enable noise suppression. + /// Whether to enable high-pass filtering. + /// Whether to enable gain control. + public AudioProcessingModule( + bool echoCancellationEnabled, + bool noiseSuppressionEnabled, + bool highPassFilterEnabled, + bool gainControllerEnabled) + { + using var request = FFIBridge.Instance.NewRequest(); + var newApm = request.request; + newApm.EchoCancellerEnabled = echoCancellationEnabled; + newApm.NoiseSuppressionEnabled = noiseSuppressionEnabled; + newApm.HighPassFilterEnabled = highPassFilterEnabled; + newApm.GainControllerEnabled = gainControllerEnabled; + + using var response = request.Send(); + FfiResponse res = response; + Handle = FfiHandle.FromOwnedHandle(res.NewApm.Apm.Handle); + } + + /// + /// Process the provided audio frame using the configured audio processing features. + /// + /// The audio frame to process. + /// + /// Important: Audio frames must be exactly 10 ms in duration. + /// + /// The input audio frame is modified in-place (if applicable) by the underlying audio + /// processing module (e.g., echo cancellation, noise suppression, etc.). + /// + public void ProcessStream(AudioFrame data) + { + using var request = FFIBridge.Instance.NewRequest(); + var processStream = request.request; + processStream.ApmHandle = (ulong)Handle.DangerousGetHandle(); + processStream.DataPtr = (ulong)data.Data; + processStream.Size = (uint)data.Length; + processStream.SampleRate = data.SampleRate; + processStream.NumChannels = data.NumChannels; + + using var response = request.Send(); + FfiResponse res = response; + if (res.ApmProcessStream.HasError) + { + throw new Exception(res.ApmProcessStream.Error); + } + } + + /// + /// Process the reverse audio frame (typically used for echo cancellation in a full-duplex setup). + /// + /// The audio frame to process. + /// + /// Important: Audio frames must be exactly 10 ms in duration. + /// + /// In an echo cancellation scenario, this method is used to process the "far-end" audio + /// prior to mixing or feeding it into the echo canceller. Like , the + /// input audio frame is modified in-place by the underlying processing module. + /// + public void ProcessReverseStream(AudioFrame data) + { + using var request = FFIBridge.Instance.NewRequest(); + var processReverseStream = request.request; + processReverseStream.ApmHandle = (ulong)Handle.DangerousGetHandle(); + processReverseStream.DataPtr = (ulong)data.Data; + processReverseStream.Size = (uint)data.Length; + processReverseStream.SampleRate = data.SampleRate; + processReverseStream.NumChannels = data.NumChannels; + + using var response = request.Send(); + FfiResponse res = response; + if (res.ApmProcessReverseStream.HasError) + { + throw new Exception(res.ApmProcessReverseStream.Error); + } + } + } +} \ No newline at end of file diff --git a/Runtime/Scripts/AudioProcessingModule.cs.meta b/Runtime/Scripts/AudioProcessingModule.cs.meta new file mode 100644 index 00000000..25a82999 --- /dev/null +++ b/Runtime/Scripts/AudioProcessingModule.cs.meta @@ -0,0 +1,11 @@ +fileFormatVersion: 2 +guid: bf605ac440b124e0f80a3a695dc7008a +MonoImporter: + externalObjects: {} + serializedVersion: 2 + defaultReferences: [] + executionOrder: 0 + icon: {instanceID: 0} + userData: + assetBundleName: + assetBundleVariant: From cb23c17a733f119323477158184256be5bd411d0 Mon Sep 17 00:00:00 2001 From: Jacob Gelman <3182119+ladvoc@users.noreply.github.com> Date: Thu, 3 Apr 2025 11:45:19 +1100 Subject: [PATCH 02/13] Expose internals for testing --- Runtime/Scripts/AudioFrame.cs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Runtime/Scripts/AudioFrame.cs b/Runtime/Scripts/AudioFrame.cs index 3aa450c8..c3a6886d 100644 --- a/Runtime/Scripts/AudioFrame.cs +++ b/Runtime/Scripts/AudioFrame.cs @@ -3,6 +3,8 @@ using LiveKit.Internal; using Unity.Collections; using Unity.Collections.LowLevel.Unsafe; +using System.Runtime.CompilerServices; +[assembly: InternalsVisibleTo("Tests")] namespace LiveKit { From aad73bc9857baabc9b4986d2c0b1536b95810c0c Mon Sep 17 00:00:00 2001 From: Jacob Gelman <3182119+ladvoc@users.noreply.github.com> Date: Thu, 3 Apr 2025 11:48:45 +1100 Subject: [PATCH 03/13] Test APM --- Tests/AudioProcessingModule.cs | 44 +++++++++++++++++++++++++++++ Tests/AudioProcessingModule.cs.meta | 11 ++++++++ 2 files changed, 55 insertions(+) create mode 100644 Tests/AudioProcessingModule.cs create mode 100644 Tests/AudioProcessingModule.cs.meta diff --git a/Tests/AudioProcessingModule.cs b/Tests/AudioProcessingModule.cs new file mode 100644 index 00000000..e9e727a6 --- /dev/null +++ b/Tests/AudioProcessingModule.cs @@ -0,0 +1,44 @@ +using System; +using NUnit.Framework; +using System.Runtime.InteropServices; + +namespace LiveKit.Tests +{ + public class AudioProcessingModuleTest + { + [Test] + public void TestAudioProcessing() + { + var apm = new AudioProcessingModule(true, true, true, true); + + apm.ProcessStream(CreateTestFrame()); + apm.ProcessReverseStream(CreateTestFrame()); + + Assert.Throws(() => apm.ProcessStream(CreateInvalidFrame())); + Assert.Throws(() => apm.ProcessReverseStream(CreateInvalidFrame())); + } + + private AudioFrame CreateTestFrame() + { + const int SampleRate = 48000; + const int NumChannels = 1; + const int FramesPerChunk = SampleRate / 100; + + var frame = new AudioFrame(SampleRate, NumChannels, FramesPerChunk); + + var data = new short[frame.SamplesPerChannel * frame.NumChannels]; + for (int i = 0; i < data.Length; i++) + { + // Generate a 440Hz sine wave + data[i] = (short)(short.MaxValue * Math.Sin(2 * Math.PI * 440 * i / frame.SampleRate)); + } + Marshal.Copy(data, 0, frame.Data, data.Length); + return frame; + } + + private AudioFrame CreateInvalidFrame() + { + return new AudioFrame(100, 1, 1); + } + } +} \ No newline at end of file diff --git a/Tests/AudioProcessingModule.cs.meta b/Tests/AudioProcessingModule.cs.meta new file mode 100644 index 00000000..a3bfd1b3 --- /dev/null +++ b/Tests/AudioProcessingModule.cs.meta @@ -0,0 +1,11 @@ +fileFormatVersion: 2 +guid: 1d02b1160a42044c9a52fb5a1fffbd8d +MonoImporter: + externalObjects: {} + serializedVersion: 2 + defaultReferences: [] + executionOrder: 0 + icon: {instanceID: 0} + userData: + assetBundleName: + assetBundleVariant: From 08925266553c65b04fd98e4476b9c0bae82c81b2 Mon Sep 17 00:00:00 2001 From: Jacob Gelman <3182119+ladvoc@users.noreply.github.com> Date: Sat, 5 Apr 2025 16:19:33 +1100 Subject: [PATCH 04/13] Add set stream delay method --- Runtime/Scripts/AudioProcessingModule.cs | 30 +++++++++++++++++++ .../FFIClients/FfiRequestExtensions.cs | 13 ++++++++ Tests/AudioProcessingModule.cs | 2 +- 3 files changed, 44 insertions(+), 1 deletion(-) diff --git a/Runtime/Scripts/AudioProcessingModule.cs b/Runtime/Scripts/AudioProcessingModule.cs index 19d053e3..69937b96 100644 --- a/Runtime/Scripts/AudioProcessingModule.cs +++ b/Runtime/Scripts/AudioProcessingModule.cs @@ -96,5 +96,35 @@ public void ProcessReverseStream(AudioFrame data) throw new Exception(res.ApmProcessReverseStream.Error); } } + + /// + /// This must be called if and only if echo processing is enabled. + /// + /// + /// Sets the `delay` in milliseconds between receiving a far-end frame in + /// and receiving the corresponding echo in a near-end frame in . + /// + /// The delay can be calculated as: delay = (t_render - t_analyze) + (t_process - t_capture) + /// + /// Where: + /// - t_analyze: Time when frame is passed to + /// - t_render: Time when first sample of frame is rendered by audio hardware + /// - t_capture: Time when first sample of frame is captured by audio hardware + /// - t_process: Time when frame is passed to + /// + public void SetStreamDelayMs(int delayMs) + { + using var request = FFIBridge.Instance.NewRequest(); + var setStreamDelay = request.request; + setStreamDelay.ApmHandle = (ulong)Handle.DangerousGetHandle(); + setStreamDelay.DelayMs = delayMs; + + using var response = request.Send(); + FfiResponse res = response; + if (res.ApmSetStreamDelay.HasError) + { + throw new Exception(res.ApmSetStreamDelay.Error); + } + } } } \ No newline at end of file diff --git a/Runtime/Scripts/Internal/FFIClients/FfiRequestExtensions.cs b/Runtime/Scripts/Internal/FFIClients/FfiRequestExtensions.cs index e6a4716b..56b9e04e 100644 --- a/Runtime/Scripts/Internal/FFIClients/FfiRequestExtensions.cs +++ b/Runtime/Scripts/Internal/FFIClients/FfiRequestExtensions.cs @@ -87,6 +87,19 @@ public static void Inject(this FfiRequest ffiRequest, T request) case E2eeRequest e2EeRequest: ffiRequest.E2Ee = e2EeRequest; break; + // Apm + case NewApmRequest newApmRequest: + ffiRequest.NewApm = newApmRequest; + break; + case ApmProcessStreamRequest apmProcessStreamRequest: + ffiRequest.ApmProcessStream = apmProcessStreamRequest; + break; + case ApmProcessReverseStreamRequest apmProcessReverseStreamRequest: + ffiRequest.ApmProcessReverseStream = apmProcessReverseStreamRequest; + break; + case ApmSetStreamDelayRequest apmSetStreamDelayRequest: + ffiRequest.ApmSetStreamDelay = apmSetStreamDelayRequest; + break; // Rpc case RegisterRpcMethodRequest registerRpcMethodRequest: ffiRequest.RegisterRpcMethod = registerRpcMethodRequest; diff --git a/Tests/AudioProcessingModule.cs b/Tests/AudioProcessingModule.cs index e9e727a6..b6cdf996 100644 --- a/Tests/AudioProcessingModule.cs +++ b/Tests/AudioProcessingModule.cs @@ -10,7 +10,7 @@ public class AudioProcessingModuleTest public void TestAudioProcessing() { var apm = new AudioProcessingModule(true, true, true, true); - + apm.SetStreamDelayMs(100); apm.ProcessStream(CreateTestFrame()); apm.ProcessReverseStream(CreateTestFrame()); From 7c6f0990fed484ff2ff0d0c627ce6f7f4051424e Mon Sep 17 00:00:00 2001 From: Jacob Gelman <3182119+ladvoc@users.noreply.github.com> Date: Sat, 12 Apr 2025 11:04:17 +1000 Subject: [PATCH 05/13] Fix memory leak Allocated audio frame data is not disposed of when allocated by Unity --- Runtime/Scripts/AudioFrame.cs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/Runtime/Scripts/AudioFrame.cs b/Runtime/Scripts/AudioFrame.cs index 3aa450c8..5b3441a5 100644 --- a/Runtime/Scripts/AudioFrame.cs +++ b/Runtime/Scripts/AudioFrame.cs @@ -23,6 +23,7 @@ public class AudioFrame : IDisposable public AudioFrameBufferInfo Info => _info; + private NativeArray _allocatedData; // Only used if the frame's data is allocated by Unity private IntPtr _dataPtr; public IntPtr Data => _dataPtr; @@ -44,8 +45,8 @@ internal AudioFrame(uint sampleRate, uint numChannels, uint samplesPerChannel) { _samplesPerChannel = samplesPerChannel; unsafe { - var data = new NativeArray(Length, Allocator.Persistent); - _dataPtr = (IntPtr)NativeArrayUnsafeUtility.GetUnsafePtr(data); + _allocatedData = new NativeArray(Length, Allocator.Persistent); + _dataPtr = (IntPtr)NativeArrayUnsafeUtility.GetUnsafePtr(_allocatedData); } } ~AudioFrame() @@ -63,6 +64,10 @@ protected virtual void Dispose(bool disposing) { if (!_disposed) { + if (_allocatedData.IsCreated) + { + _allocatedData.Dispose(); + } _disposed = true; } } From 6294861b4c51bc722d90de19616dc034a043060d Mon Sep 17 00:00:00 2001 From: Jacob Gelman <3182119+ladvoc@users.noreply.github.com> Date: Sat, 12 Apr 2025 13:12:51 +1000 Subject: [PATCH 06/13] Create audio buffer --- Runtime/Scripts/Internal/AudioBuffer.cs | 101 +++++++++++++++++++ Runtime/Scripts/Internal/AudioBuffer.cs.meta | 11 ++ Tests/AudioBuffer.cs | 31 ++++++ Tests/AudioBuffer.cs.meta | 11 ++ Tests/TestUtils.cs | 29 ++++++ Tests/TestUtils.cs.meta | 11 ++ 6 files changed, 194 insertions(+) create mode 100644 Runtime/Scripts/Internal/AudioBuffer.cs create mode 100644 Runtime/Scripts/Internal/AudioBuffer.cs.meta create mode 100644 Tests/AudioBuffer.cs create mode 100644 Tests/AudioBuffer.cs.meta create mode 100644 Tests/TestUtils.cs create mode 100644 Tests/TestUtils.cs.meta diff --git a/Runtime/Scripts/Internal/AudioBuffer.cs b/Runtime/Scripts/Internal/AudioBuffer.cs new file mode 100644 index 00000000..4b254aba --- /dev/null +++ b/Runtime/Scripts/Internal/AudioBuffer.cs @@ -0,0 +1,101 @@ +using System; +using LiveKit.Internal; + +namespace LiveKit +{ + /// + /// A thread-safe buffer for buffering audio samples. + /// + internal class AudioBuffer + { + private readonly uint _bufferDurationMs; + private RingBuffer _buffer; + private uint _channels; + private uint _sampleRate; + private object _lock = new object(); + + /// + /// Initializes a new audio sample buffer for holding samples for a given duration. + /// + internal AudioBuffer(uint bufferDurationMs = 200) + { + _bufferDurationMs = bufferDurationMs; + } + + /// + /// Write audio samples. + /// + /// + /// The float data will be converted to short format before being written to the buffer. + /// If the number of channels or sample rate changes, the buffer will be recreated. + /// + /// The audio samples to write. + /// The number of channels in the audio data. + /// The sample rate of the audio data in Hz. + internal void Write(float[] data, uint channels, uint sampleRate) + { + static short FloatToS16(float v) + { + v *= 32768f; + v = Math.Min(v, 32767f); + v = Math.Max(v, -32768f); + return (short)(v + Math.Sign(v) * 0.5f); + } + + var s16Data = new short[data.Length]; + for (int i = 0; i < data.Length; i++) + { + s16Data[i] = FloatToS16(data[i]); + } + Capture(s16Data, channels, sampleRate); + } + + private void Capture(short[] data, uint channels, uint sampleRate) + { + lock (_lock) + { + if (_buffer == null || channels != _channels || sampleRate != _sampleRate) + { + var size = (int)(channels * sampleRate * (_bufferDurationMs / 1000f)); + _buffer?.Dispose(); + _buffer = new RingBuffer(size * sizeof(short)); + _channels = channels; + _sampleRate = sampleRate; + } + unsafe + { + fixed (short* pData = data) + { + var byteData = new ReadOnlySpan(pData, data.Length * sizeof(short)); + _buffer.Write(byteData); + } + } + } + } + + /// + /// Reads a frame that is the length of the given duration. + /// + /// The duration of the audio samples to read in milliseconds. + /// An AudioFrame containing the read audio samples or if there is not enough samples, null. + internal AudioFrame ReadDuration(uint durationMs) + { + lock (_lock) + { + if (_buffer == null) return null; + + var samplesForDuration = (uint)(_sampleRate * (durationMs / 1000f)); + var requiredLength = samplesForDuration * _channels * sizeof(short); + if (_buffer.AvailableRead() < requiredLength) return null; + + var frame = new AudioFrame(_sampleRate, _channels, samplesForDuration); + unsafe + { + var frameData = new Span(frame.Data.ToPointer(), frame.Length); + _buffer.Read(frameData); + } + return frame; + } + } + } +} \ No newline at end of file diff --git a/Runtime/Scripts/Internal/AudioBuffer.cs.meta b/Runtime/Scripts/Internal/AudioBuffer.cs.meta new file mode 100644 index 00000000..fff9d644 --- /dev/null +++ b/Runtime/Scripts/Internal/AudioBuffer.cs.meta @@ -0,0 +1,11 @@ +fileFormatVersion: 2 +guid: 0bf6612ef779c46b89010720549346d4 +MonoImporter: + externalObjects: {} + serializedVersion: 2 + defaultReferences: [] + executionOrder: 0 + icon: {instanceID: 0} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Tests/AudioBuffer.cs b/Tests/AudioBuffer.cs new file mode 100644 index 00000000..4c373ad1 --- /dev/null +++ b/Tests/AudioBuffer.cs @@ -0,0 +1,31 @@ +using System; +using NUnit.Framework; + +namespace LiveKit.Tests +{ + public class AudioBufferTest + { + [Test] + [TestCase(24000u, 1u, 10u)] + [TestCase(48000u, 2u, 10u)] + public void TestWriteAndRead(uint sampleRate, uint channels, uint durationMs) + { + var buffer = new AudioBuffer(); + + Assert.IsNull(buffer.ReadDuration(durationMs), "Should not be able to read from empty buffer"); + + var samples = TestUtils.GenerateSineWave(channels, sampleRate, durationMs); + buffer.Write(samples, channels, sampleRate); + + Assert.IsNull(buffer.ReadDuration(durationMs * 2), "Should not be enough samples for this read"); + + var frame = buffer.ReadDuration(durationMs); + Assert.IsNotNull(frame); + Assert.AreEqual(sampleRate, frame.SampleRate); + Assert.AreEqual(channels, frame.NumChannels); + Assert.AreEqual(samples.Length / channels, frame.SamplesPerChannel); + + Assert.IsNull(buffer.ReadDuration(durationMs), "Should not be able to read again"); + } + } +} \ No newline at end of file diff --git a/Tests/AudioBuffer.cs.meta b/Tests/AudioBuffer.cs.meta new file mode 100644 index 00000000..6114b64f --- /dev/null +++ b/Tests/AudioBuffer.cs.meta @@ -0,0 +1,11 @@ +fileFormatVersion: 2 +guid: 28f653860611048a3a9577a638d528f8 +MonoImporter: + externalObjects: {} + serializedVersion: 2 + defaultReferences: [] + executionOrder: 0 + icon: {instanceID: 0} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Tests/TestUtils.cs b/Tests/TestUtils.cs new file mode 100644 index 00000000..4efac4c1 --- /dev/null +++ b/Tests/TestUtils.cs @@ -0,0 +1,29 @@ +using System; + +namespace LiveKit.Tests +{ + internal static class TestUtils + { + /// + /// Generates a sine wave with the specified parameters. + /// + /// Number of audio channels. + /// Sample rate in Hz. + /// Duration in milliseconds. + /// Frequency of the sine wave in Hz. + /// A float array containing the generated sine wave. + internal static float[] GenerateSineWave(uint channels, uint sampleRate, uint durationMs, uint frequency = 440) + { + var samplesPerChannel = sampleRate * durationMs / 1000; + var samples = new float[samplesPerChannel * channels]; + for (int i = 0; i < samplesPerChannel; i++) + { + float sampleValue = (float)Math.Sin(2 * Math.PI * frequency * i / sampleRate); + for (int channel = 0; channel < channels; channel++) + samples[i * channels + channel] = sampleValue; + } + return samples; + } + } +} + diff --git a/Tests/TestUtils.cs.meta b/Tests/TestUtils.cs.meta new file mode 100644 index 00000000..c6089f80 --- /dev/null +++ b/Tests/TestUtils.cs.meta @@ -0,0 +1,11 @@ +fileFormatVersion: 2 +guid: 54e6ea638860b4ebabc52cfa4c96d7bb +MonoImporter: + externalObjects: {} + serializedVersion: 2 + defaultReferences: [] + executionOrder: 0 + icon: {instanceID: 0} + userData: + assetBundleName: + assetBundleVariant: From 5569acca16ffe8151a56df7547f6f382f8437806 Mon Sep 17 00:00:00 2001 From: Jacob Gelman <3182119+ladvoc@users.noreply.github.com> Date: Sat, 12 Apr 2025 13:24:03 +1000 Subject: [PATCH 07/13] Integrate audio buffer --- Runtime/Scripts/Internal/FFIClient.cs | 2 + .../Scripts/Internal/FFIClients/FFIEvents.cs | 2 + Runtime/Scripts/RtcAudioSource.cs | 96 +++++++------------ 3 files changed, 40 insertions(+), 60 deletions(-) diff --git a/Runtime/Scripts/Internal/FFIClient.cs b/Runtime/Scripts/Internal/FFIClient.cs index de47274b..16a09933 100644 --- a/Runtime/Scripts/Internal/FFIClient.cs +++ b/Runtime/Scripts/Internal/FFIClient.cs @@ -43,6 +43,7 @@ internal sealed class FfiClient : IFFIClient // participant events are not allowed in the fii protocol public event ParticipantEventReceivedDelegate ParticipantEventReceived; public event VideoStreamEventReceivedDelegate? VideoStreamEventReceived; public event AudioStreamEventReceivedDelegate? AudioStreamEventReceived; + public event CaptureAudioFrameReceivedDelegate? CaptureAudioFrameReceived; public event PerformRpcReceivedDelegate? PerformRpcReceived; @@ -275,6 +276,7 @@ static unsafe void FFICallback(UIntPtr data, UIntPtr size) Instance.AudioStreamEventReceived?.Invoke(r.AudioStreamEvent!); break; case FfiEvent.MessageOneofCase.CaptureAudioFrame: + Instance.CaptureAudioFrameReceived?.Invoke(r.CaptureAudioFrame!); break; case FfiEvent.MessageOneofCase.PerformRpc: Instance.PerformRpcReceived?.Invoke(r.PerformRpc!); diff --git a/Runtime/Scripts/Internal/FFIClients/FFIEvents.cs b/Runtime/Scripts/Internal/FFIClients/FFIEvents.cs index 8f2490e0..d8ffa0b0 100644 --- a/Runtime/Scripts/Internal/FFIClients/FFIEvents.cs +++ b/Runtime/Scripts/Internal/FFIClients/FFIEvents.cs @@ -44,6 +44,8 @@ namespace LiveKit.Internal internal delegate void SendTextReceivedDelegate(StreamSendTextCallback e); + internal delegate void CaptureAudioFrameReceivedDelegate(CaptureAudioFrameCallback e); + // Events internal delegate void RoomEventReceivedDelegate(RoomEvent e); diff --git a/Runtime/Scripts/RtcAudioSource.cs b/Runtime/Scripts/RtcAudioSource.cs index fcdc054e..12464c06 100644 --- a/Runtime/Scripts/RtcAudioSource.cs +++ b/Runtime/Scripts/RtcAudioSource.cs @@ -18,12 +18,12 @@ public enum RtcAudioSourceType public abstract class RtcAudioSource : IRtcSource { public abstract event Action AudioRead; - public virtual IEnumerator Prepare(float timeout = 0) { yield break; } + public virtual IEnumerator Prepare(float timeout = 0) { yield break; } public abstract void Play(); #if UNITY_IOS // iOS microphone sample rate is 24k, - // please make sure when you using + // please make sure when you using // sourceType is AudioSourceMicrophone public static uint DefaultMirophoneSampleRate = 24000; @@ -43,7 +43,7 @@ public abstract class RtcAudioSource : IRtcSource // Possibly used on the AudioThread private Thread _readAudioThread; - private ThreadSafeQueue _frameQueue = new ThreadSafeQueue(); + private AudioBuffer _captureBuffer = new AudioBuffer(); private bool _muted = false; public override bool Muted => _muted; @@ -85,7 +85,6 @@ public void Start() Stop(); _readAudioThread = new Thread(Update); _readAudioThread.Start(); - AudioRead += OnAudioRead; Play(); } @@ -101,78 +100,55 @@ private void Update() while (true) { Thread.Sleep(Constants.TASK_DELAY); - ReadAudio(); + var frame = _captureBuffer.ReadDuration(10); // 10ms + if (_muted || frame == null) continue; + Capture(frame); } } private void OnAudioRead(float[] data, int channels, int sampleRate) { - var samplesPerChannel = data.Length / channels; - var frame = new AudioFrame((uint)sampleRate, (uint)channels, (uint)samplesPerChannel); - - static short FloatToS16(float v) - { - v *= 32768f; - v = Math.Min(v, 32767f); - v = Math.Max(v, -32768f); - return (short)(v + Math.Sign(v) * 0.5f); - } - unsafe + _captureBuffer.Write(data, (uint)channels, (uint)sampleRate); + if (_sourceType == RtcAudioSourceType.AudioSourceMicrophone) { - var frameData = new Span(frame.Data.ToPointer(), frame.Length / sizeof(short)); - for (int i = 0; i < data.Length; i++) - { - frameData[i] = FloatToS16(data[i]); - } - if (_sourceType == RtcAudioSourceType.AudioSourceMicrophone) - { - // Don't play the audio locally, to avoid echo. - Array.Clear(data, 0, data.Length); - } + // Don't play the audio locally, to avoid echo. + Array.Clear(data, 0, data.Length); } - _frameQueue.Enqueue(frame); } - private void ReadAudio() + private void Capture(AudioFrame frame) { - while (_frameQueue.Count > 0) + using var request = FFIBridge.Instance.NewRequest(); + using var audioFrameBufferInfo = request.TempResource(); + + var pushFrame = request.request; + pushFrame.SourceHandle = (ulong)Handle.DangerousGetHandle(); + + pushFrame.Buffer = audioFrameBufferInfo; + pushFrame.Buffer.DataPtr = (ulong)frame.Data; + pushFrame.Buffer.NumChannels = frame.NumChannels; + pushFrame.Buffer.SampleRate = frame.SampleRate; + pushFrame.Buffer.SamplesPerChannel = frame.SamplesPerChannel; + + using var response = request.Send(); + FfiResponse res = response; + + // Frame needs to stay alive until receiving the async callback. + var asyncId = res.CaptureAudioFrame.AsyncId; + void Callback(CaptureAudioFrameCallback callback) { - try - { - AudioFrame frame = _frameQueue.Dequeue(); - - if(_muted) - { - continue; - } - unsafe - { - using var request = FFIBridge.Instance.NewRequest(); - using var audioFrameBufferInfo = request.TempResource(); - - var pushFrame = request.request; - pushFrame.SourceHandle = (ulong)Handle.DangerousGetHandle(); - - pushFrame.Buffer = audioFrameBufferInfo; - pushFrame.Buffer.DataPtr = (ulong)frame.Data; - pushFrame.Buffer.NumChannels = frame.NumChannels; - pushFrame.Buffer.SampleRate = frame.SampleRate; - pushFrame.Buffer.SamplesPerChannel = frame.SamplesPerChannel; - - using var response = request.Send(); - } - } - catch (Exception e) - { - Utils.Error("Audio Framedata error: " + e.Message); - } + if (callback.AsyncId != asyncId) return; + if (callback.HasError) + Utils.Error($"Audio capture failed: {callback.Error}"); + frame.Dispose(); + FfiClient.Instance.CaptureAudioFrameReceived -= Callback; } + FfiClient.Instance.CaptureAudioFrameReceived += Callback; } public override void SetMute(bool muted) { _muted = muted; } - } -} +} \ No newline at end of file From 51c596b06e0e19a1752711a982b8beb76c9c6059 Mon Sep 17 00:00:00 2001 From: Jacob Gelman <3182119+ladvoc@users.noreply.github.com> Date: Sat, 12 Apr 2025 15:06:04 +1000 Subject: [PATCH 08/13] Integrate apm --- Runtime/Scripts/ApmReverseStream.cs | 63 ++++++++++++++++++++++++ Runtime/Scripts/ApmReverseStream.cs.meta | 11 +++++ Runtime/Scripts/AudioProcessingModule.cs | 5 ++ Runtime/Scripts/RtcAudioSource.cs | 33 ++++++++----- 4 files changed, 99 insertions(+), 13 deletions(-) create mode 100644 Runtime/Scripts/ApmReverseStream.cs create mode 100644 Runtime/Scripts/ApmReverseStream.cs.meta diff --git a/Runtime/Scripts/ApmReverseStream.cs b/Runtime/Scripts/ApmReverseStream.cs new file mode 100644 index 00000000..ac26cb0f --- /dev/null +++ b/Runtime/Scripts/ApmReverseStream.cs @@ -0,0 +1,63 @@ +using System.Threading; +using LiveKit.Internal; +using UnityEngine; + +namespace LiveKit +{ + /// + /// Captures and processes the reverse audio stream using an . + /// + /// + /// The reverse stream is captured from the scene's audio listener. + /// + internal class ApmReverseStream + { + private readonly AudioBuffer _captureBuffer = new AudioBuffer(); + private readonly AudioProcessingModule _apm; // APM is thread safe + private Thread _thread; + private AudioFilter _audioFilter; + + internal ApmReverseStream(AudioProcessingModule apm) + { + _apm = apm; + } + + internal void Start() + { + var audioListener = GameObject.FindObjectOfType(); + if (audioListener == null) + { + Utils.Error("AudioListener not found in scene"); + return; + } + _audioFilter = audioListener.gameObject.AddComponent(); + _audioFilter.AudioRead += OnAudioRead; + + _thread = new Thread(ProcessReverseStream); + _thread.Start(); + } + + internal void Stop() + { + _thread?.Abort(); + if (_audioFilter != null) + Object.Destroy(_audioFilter); + } + + private void ProcessReverseStream() + { + while (true) + { + Thread.Sleep(Constants.TASK_DELAY); + using var frame = _captureBuffer.ReadDuration(AudioProcessingModule.FRAME_DURATION_MS); + if (frame == null) continue; + _apm.ProcessReverseStream(frame); + } + } + + private void OnAudioRead(float[] data, int channels, int sampleRate) + { + _captureBuffer.Write(data, (uint)channels, (uint)sampleRate); + } + } +} \ No newline at end of file diff --git a/Runtime/Scripts/ApmReverseStream.cs.meta b/Runtime/Scripts/ApmReverseStream.cs.meta new file mode 100644 index 00000000..0f388abd --- /dev/null +++ b/Runtime/Scripts/ApmReverseStream.cs.meta @@ -0,0 +1,11 @@ +fileFormatVersion: 2 +guid: 4075a37ec813b43249c214c09e5aba2a +MonoImporter: + externalObjects: {} + serializedVersion: 2 + defaultReferences: [] + executionOrder: 0 + icon: {instanceID: 0} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Runtime/Scripts/AudioProcessingModule.cs b/Runtime/Scripts/AudioProcessingModule.cs index 69937b96..d80c3c70 100644 --- a/Runtime/Scripts/AudioProcessingModule.cs +++ b/Runtime/Scripts/AudioProcessingModule.cs @@ -126,5 +126,10 @@ public void SetStreamDelayMs(int delayMs) throw new Exception(res.ApmSetStreamDelay.Error); } } + + /// + /// The required duration for audio frames being processed. + /// + public const uint FRAME_DURATION_MS = 10; } } \ No newline at end of file diff --git a/Runtime/Scripts/RtcAudioSource.cs b/Runtime/Scripts/RtcAudioSource.cs index 12464c06..a2685439 100644 --- a/Runtime/Scripts/RtcAudioSource.cs +++ b/Runtime/Scripts/RtcAudioSource.cs @@ -44,30 +44,26 @@ public abstract class RtcAudioSource : IRtcSource // Possibly used on the AudioThread private Thread _readAudioThread; private AudioBuffer _captureBuffer = new AudioBuffer(); + private readonly AudioProcessingModule _apm; + private readonly ApmReverseStream _apmReverseStream; private bool _muted = false; public override bool Muted => _muted; protected RtcAudioSource(int channels = 2, RtcAudioSourceType audioSourceType = RtcAudioSourceType.AudioSourceCustom) { + var isMicrophone = audioSourceType == RtcAudioSourceType.AudioSourceMicrophone; _sourceType = audioSourceType; + _apm = new AudioProcessingModule(isMicrophone, true, true, true); + if (isMicrophone) + _apmReverseStream = new ApmReverseStream(_apm); using var request = FFIBridge.Instance.NewRequest(); var newAudioSource = request.request; newAudioSource.Type = AudioSourceType.AudioSourceNative; newAudioSource.NumChannels = (uint)channels; - if(_sourceType == RtcAudioSourceType.AudioSourceMicrophone) - { - newAudioSource.SampleRate = DefaultMirophoneSampleRate; - } - else - { - newAudioSource.SampleRate = DefaultSampleRate; - } - newAudioSource.Options = request.TempResource(); - newAudioSource.Options.EchoCancellation = true; - newAudioSource.Options.AutoGainControl = true; - newAudioSource.Options.NoiseSuppression = true; + newAudioSource.SampleRate = isMicrophone ? DefaultMirophoneSampleRate : DefaultSampleRate; + using var response = request.Send(); FfiResponse res = response; _info = res.NewAudioSource.Source.Info; @@ -85,6 +81,7 @@ public void Start() Stop(); _readAudioThread = new Thread(Update); _readAudioThread.Start(); + _apmReverseStream?.Start(); AudioRead += OnAudioRead; Play(); } @@ -92,6 +89,7 @@ public void Start() public virtual void Stop() { _readAudioThread?.Abort(); + _apmReverseStream?.Stop(); AudioRead -= OnAudioRead; } @@ -100,8 +98,17 @@ private void Update() while (true) { Thread.Sleep(Constants.TASK_DELAY); - var frame = _captureBuffer.ReadDuration(10); // 10ms + var frame = _captureBuffer.ReadDuration(AudioProcessingModule.FRAME_DURATION_MS); if (_muted || frame == null) continue; + + if (_apmReverseStream != null) + { + // TODO: calculate stream delay + var delayMs = 0; + _apm.SetStreamDelayMs(delayMs); + } + _apm.ProcessStream(frame); + Capture(frame); } } From fc0e83f06995a3199efb43812006d6f4b836fc5d Mon Sep 17 00:00:00 2001 From: Jacob Gelman <3182119+ladvoc@users.noreply.github.com> Date: Tue, 15 Apr 2025 08:59:15 +1000 Subject: [PATCH 09/13] Call superclass stop method --- Runtime/Scripts/BasicAudioSource.cs | 1 + 1 file changed, 1 insertion(+) diff --git a/Runtime/Scripts/BasicAudioSource.cs b/Runtime/Scripts/BasicAudioSource.cs index 24157988..e1a2c31d 100644 --- a/Runtime/Scripts/BasicAudioSource.cs +++ b/Runtime/Scripts/BasicAudioSource.cs @@ -29,6 +29,7 @@ public override void Play() public override void Stop() { + base.Stop(); _audioFilter.AudioRead -= OnAudioRead; Source.Stop(); } From bbad26ac5d4c512f783c7cef36020c1058dbdadc Mon Sep 17 00:00:00 2001 From: Jacob Gelman <3182119+ladvoc@users.noreply.github.com> Date: Sat, 26 Apr 2025 09:28:50 +1000 Subject: [PATCH 10/13] Set stream delay --- Runtime/Scripts/RtcAudioSource.cs | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/Runtime/Scripts/RtcAudioSource.cs b/Runtime/Scripts/RtcAudioSource.cs index a2685439..47c58be2 100644 --- a/Runtime/Scripts/RtcAudioSource.cs +++ b/Runtime/Scripts/RtcAudioSource.cs @@ -4,6 +4,7 @@ using LiveKit.Internal; using System.Threading; using LiveKit.Internal.FFIClients.Requests; +using UnityEngine; namespace LiveKit { @@ -52,11 +53,15 @@ public abstract class RtcAudioSource : IRtcSource protected RtcAudioSource(int channels = 2, RtcAudioSourceType audioSourceType = RtcAudioSourceType.AudioSourceCustom) { - var isMicrophone = audioSourceType == RtcAudioSourceType.AudioSourceMicrophone; _sourceType = audioSourceType; + + var isMicrophone = audioSourceType == RtcAudioSourceType.AudioSourceMicrophone; _apm = new AudioProcessingModule(isMicrophone, true, true, true); if (isMicrophone) + { _apmReverseStream = new ApmReverseStream(_apm); + _apm.SetStreamDelayMs(EstimateStreamDelayMs()); + } using var request = FFIBridge.Instance.NewRequest(); var newAudioSource = request.request; @@ -101,14 +106,8 @@ private void Update() var frame = _captureBuffer.ReadDuration(AudioProcessingModule.FRAME_DURATION_MS); if (_muted || frame == null) continue; - if (_apmReverseStream != null) - { - // TODO: calculate stream delay - var delayMs = 0; - _apm.SetStreamDelayMs(delayMs); - } - _apm.ProcessStream(frame); - + if (_apm != null) + _apm.ProcessStream(frame); Capture(frame); } } @@ -157,5 +156,14 @@ public override void SetMute(bool muted) { _muted = muted; } + + private int EstimateStreamDelayMs() + { + // TODO: estimate more accurately + int bufferLength, numBuffers; + int sampleRate = AudioSettings.outputSampleRate; + AudioSettings.GetDSPBufferSize(out bufferLength, out numBuffers); + return 2 * (int)(1000f * bufferLength * numBuffers / sampleRate); + } } } \ No newline at end of file From 5f71c75f62f3391ce1c370d7eb742c2d60e7f1f7 Mon Sep 17 00:00:00 2001 From: Jacob Gelman <3182119+ladvoc@users.noreply.github.com> Date: Tue, 29 Apr 2025 17:02:46 +1000 Subject: [PATCH 11/13] Process & capture on audio thread --- Runtime/Scripts/ApmReverseStream.cs | 18 ++++-------------- Runtime/Scripts/RtcAudioSource.cs | 23 +++++------------------ 2 files changed, 9 insertions(+), 32 deletions(-) diff --git a/Runtime/Scripts/ApmReverseStream.cs b/Runtime/Scripts/ApmReverseStream.cs index ac26cb0f..4490729e 100644 --- a/Runtime/Scripts/ApmReverseStream.cs +++ b/Runtime/Scripts/ApmReverseStream.cs @@ -1,4 +1,3 @@ -using System.Threading; using LiveKit.Internal; using UnityEngine; @@ -14,7 +13,6 @@ internal class ApmReverseStream { private readonly AudioBuffer _captureBuffer = new AudioBuffer(); private readonly AudioProcessingModule _apm; // APM is thread safe - private Thread _thread; private AudioFilter _audioFilter; internal ApmReverseStream(AudioProcessingModule apm) @@ -32,32 +30,24 @@ internal void Start() } _audioFilter = audioListener.gameObject.AddComponent(); _audioFilter.AudioRead += OnAudioRead; - - _thread = new Thread(ProcessReverseStream); - _thread.Start(); } internal void Stop() { - _thread?.Abort(); if (_audioFilter != null) Object.Destroy(_audioFilter); } - private void ProcessReverseStream() + private void OnAudioRead(float[] data, int channels, int sampleRate) { + _captureBuffer.Write(data, (uint)channels, (uint)sampleRate); while (true) { - Thread.Sleep(Constants.TASK_DELAY); using var frame = _captureBuffer.ReadDuration(AudioProcessingModule.FRAME_DURATION_MS); - if (frame == null) continue; + if (frame == null) break; + _apm.ProcessReverseStream(frame); } } - - private void OnAudioRead(float[] data, int channels, int sampleRate) - { - _captureBuffer.Write(data, (uint)channels, (uint)sampleRate); - } } } \ No newline at end of file diff --git a/Runtime/Scripts/RtcAudioSource.cs b/Runtime/Scripts/RtcAudioSource.cs index 47c58be2..d0ab7d4f 100644 --- a/Runtime/Scripts/RtcAudioSource.cs +++ b/Runtime/Scripts/RtcAudioSource.cs @@ -2,7 +2,6 @@ using System.Collections; using LiveKit.Proto; using LiveKit.Internal; -using System.Threading; using LiveKit.Internal.FFIClients.Requests; using UnityEngine; @@ -43,7 +42,6 @@ public abstract class RtcAudioSource : IRtcSource protected AudioSourceInfo _info; // Possibly used on the AudioThread - private Thread _readAudioThread; private AudioBuffer _captureBuffer = new AudioBuffer(); private readonly AudioProcessingModule _apm; private readonly ApmReverseStream _apmReverseStream; @@ -84,8 +82,6 @@ public IEnumerator PrepareAndStart() public void Start() { Stop(); - _readAudioThread = new Thread(Update); - _readAudioThread.Start(); _apmReverseStream?.Start(); AudioRead += OnAudioRead; Play(); @@ -93,33 +89,24 @@ public void Start() public virtual void Stop() { - _readAudioThread?.Abort(); _apmReverseStream?.Stop(); AudioRead -= OnAudioRead; } - private void Update() + private void OnAudioRead(float[] data, int channels, int sampleRate) { + _captureBuffer.Write(data, (uint)channels, (uint)sampleRate); while (true) { - Thread.Sleep(Constants.TASK_DELAY); var frame = _captureBuffer.ReadDuration(AudioProcessingModule.FRAME_DURATION_MS); - if (_muted || frame == null) continue; + if (_muted || frame == null) break; - if (_apm != null) - _apm.ProcessStream(frame); + if (_apm != null) _apm.ProcessStream(frame); Capture(frame); } - } - - private void OnAudioRead(float[] data, int channels, int sampleRate) - { - _captureBuffer.Write(data, (uint)channels, (uint)sampleRate); + // Don't play the audio locally, to avoid echo. if (_sourceType == RtcAudioSourceType.AudioSourceMicrophone) - { - // Don't play the audio locally, to avoid echo. Array.Clear(data, 0, data.Length); - } } private void Capture(AudioFrame frame) From af3d8ec6ccb5dc21f267b9f05b647e2ecdbd9899 Mon Sep 17 00:00:00 2001 From: Jacob Gelman <3182119+ladvoc@users.noreply.github.com> Date: Tue, 29 Apr 2025 21:22:31 +1000 Subject: [PATCH 12/13] Optimize clear --- Runtime/Scripts/RtcAudioSource.cs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Runtime/Scripts/RtcAudioSource.cs b/Runtime/Scripts/RtcAudioSource.cs index d0ab7d4f..601619c2 100644 --- a/Runtime/Scripts/RtcAudioSource.cs +++ b/Runtime/Scripts/RtcAudioSource.cs @@ -106,7 +106,10 @@ private void OnAudioRead(float[] data, int channels, int sampleRate) } // Don't play the audio locally, to avoid echo. if (_sourceType == RtcAudioSourceType.AudioSourceMicrophone) - Array.Clear(data, 0, data.Length); + { + Span span = data; + span.Clear(); + } } private void Capture(AudioFrame frame) From 89437aabac7b9db3ebf98d13e7478e85f525c336 Mon Sep 17 00:00:00 2001 From: Jacob Gelman <3182119+ladvoc@users.noreply.github.com> Date: Tue, 29 Apr 2025 21:23:46 +1000 Subject: [PATCH 13/13] Remove locking from AudioBuffer --- Runtime/Scripts/Internal/AudioBuffer.cs | 53 +++++++++++-------------- 1 file changed, 23 insertions(+), 30 deletions(-) diff --git a/Runtime/Scripts/Internal/AudioBuffer.cs b/Runtime/Scripts/Internal/AudioBuffer.cs index 4b254aba..9a9d1eeb 100644 --- a/Runtime/Scripts/Internal/AudioBuffer.cs +++ b/Runtime/Scripts/Internal/AudioBuffer.cs @@ -4,7 +4,7 @@ namespace LiveKit { /// - /// A thread-safe buffer for buffering audio samples. + /// A ring buffer for audio samples. /// internal class AudioBuffer { @@ -12,7 +12,6 @@ internal class AudioBuffer private RingBuffer _buffer; private uint _channels; private uint _sampleRate; - private object _lock = new object(); /// /// Initializes a new audio sample buffer for holding samples for a given duration. @@ -52,23 +51,20 @@ static short FloatToS16(float v) private void Capture(short[] data, uint channels, uint sampleRate) { - lock (_lock) + if (_buffer == null || channels != _channels || sampleRate != _sampleRate) { - if (_buffer == null || channels != _channels || sampleRate != _sampleRate) - { - var size = (int)(channels * sampleRate * (_bufferDurationMs / 1000f)); - _buffer?.Dispose(); - _buffer = new RingBuffer(size * sizeof(short)); - _channels = channels; - _sampleRate = sampleRate; - } - unsafe + var size = (int)(channels * sampleRate * (_bufferDurationMs / 1000f)); + _buffer?.Dispose(); + _buffer = new RingBuffer(size * sizeof(short)); + _channels = channels; + _sampleRate = sampleRate; + } + unsafe + { + fixed (short* pData = data) { - fixed (short* pData = data) - { - var byteData = new ReadOnlySpan(pData, data.Length * sizeof(short)); - _buffer.Write(byteData); - } + var byteData = new ReadOnlySpan(pData, data.Length * sizeof(short)); + _buffer.Write(byteData); } } } @@ -80,22 +76,19 @@ private void Capture(short[] data, uint channels, uint sampleRate) /// An AudioFrame containing the read audio samples or if there is not enough samples, null. internal AudioFrame ReadDuration(uint durationMs) { - lock (_lock) - { - if (_buffer == null) return null; + if (_buffer == null) return null; - var samplesForDuration = (uint)(_sampleRate * (durationMs / 1000f)); - var requiredLength = samplesForDuration * _channels * sizeof(short); - if (_buffer.AvailableRead() < requiredLength) return null; + var samplesForDuration = (uint)(_sampleRate * (durationMs / 1000f)); + var requiredLength = samplesForDuration * _channels * sizeof(short); + if (_buffer.AvailableRead() < requiredLength) return null; - var frame = new AudioFrame(_sampleRate, _channels, samplesForDuration); - unsafe - { - var frameData = new Span(frame.Data.ToPointer(), frame.Length); - _buffer.Read(frameData); - } - return frame; + var frame = new AudioFrame(_sampleRate, _channels, samplesForDuration); + unsafe + { + var frameData = new Span(frame.Data.ToPointer(), frame.Length); + _buffer.Read(frameData); } + return frame; } } } \ No newline at end of file