Skip to content

Commit

Permalink
Endless loop microphone and memory leak fix (Macoron#55)
Browse files Browse the repository at this point in the history
* Started working on a loop microphone

* Clean up

* Fixed memory leak

* Comments

* Stuff

* Some params changes

* Updated chunk logic

* Minor changes
  • Loading branch information
Macoron authored Sep 14, 2023
1 parent be4c7fa commit 7f4c33f
Show file tree
Hide file tree
Showing 4 changed files with 203 additions and 59 deletions.
211 changes: 153 additions & 58 deletions Runtime/Utils/MicrophoneRecord.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
namespace Whisper.Utils
{
/// <summary>
/// Small portion of recorded audio.
/// Portion of recorded audio clip.
/// </summary>
public struct AudioChunk
{
Expand All @@ -22,15 +22,17 @@ public struct AudioChunk

public delegate void OnVadChangedDelegate(bool isSpeechDetected);
public delegate void OnChunkReadyDelegate(AudioChunk chunk);
public delegate void OnRecordStopDelegate(float[] data, int frequency, int channels, float length);
public delegate void OnRecordStopDelegate(AudioChunk recordedAudio);

/// <summary>
/// Controls microphone input settings and recording.
/// </summary>
public class MicrophoneRecord : MonoBehaviour
{
[Tooltip("Max length of recorded audio from microphone in seconds")]
public int maxLengthSec = 30;
public int maxLengthSec = 60;
[Tooltip("After reaching max length microphone record will continue")]
public bool loop;
[Tooltip("Microphone sample rate")]
public int frequency = 16000;
[Tooltip("Length of audio chunks in seconds, useful for streaming")]
Expand All @@ -48,7 +50,7 @@ public class MicrophoneRecord : MonoBehaviour
[Tooltip("Window size where VAD tries to detect speech")]
public float vadLastSec = 1.25f;
[Tooltip("Threshold of VAD energy activation")]
public float vadThd = 0.6f;
public float vadThd = 1.0f;
[Tooltip("Threshold of VAD filter frequency")]
public float vadFreqThd = 100.0f;
[Tooltip("Optional indicator that changes color when speech detected")]
Expand Down Expand Up @@ -78,18 +80,21 @@ public class MicrophoneRecord : MonoBehaviour
public event OnChunkReadyDelegate OnChunkReady;
/// <summary>
/// Raised when microphone record stopped.
/// Returns <see cref="maxLengthSec"/> or less of recorded audio.
/// </summary>
public event OnRecordStopDelegate OnRecordStop;

private float _recordStart;

private int _lastVadPos;
private AudioClip _clip;
private float _length;
private int _lastChunkPos;
private int _chunksLength;
private float? _vadStopBegin;

private int _lastMicPos;
private bool _madeLoopLap;

private string _selectedMicDevice;

public string SelectedMicDevice
{
get => _selectedMicDevice;
Expand All @@ -101,6 +106,8 @@ public string SelectedMicDevice
}
}

public int ClipSamples => _clip.samples * _clip.channels;

public string RecordStartMicDevice { get; private set; }
public bool IsRecording { get; private set; }
public bool IsVoiceDetected { get; private set; }
Expand All @@ -126,20 +133,31 @@ private void Update()
if (!IsRecording)
return;

// check that recording reached max time
var timePassed = Time.realtimeSinceStartup - _recordStart;
if (timePassed > maxLengthSec)
// lets check current mic position time
var micPos = Microphone.GetPosition(RecordStartMicDevice);
if (micPos < _lastMicPos)
{
StopRecord();
return;
// looks like mic started recording in loop
// lets check if we even allow do that?
_madeLoopLap = true;
if (!loop)
{
LogUtils.Verbose($"Stopping recording, mic pos returned back to {micPos}");
StopRecord();
return;
}

// all cool, we can work in loop
LogUtils.Verbose($"Mic made a new loop lap, continue recording.");
}

_lastMicPos = micPos;

// still recording - update chunks and vad
UpdateChunks();
UpdateVad();
UpdateChunks(micPos);
UpdateVad(micPos);
}

private void UpdateChunks()
private void UpdateChunks(int micPos)
{
// is anyone even subscribe to do this?
if (OnChunkReady == null)
Expand All @@ -150,8 +168,7 @@ private void UpdateChunks()
return;

// get current chunk length
var samplesCount = Microphone.GetPosition(RecordStartMicDevice);
var chunk = samplesCount - _lastChunkPos;
var chunk = GetMicPosDist(_lastChunkPos, micPos);

// send new chunks while there has valid size
while (chunk > _chunksLength)
Expand All @@ -169,49 +186,46 @@ private void UpdateChunks()
};
OnChunkReady(chunkStruct);

_lastChunkPos += _chunksLength;
chunk = samplesCount - _lastChunkPos;
_lastChunkPos = (_lastChunkPos + _chunksLength) % ClipSamples;
chunk = GetMicPosDist(_lastChunkPos, micPos);
}
}

private void UpdateVad()
private void UpdateVad(int micPos)
{
if (!useVad)
return;

// get current position of microphone header
var samplesCount = Microphone.GetPosition(RecordStartMicDevice);
// get current recorded clip length
var samplesCount = GetMicBufferLength(micPos);
if (samplesCount <= 0)
return;

// check if it's time to update
var vadUpdateRateSamples = vadUpdateRateSec * _clip.frequency;
var dt = samplesCount - _lastVadPos;
var dt = GetMicPosDist(_lastVadPos, micPos);
if (dt < vadUpdateRateSamples)
return;
_lastVadPos = samplesCount;

// try to get sample for voice detection
var vadContextSamples = (int) (_clip.frequency * vadContextSec);
var dataLength = Math.Min(vadContextSamples, samplesCount);
var offset = Math.Max(samplesCount - vadContextSamples, 0);

var data = new float[dataLength];
_clip.GetData(data, offset);

var data = GetMicBufferLast(micPos, vadContextSec);
var vad = AudioUtils.SimpleVad(data, _clip.frequency, vadLastSec, vadThd, vadFreqThd);
if (vadIndicatorImage)
{
var color = vad ? Color.green : Color.red;
vadIndicatorImage.color = color;
}

// raise event if vad has changed
if (vad != IsVoiceDetected)
{
_vadStopBegin = !vad ? Time.realtimeSinceStartup : (float?) null;
IsVoiceDetected = vad;
OnVadChanged?.Invoke(vad);
}

// update vad indicator
if (vadIndicatorImage)
{
var color = vad ? Color.green : Color.red;
vadIndicatorImage.color = color;
}

UpdateVadStop();
}
Expand All @@ -236,60 +250,141 @@ private void OnMicrophoneChanged(int ind)
SelectedMicDevice = opt.text == microphoneDefaultLabel ? null : opt.text;
}

/// <summary>
/// Start microphone recording
/// </summary>
public void StartRecord()
{
if (IsRecording)
return;

_recordStart = Time.realtimeSinceStartup;

RecordStartMicDevice = SelectedMicDevice;
_clip = Microphone.Start(RecordStartMicDevice, false, maxLengthSec, frequency);
_clip = Microphone.Start(RecordStartMicDevice, loop, maxLengthSec, frequency);
IsRecording = true;


_lastMicPos = 0;
_madeLoopLap = false;
_lastChunkPos = 0;
_lastVadPos = 0;
_vadStopBegin = null;
_chunksLength = (int) (_clip.frequency * _clip.channels * chunksLengthSec);
}

/// <summary>
/// Stop microphone record.
/// </summary>
/// <param name="dropTimeSec">How many last recording seconds to drop.</param>
public void StopRecord(float dropTimeSec = 0f)
{
if (!IsRecording)
return;

var data = GetTrimmedData(dropTimeSec);
if (echo)

// get all data from mic audio clip
var data = GetMicBuffer(dropTimeSec);
var finalAudio = new AudioChunk()
{
var echoClip = AudioClip.Create("echo", data.Length,
_clip.channels, _clip.frequency, false);
echoClip.SetData(data, 0);
AudioSource.PlayClipAtPoint(echoClip, Vector3.zero);
}

Data = data,
Channels = _clip.channels,
Frequency = _clip.frequency,
IsVoiceDetected = IsVoiceDetected,
Length = (float) data.Length / (_clip.frequency * _clip.channels)
};

// stop mic audio recording
Microphone.End(RecordStartMicDevice);
IsRecording = false;
_length = Time.realtimeSinceStartup - _recordStart;

Destroy(_clip);
LogUtils.Verbose($"Stopped microphone recording. Final audio length " +
$"{finalAudio.Length} ({finalAudio.Data.Length} samples)");

// update VAD, no speech with disabled mic
if (IsVoiceDetected)
{
IsVoiceDetected = false;
OnVadChanged?.Invoke(false);
}

// play echo sound
if (echo)
{
var echoClip = AudioClip.Create("echo", data.Length,
_clip.channels, _clip.frequency, false);
echoClip.SetData(data, 0);
PlayAudioAndDestroy.Play(echoClip, Vector3.zero);
}

OnRecordStop?.Invoke(data, _clip.frequency, _clip.channels, _length);
// finally, fire event
OnRecordStop?.Invoke(finalAudio);
}

private float[] GetTrimmedData(float dropTimeSec = 0f)
/// <summary>
/// Get all recorded mic buffer.
/// </summary>
private float[] GetMicBuffer(float dropTimeSec = 0f)
{
// get microphone samples and current position
var pos = Microphone.GetPosition(RecordStartMicDevice);
var len = pos == 0 ? _clip.samples * _clip.channels : pos;
var dropTimeSamples = (int) (_clip.frequency * _clip.channels * dropTimeSec);
var micPos = Microphone.GetPosition(RecordStartMicDevice);
var len = GetMicBufferLength(micPos);
if (len == 0) return Array.Empty<float>();

// drop last samples from length if necessary
var dropTimeSamples = (int) (_clip.frequency * dropTimeSec);
len = Math.Max(0, len - dropTimeSamples);

// get last len samples from recorded audio
// offset used to get audio from previous circular buffer lap
var data = new float[len];
_clip.GetData(data, 0);
var offset = _madeLoopLap ? micPos : 0;
_clip.GetData(data, offset);

return data;
}

/// <summary>
/// Get last sec of recorded mic buffer.
/// </summary>
private float[] GetMicBufferLast(int micPos, float lastSec)
{
var len = GetMicBufferLength(micPos);
if (len == 0)
return Array.Empty<float>();

var lastSamples = (int) (_clip.frequency * lastSec);
var dataLength = Math.Min(lastSamples, len);
var offset = micPos - dataLength;
if (offset < 0) offset = len + offset;

var data = new float[dataLength];
_clip.GetData(data, offset);
return data;
}

/// <summary>
/// Get mic buffer length that was actually recorded.
/// </summary>
private int GetMicBufferLength(int micPos)
{
// looks like we just started recording and stopped it immediately
// nothing was actually recorded
if (micPos == 0 && !_madeLoopLap)
return 0;

// get length of the mic buffer that we want to return
// this need to account circular loop buffer
var len = _madeLoopLap ? ClipSamples : micPos;
return len;
}

/// <summary>
/// Calculate distance between two mic positions.
/// It takes circular buffer into account.
/// </summary>
private int GetMicPosDist(int prevPos, int newPos)
{
if (newPos >= prevPos)
return newPos - prevPos;

// circular buffer case
return ClipSamples - prevPos + newPos;
}
}
}
Loading

0 comments on commit 7f4c33f

Please sign in to comment.