Skip to content
This repository was archived by the owner on Apr 8, 2025. It is now read-only.
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions src/app/macaroni_app.csproj
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>

<SpeechSDKVersion>1.29</SpeechSDKVersion>
<SpeechSDKVersion>1.30.0-alpha.0.38264386</SpeechSDKVersion>
<SpeechSDKEmbedded>false</SpeechSDKEmbedded>

<OutputType>WinExe</OutputType>
<TargetFramework>net6.0-windows</TargetFramework>
<RootNamespace>macaroni</RootNamespace>
Expand Down
208 changes: 208 additions & 0 deletions src/cs/Command/Triggers/EmbeddedIntentTriggerCommandSetExtension.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,208 @@
using System.Linq;
using System.Text;
using macaroni.DataTypes;
using Microsoft.CognitiveServices.Speech;
using Microsoft.CognitiveServices.Speech.Intent;
using static macaroni.HotkeyTriggerCommandSetExtension;

namespace macaroni;

/// <summary>
/// Command set extension that connects the intent triggers of a command set to the
/// <see cref="IIntentRecognizerService"/>, backed by an embedded language understanding (LU)
/// model described by an "lu.ini" file under the configured model path.
/// </summary>
internal class EmbeddedIntentTriggerCommandSetExtension : ICommandSetExtension, INotifyCondition, INotifyYamlValues
{
    /// <summary>
    /// Captures the owning command set and resolves the services this extension talks to.
    /// </summary>
    /// <param name="commandSet">The command set this extension is attached to.</param>
    public void InitCommandSetExtension(ICommandSet commandSet)
    {
        _commandSet = commandSet;
        _commandExecutionService = _commandSet?.GetParentOrService<ICommandExecutionService>();
        _recognizer = _commandSet?.GetParentOrService<IIntentRecognizerService>();

        // Concatenation never produces null, so no `?? ""` fallback is required here.
        _intentGroupId = _commandSet?.Id + "intents";
    }

    /// <summary>
    /// Enabling reloads the embedded LU model and (re)registers the intent callbacks;
    /// disabling removes this extension's callback group from the recognizer.
    /// </summary>
    /// <param name="enabled">True to enable the extension, false to disable it.</param>
    public void EnableExtension(bool enabled)
    {
        if (!enabled)
        {
            RemoveIntentCallbacks();
            return;
        }

        ReloadEmbeddedLUModel();
        RegisterCallbacks();
    }

    /// <summary>
    /// Reloads the model and re-registers callbacks when a YAML value changes.
    /// </summary>
    /// <param name="value">The value that changed; only its type name is traced.</param>
    public void ValueChanged(IYamlValue value)
    {
        // NOTE(review): the trace prefix names IntentTriggerCommandSetExtension rather than this
        // class - left untouched; confirm whether that is intentional.
        MR.DBG_TRACE_INFO($"IntentTriggerCommandSetExtension::ValueChanged({value.GetType().Name})");
        ReloadEmbeddedLUModel();
        RegisterCallbacks();
        MR.DBG_TRACE_INFO($"IntentTriggerCommandSetExtension::ValueChanged({value.GetType().Name}) ... Done!");
    }

    /// <summary>
    /// Reloads the model and re-registers callbacks when a condition changes.
    /// </summary>
    /// <param name="condition">The condition that changed; only its type name is traced.</param>
    public void ConditionChanged(ICondition condition)
    {
        MR.DBG_TRACE_INFO($"IntentTriggerCommandSetExtension::ConditionChanged({condition.GetType().Name})");
        ReloadEmbeddedLUModel();
        RegisterCallbacks();
        MR.DBG_TRACE_INFO($"IntentTriggerCommandSetExtension::ConditionChanged({condition.GetType().Name}) ... Done!");
    }

    /// <summary>
    /// Drops this extension's callback group, then registers one callback per intent trigger
    /// (from both Triggers and Expecting) of every command that is currently satisfied.
    /// </summary>
    private void RegisterCallbacks()
    {
        _recognizer?.RemoveIntentCallbackGroup(_intentGroupId);

        foreach (var command in _commandSet?.Commands ?? Enumerable.Empty<ICommand>())
        {
            if (command.Triggers == null) continue;
            if (!command.IsSatisfied()) continue;

            var triggers = command.Triggers.ToList();
            if (command.Expecting != null)
            {
                triggers.AddRange(command.Expecting);
            }

            // OfType skips both nulls and non-intent triggers.
            foreach (var intentTrigger in triggers.OfType<IIntentTrigger>())
            {
                _recognizer?.RegisterIntentCallback(intentTrigger.IntentId, intentTrigger.EntityId, _intentGroupId, x => InvokeCommand(x));
            }
        }
    }

    /// <summary>
    /// Unloads the model previously loaded by this extension (if any) and loads a fresh one.
    /// </summary>
    private void ReloadEmbeddedLUModel()
    {
        if (_model != null)
        {
            _recognizer?.UnloadEmbeddedLUModel(_model);
            _model = null;
        }

        LoadEmbeddedLUModel(_recognizer);
    }

    /// <summary>
    /// Creates the embedded LU model and hands it to <paramref name="recognizer"/>.
    /// </summary>
    /// <param name="recognizer">The recognizer service to load into; nothing is loaded when null.</param>
    private void LoadEmbeddedLUModel(IIntentRecognizerService? recognizer)
    {
        var model = CreateEmbeddedLUModel();
        if (model == null || recognizer == null) return;

        recognizer.LoadEmbeddedLUModel(model);

        // Remember what was loaded so ReloadEmbeddedLUModel can unload it later; without this
        // assignment the unload branch there could never run.
        _model = model;
    }

    /// <summary>
    /// Builds the embedded LU model from "lu.ini" under EMBEDDED_LANGUAGE_UNDERSTANDING_MODEL_PATH.
    /// </summary>
    /// <returns>
    /// The model, or null when the command set's conditions are not satisfied or no model path
    /// is configured.
    /// </returns>
    private EmbeddedLanguageUnderstandingModel? CreateEmbeddedLUModel()
    {
        var satisfied = _commandSet?.Conditions?.IsSatisfied();
        if (satisfied == false) return null;

        // `settings` is null when no command set has been attached yet, so every access is guarded.
        var settings = _commandSet?.GetRequiredParentOrService<ICommandSystemSettings>();
        var luModelPath = settings?.Get("EMBEDDED_LANGUAGE_UNDERSTANDING_MODEL_PATH", "");
        if (string.IsNullOrEmpty(luModelPath)) return null;

        var luIniConfig = System.IO.Path.Combine(luModelPath, "lu.ini");
        return EmbeddedLanguageUnderstandingModel.FromIniFile(luIniConfig, "");
    }

    /// <summary>
    /// Removes every callback registered under this extension's group id.
    /// </summary>
    private void RemoveIntentCallbacks()
    {
        _recognizer?.RemoveIntentCallbackGroup(_intentGroupId);
    }

    /// <summary>
    /// Recognizer callback: finds the command matching <paramref name="result"/> and executes it
    /// with a context resolver bound to that result.
    /// </summary>
    /// <param name="result">The recognized intent.</param>
    private void InvokeCommand(IntentResult result)
    {
        MR.DBG_TRACE_INFO($"--- INVOKING COMMAND: {result.IntentId}");

        // NOTE(review): GetCommand can return null; it is passed through to Execute unchanged,
        // exactly as before - confirm Execute tolerates a null command.
        var command = GetCommand(result);
        _commandExecutionService?.Execute(command, context => RegisterResolver(context, result));

        MR.DBG_TRACE_INFO($"- INVOKING COMMAND: {result.IntentId} ... Done!\n");
    }

    /// <summary>
    /// Returns the first command having an Expecting or Triggers entry that matches the result.
    /// </summary>
    /// <param name="result">The recognized intent.</param>
    /// <returns>The matching command, or null when none matches.</returns>
    private ICommand? GetCommand(IntentResult result)
    {
        return _commandSet?.Commands?.FirstOrDefault<ICommand>(command =>
            command?.Expecting?.Any<ICommandTrigger>(trigger => FindMatchingTrigger(result, trigger)) == true ||
            command?.Triggers?.Any<ICommandTrigger>(trigger => FindMatchingTrigger(result, trigger)) == true);
    }

    /// <summary>
    /// Tells whether <paramref name="trigger"/> is an intent trigger for the result's intent and,
    /// when the trigger names an entity, whether the result contains that entity.
    /// </summary>
    /// <param name="result">The recognized intent.</param>
    /// <param name="trigger">The trigger to test; null and non-intent triggers never match.</param>
    /// <returns>True when the trigger matches the result.</returns>
    private static bool FindMatchingTrigger(IntentResult result, ICommandTrigger? trigger)
    {
        if (trigger is not IIntentTrigger intentTrigger) return false;
        if (!(intentTrigger.IntentId == result.IntentId)) return false;

        return intentTrigger.EntityId == null || result.Entities.ContainsKey(intentTrigger.EntityId);
    }

    /// <summary>
    /// Registers a resolver on the execution context that answers lookups from the intent result.
    /// </summary>
    /// <param name="context">The execution context of the command being run.</param>
    /// <param name="result">The recognized intent the resolver reads from.</param>
    private void RegisterResolver(IExecutionContext context, IntentResult result)
    {
        context.RegisterResolver(
            nameof(IntentTriggerCommandSetExtension),
            name => ResolveContext(result, name));
    }

    /// <summary>
    /// Resolves a context variable name against the intent result.
    /// </summary>
    /// <param name="result">The recognized intent.</param>
    /// <param name="name">The variable name being resolved.</param>
    /// <returns>
    /// For "context.keys", a newline-separated list of the available names; otherwise the fixed
    /// value, entity value, entity JSON, or recognition result property for the name; null when
    /// nothing matches.
    /// </returns>
    private object? ResolveContext(IntentResult result, string name)
    {
        if (name == "context.keys")
        {
            var fixedKeys = new string[] {
                "trigger.type",
                "intent.trigger",
                "result.intentId",
                "result.text"
            };

            // Only advertise "<entity>.json" for entities that actually have JSON available.
            var jsonKeys = result.Entities.Keys
                .Select(x => $"{x}.json")
                .Where(x => TryGetCluEntityJson(result, x, out _));

            return string.Join('\n', fixedKeys.Concat(result.Entities.Keys).Concat(jsonKeys));
        }

        if (name == "trigger.type") return "intent";
        if (name == "intent.trigger") return result.IntentId;
        if (name == "result.intentId") return result.IntentId;
        if (name == "result.text") return result.IntentRecognitionResult.Text;

        // Single lookup per key instead of ContainsKey followed by the indexer.
        if (result.Entities.TryGetValue(name, out var entity)) return entity;
        if (result.Entities.TryGetValue($"{name}*", out var starEntity)) return starEntity;

        // Ordinal comparison: the helper strips exactly ".json".Length characters from the end.
        if (name.EndsWith(".json", StringComparison.Ordinal) && TryGetCluEntityJson(result, name, out var json)) return json;

        var value = result.IntentRecognitionResult.Properties.GetProperty(name);
        if (!string.IsNullOrEmpty(value)) return value;

        return null;
    }

    /// <summary>
    /// Looks up the JSON for the entity named by <paramref name="name"/> minus its ".json" suffix.
    /// </summary>
    /// <param name="result">The recognized intent.</param>
    /// <param name="name">An entity name followed by ".json"; callers guarantee the suffix.</param>
    /// <param name="json">The entity JSON when found; otherwise null.</param>
    /// <returns>True when the entity exists and has JSON.</returns>
    private bool TryGetCluEntityJson(IntentResult result, string name, out string? json)
    {
        json = null;

        var entityName = name.Substring(0, name.Length - ".json".Length);
        if (!result.Entities.ContainsKey(entityName)) return false;
        if (result.EntitiesJson == null) return false;

        return result.EntitiesJson.TryGetValue(entityName, out json);
    }

    private ICommandSet? _commandSet;
    private ICommandExecutionService? _commandExecutionService;
    private EmbeddedLanguageUnderstandingModel? _model;
    private IIntentRecognizerService? _recognizer;
    private string _intentGroupId = "";
}
3 changes: 2 additions & 1 deletion src/cs/CommandSystem/CommandSystem.cs
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,8 @@ public static ICommandSystemBuilder CreateBuilder()

// conditions/triggers/executor extensions
services.AddTransient<ICommandSetExtension, PhraseTriggerCommandSetExtension>();
services.AddTransient<ICommandSetExtension, IntentTriggerCommandSetExtension>();
//services.AddTransient<ICommandSetExtension, IntentTriggerCommandSetExtension>();
services.AddTransient<ICommandSetExtension, EmbeddedIntentTriggerCommandSetExtension>();
services.AddTransient<ICommandSetExtension, MessageTriggerCommandSetExtension>();
services.AddTransient<ICommandSetExtension, HotkeyTriggerCommandSetExtension>();
services.AddTransient<ICommandSetExtension, NuggetTriggerCommandSetExtension>();
Expand Down
26 changes: 18 additions & 8 deletions src/cs/CommandSystem/DeveloperSettings.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
using Microsoft.Extensions.DependencyInjection;

namespace macaroni;
namespace macaroni;

internal class DeveloperSettings : Dictionary<string, string>, IDeveloperSettings
{
Expand Down Expand Up @@ -43,6 +41,7 @@ private void Init()
this.Add("CLU_DEPLOYMENT_NAME", "v1");
this.Add("CLU_ENDPOINT", "https://internal-gm-dev.cognitiveservices.azure.com");
this.Add("CLU_PROJECT_NAME", "GM-Orchestrator");

this.Add("OPEN_AI_DEPLOYMENT", "robch-southcentral-oai-txtdav002");
this.Add("OPEN_AI_ENDPOINT", "https://robch-openai.openai.azure.com/");
this.Add("SPEECH_REGION", "westus");
Expand All @@ -56,25 +55,36 @@ private void Init()
this.Add("VISION_ENDPOINT", "https://carbon-vision.cognitiveservices.azure.com");
}

if (OS.IsWindows())
{
this.Add("SPEECH_SDK_LOG_PATH", $"macaroni.carbon-{DateTime.Now.ToFileTime()}.log");
}
else
{
this.Add("SPEECH_SDK_LOG_PATH", Path.Combine(@"/storage/emulated/0/Android/data/com.companyname.maui/files", $"macaroni.carbon-{DateTime.Now.ToFileTime()}.log")); // e.g. @" / data/user/0/com.companyname.maui/files/embedded/lu");
}

// DEVELOPER: To use Embedded speech (SR and TTS), update your local source to set `embedded=true` and replace
// `UPDATE_PATH_HERE` placeholders with appropriate locations where the model data files can be found

var embedded = false; // !!!IMPORTANT!!!: DO NOT CHECK IN this file with embedded set to TRUE
var embedded = true; // !!!IMPORTANT!!!: DO NOT CHECK IN this file with embedded set to TRUE
if (embedded) // also update <SpeechSDKEmbedded>true/false</SpeechSDKEmbedded> in `csproj` files
{
if (OS.IsWindows())
{
this.Add("EMBEDDED_SPEECH_RECOGNITION_MODEL_PATH", @"UPDATE_PATH_HERE"); // e.g. @"D:\src\macaroni\external\embedded_sr_model_FP_en-US_V8_onnx");
this.Add("EMBEDDED_SPEECH_SYNTHESIS_VOICE_PATH", @"UPDATE_PATH_HERE"); // e.g. @"D:\src\macaroni\external\embedded_tts_mark_sps");
this.Add("EMBEDDED_LANGUAGE_UNDERSTANDING_MODEL_PATH", @"UPDATE_PATH_HERE"); // e.g. @"D:\src\macaroni\external\lu");
}
else
{
this.Add("EMBEDDED_SPEECH_RECOGNITION_MODEL_PATH", @"UPDATE_PATH_HERE"); // e.g. @"/data/user/0/com.companyname.maui/files/embedded/sr");
this.Add("EMBEDDED_SPEECH_SYNTHESIS_VOICE_PATH", @"UPDATE_PATH_HERE"); // e.g. @"/data/user/0/com.companyname.maui/files/embedded/tts");
this.Add("EMBEDDED_SPEECH_RECOGNITION_MODEL_PATH", @"/data/user/0/com.companyname.maui/files/embedded/sr"); // e.g. @"/data/user/0/com.companyname.maui/files/embedded/sr");
this.Add("EMBEDDED_SPEECH_SYNTHESIS_VOICE_PATH", @"/data/user/0/com.companyname.maui/files/embedded/tts"); // e.g. @" / data/user/0/com.companyname.maui/files/embedded/tts");
this.Add("EMBEDDED_LANGUAGE_UNDERSTANDING_MODEL_PATH", @"/data/user/0/com.companyname.maui/files/embedded/lu"); // e.g. @" / data/user/0/com.companyname.maui/files/embedded/lu");
}

this.Add("EMBEDDED_SPEECH_RECOGNITION_MODEL_NAME", "Microsoft Speech Recognizer en-US FP Model V8");
this.Add("EMBEDDED_SPEECH_SYNTHESIS_VOICE_NAME", "Microsoft Server Speech Text to Speech Voice (en-US, Mark, Apollo)");
this.Add("EMBEDDED_SPEECH_RECOGNITION_MODEL_NAME", "Microsoft Speech Recognizer en-US FP Model V8.1");
this.Add("EMBEDDED_SPEECH_SYNTHESIS_VOICE_NAME", "Microsoft Server Speech Text to Speech Voice (en-US, AriaNeural)");
this.Add("EMBEDDED_SPEECH_RECOGNITION_MODEL_KEY", "");
this.Add("EMBEDDED_SPEECH_SYNTHESIS_VOICE_KEY", "");
}
Expand Down
11 changes: 8 additions & 3 deletions src/cs/CommandSystemServices/SpeechConfigService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -61,15 +61,20 @@ public SpeechConfig GetSpeechConfig(SpeechConfigKind kind)
var modelKey = _settings.Get("EMBEDDED_SPEECH_RECOGNITION_MODEL_KEY", "");
var voiceName = _settings.Get("EMBEDDED_SPEECH_SYNTHESIS_VOICE_NAME", "");
var voiceKey = _settings.Get("EMBEDDED_SPEECH_SYNTHESIS_VOICE_KEY", "");
var sdkLogFile = _settings.Get("SPEECH_SDK_LOG_PATH", "");

var config = EmbeddedSpeechConfig.FromPaths(embeddedPaths);

if (!string.IsNullOrEmpty(sdkLogFile))
{
config.SetProperty(PropertyId.Speech_LogFilename, sdkLogFile);
}

if (!string.IsNullOrEmpty(modelName) && !string.IsNullOrEmpty(voiceName))
{
var config = EmbeddedSpeechConfig.FromPaths(embeddedPaths);
config.SetSpeechRecognitionModel(modelName, modelKey);
config.SetSpeechSynthesisVoice(voiceName, voiceKey);
config.SetProperty(PropertyId.Speech_SegmentationSilenceTimeoutMs, GetSegmentationTimeout());
config.SetProperty("EmbeddedSpeech-DisableTelemetry", "true");

if (voiceName.Contains("Neural"))
{
config.SetSpeechSynthesisOutputFormat(SpeechSynthesisOutputFormat.Riff24Khz16BitMonoPcm);
Expand Down
49 changes: 42 additions & 7 deletions src/cs/IntentRecognition/IntentRecognizerService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,25 @@ public void Emulate(string text)
WaitForever(recognizer.RecognizeOnceAsync(text));
}

/// <summary>
/// Removes the entry for <paramref name="model"/> (by its ModelId) from the tracked model collection.
/// </summary>
/// <param name="model">The embedded LU model to stop tracking.</param>
// NOTE(review): unlike LoadEmbeddedLUModel, this does not call ApplyLanguageModels on the
// recognizers afterwards - confirm whether the removal is meant to take effect only on the
// next load.
public void UnloadEmbeddedLUModel(EmbeddedLanguageUnderstandingModel model)
{
// The collection itself is used as the lock object for all mutations.
lock (_models)
{
_models.Remove(model.ModelId);
}
}

/// <summary>
/// Removes any tracked entry for the model's ModelId, adds <paramref name="model"/>, and then
/// applies the tracked collection to whichever recognizers currently exist.
/// </summary>
/// <param name="model">The embedded LU model to track and apply.</param>
public void LoadEmbeddedLUModel(EmbeddedLanguageUnderstandingModel model)
{
    lock (_models)
    {
        // `lock` would already have thrown on a null collection, so it is dereferenced directly.
        _models.Remove(model.ModelId);
        _models.Add(model);

        _recognizerOnce?.ApplyLanguageModels(_models);
        _recognizerContinuous?.ApplyLanguageModels(_models);
    }
}

public void RegisterIntentCallback(string intentId, string? entityId, string groupId, Action<IntentResult> callback)
{
lock (_callbacks)
Expand Down Expand Up @@ -297,6 +316,10 @@ private void CheckInitCLU()
{
InitCLU();
}
else
{
InitEmbeddedLU();
}
}

private void InitCLU()
Expand All @@ -309,15 +332,27 @@ private void InitCLU()
var project = settings.Get("CLU_PROJECT_NAME")?.Trim();
var deployment = settings.Get("CLU_DEPLOYMENT_NAME")?.Trim();

// TODO, nateko, enable this after upgrading the SDK.
// only create the model if we have everything
if ((key ?? endpoint ?? project ?? deployment) != null)
{
var cluMode = new ConversationalLanguageUnderstandingModel(key, endpoint, project, deployment);
_models.Add(new ConversationalLanguageUnderstandingModel(key, endpoint, project, deployment));
}
//if ((key ?? endpoint ?? project ?? deployment) != null)
//{
// var cluMode = new ConversationalLanguageUnderstandingModel(key, endpoint, project, deployment);
// _models.Add(new ConversationalLanguageUnderstandingModel(key, endpoint, project, deployment));
//}
// end TODO
}

/// <summary>
/// Adds the embedded LU model described by "lu.ini" under the configured
/// EMBEDDED_LANGUAGE_UNDERSTANDING_MODEL_PATH setting to the tracked model collection.
/// </summary>
private void InitEmbeddedLU()
{
    //TODO: this should come from some extension that handles declarations of CLU models. Also should we
    // support multiple CLU projects?
    var configuredPath = _serviceProvider
        .GetRequiredService<ICommandSystemSettings>()
        .Get("EMBEDDED_LANGUAGE_UNDERSTANDING_MODEL_PATH");

    // A missing setting is treated as an empty directory, which leaves just "lu.ini".
    var modelDirectory = (configuredPath ?? string.Empty).Trim();
    var iniFile = Path.Combine(modelDirectory, "lu.ini");

    var embeddedModel = EmbeddedLanguageUnderstandingModel.FromIniFile(iniFile, string.Empty);
    _models.Add(embeddedModel);
}

private void ResetRecognizer(ref IntentRecognizer? recognizer)
{
lock (this)
Expand Down Expand Up @@ -381,9 +416,9 @@ private IntentRecognizer CreateIntentRecognizer()
{
return new IntentRecognizer(embeddedConfig, audio);
}
catch (Exception)
catch (Exception e)
{
MR.DBG_TRACE_ERROR("Failed to create embedded IntentRecognizer");
MR.DBG_TRACE_ERROR($"Failed to create embedded IntentRecognizer {e.Message}");
}
}

Expand Down
Loading