Skip to content

Commit e06e8e3

Browse files
SpeechRecognitionModule to dll
1 parent 802361e commit e06e8e3

File tree

11 files changed: +37 additions, -2358 deletions.

One binary file (14.9 MB) is not shown.

Source/SmartCompanion/Commands/CommandHandler/CommandHandler.cpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,10 @@ bool CommandHandler::Init()
4141

4242
commandStorage.Add(FString("red"), TSharedPtr<ICommand>(new KillRed()));
4343
commandStorage.Add(FString("read"), TSharedPtr<ICommand>(new KillRed()));
44+
commandStorage.Add(FString("and"), TSharedPtr<ICommand>(new KillRed()));
45+
commandStorage.Add(FString("read"), TSharedPtr<ICommand>(new KillRed()));
4446
commandStorage.Add(FString("blue"), TSharedPtr<ICommand>(new KillBlue()));
47+
commandStorage.Add(FString("you"), TSharedPtr<ICommand>(new KillBlue()));
4548

4649
bRunThread = true;
4750
return true;
@@ -53,8 +56,8 @@ uint32 CommandHandler::Run()
5356
{
5457
if (isActivateSpeechRecognition)
5558
{
56-
//FindCommand();
57-
commandStorage[FString("blue")]->Run();
59+
for (int i = 0; i < 10; ++i) FindCommand();
60+
//commandStorage[FString("blue")]->Run();
5861
isActivateSpeechRecognition = false;
5962
}
6063
}
@@ -69,7 +72,7 @@ void CommandHandler::Stop()
6972

7073
void CommandHandler::FindCommand()
7174
{
72-
auto text = speechRecoginitonModule.Run();
75+
// Run is a raw function pointer into the speech-recognition DLL and returns a C string.
// Constructing std::string from a null pointer is undefined behavior, so fall back to "".
// NOTE(review): whether the DLL can actually return nullptr is not visible here - confirm.
const char* recognizedText = SPEECHRECOGNITION_SINGLETON.Run();
std::string text(recognizedText ? recognizedText : "");
7376

7477
for (auto& [key, value] : commandStorage)
7578
{

Source/SmartCompanion/Commands/CommandHandler/CommandHandler.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,5 @@ class CommandHandler : public FRunnable
3333

3434
bool isActivateSpeechRecognition;
3535
TMap<FString, TSharedPtr<ICommand>> commandStorage;
36-
37-
UESpeechRecognitionModule speechRecoginitonModule;
38-
UEComputerVisionModule computerVisionModule;
39-
4036
UWorld* worldContext;
4137
};

Source/UEComputerVision/UEComputerVision.Build.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ public void LoadComputerVisionModule(ReadOnlyTargetRules Target)
2525
//PublicDelayLoadDLLs.Add("ComputerVisionModule.dll");
2626

2727
CopyToBinaries(baseDir + "\\ThirdParty\\ComputerVisionModule\\bin\\ComputerVisionModule.dll", Target);
28+
CopyToBinaries(baseDir + "\\ThirdParty\\OpenCV\\bin\\opencv_world470.dll", Target);
2829
}
2930

3031
public UEComputerVision(ReadOnlyTargetRules Target) : base(Target)

Source/UESpeechRecognition/UESpeechRecognition.Build.cs

Lines changed: 6 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -18,29 +18,19 @@ private void CopyToBinaries(string Filepath, ReadOnlyTargetRules Target)
1818
File.Copy(Filepath, Path.Combine(binariesDir, filename), true);
1919
}
2020

21-
public void LoadVosk(ReadOnlyTargetRules Target)
21+
public void LoadSpeechRecognitionModule(ReadOnlyTargetRules Target)
2222
{
23-
PublicAdditionalLibraries.Add(baseDir + "\\ThirdParty\\Vosk\\vosk-win64\\libvosk.lib");
24-
PublicIncludePaths.Add(baseDir + "\\ThirdParty\\Vosk\\vosk-win64");
25-
RuntimeDependencies.Add(baseDir + "\\ThirdParty\\Vosk\\vosk-win64\\libvosk.dll");
26-
PublicDelayLoadDLLs.Add("libvosk.dll");
23+
PublicAdditionalLibraries.Add(baseDir + "\\ThirdParty\\SpeechRecognitionModule\\lib\\SpeechRecognitionModule.lib");
24+
RuntimeDependencies.Add(baseDir + "\\ThirdParty\\SpeechRecognitionModule\\bin\\SpeechRecognitionModule.dll");
2725

26+
CopyToBinaries(baseDir + "\\ThirdParty\\SpeechRecognitionModule\\bin\\SpeechRecognitionModule.dll", Target);
27+
CopyToBinaries(baseDir + "\\ThirdParty\\PortAudio\\Lib\\Release\\portaudio_x64.dll", Target);
2828
CopyToBinaries(baseDir + "\\ThirdParty\\Vosk\\vosk-win64\\libgcc_s_seh-1.dll", Target);
2929
CopyToBinaries(baseDir + "\\ThirdParty\\Vosk\\vosk-win64\\libstdc++-6.dll", Target);
3030
CopyToBinaries(baseDir + "\\ThirdParty\\Vosk\\vosk-win64\\libwinpthread-1.dll", Target);
3131
CopyToBinaries(baseDir + "\\ThirdParty\\Vosk\\vosk-win64\\libvosk.dll", Target);
3232
}
3333

34-
public void LoadPortAudio(ReadOnlyTargetRules Target)
35-
{
36-
PublicAdditionalLibraries.Add(baseDir + "\\ThirdParty\\PortAudio\\Lib\\Release\\portaudio_x64.lib");
37-
PublicIncludePaths.Add(baseDir + "\\ThirdParty\\PortAudio\\Include");
38-
RuntimeDependencies.Add(baseDir + "\\ThirdParty\\PortAudio\\Lib\\Release\\portaudio_x64.dll");
39-
PublicDelayLoadDLLs.Add("portaudio_x64.dll");
40-
41-
CopyToBinaries(baseDir + "\\ThirdParty\\PortAudio\\Lib\\Release\\portaudio_x64.dll", Target);
42-
}
43-
4434
public UESpeechRecognition(ReadOnlyTargetRules Target) : base(Target)
4535
{
4636
bEnableExceptions = true;
@@ -50,7 +40,6 @@ public UESpeechRecognition(ReadOnlyTargetRules Target) : base(Target)
5040
"Core"
5141
});
5242

53-
LoadVosk(Target);
54-
LoadPortAudio(Target);
43+
LoadSpeechRecognitionModule(Target);
5544
}
5645
}

Source/UESpeechRecognition/UESpeechRecognitionModule.cpp

Lines changed: 18 additions & 155 deletions
Original file line numberDiff line numberDiff line change
@@ -1,182 +1,45 @@
11
// Copyright Epic Games, Inc. All Rights Reserved.
22

33
#include "UESpeechRecognitionModule.h"
4-
#include "simpleson/json.h"
54
#include "Modules/ModuleManager.h"
65

7-
void UESpeechRecognitionModule::StartupVosk()
6+
void UESpeechRecognitionModule::StartupUESpeechRecognitionModule()
87
{
9-
const FString LibVoskPath = FPaths::Combine(*BasePluginDir, TEXT("Binaries/Win64/libvosk.dll"));
10-
DynamicLibVoskHandle = FPlatformProcess::GetDllHandle(*LibVoskPath);
8+
// Resolve the DLL relative to the project directory instead of a machine-specific absolute path.
// NOTE(review): assumes the build step copies the DLL into <ProjectDir>/Binaries/Win64 - confirm against CopyToBinaries.
const FString LibSpeechRecognitionModule = FPaths::Combine(FPaths::ProjectDir(), TEXT("Binaries/Win64/SpeechRecognitionModule.dll"));
9+
DynamicLibSpeechRecognitionModuleHandle = FPlatformProcess::GetDllHandle(*LibSpeechRecognitionModule);
1110

12-
if (DynamicLibVoskHandle)
11+
if (DynamicLibSpeechRecognitionModuleHandle)
1312
{
14-
UE_LOG(LogTemp, Log, TEXT("libvosk.dll loaded successfully!"));
13+
UE_LOG(LogTemp, Log, TEXT("SpeechRecognitionModule.dll loaded successfully!"));
1514
}
1615
else
1716
{
18-
UE_LOG(LogTemp, Fatal, TEXT("libvosk.dll failed to load!"));
17+
UE_LOG(LogTemp, Fatal, TEXT("SpeechRecognitionModule.dll failed to load!"));
1918
}
2019
}
2120

22-
void UESpeechRecognitionModule::StatupPortAudio()
21+
void UESpeechRecognitionModule::ShutdownUESpeechRecognitionModule()
2322
{
24-
const FString LibPortAudioPath = FPaths::Combine(*BasePluginDir, TEXT("Binaries/Win64/portaudio_x64.dll"));
25-
DynamicLibPortAudioHandle = FPlatformProcess::GetDllHandle(*LibPortAudioPath);
26-
27-
if (DynamicLibPortAudioHandle)
28-
{
29-
UE_LOG(LogTemp, Log, TEXT("portaudio_x64.dll loaded successfully!"));
30-
}
31-
else
32-
{
33-
UE_LOG(LogTemp, Fatal, TEXT("portaudio_x64.dll failed to load!"));
34-
}
35-
}
36-
37-
void UESpeechRecognitionModule::ShutdownVosk()
38-
{
39-
if (DynamicLibVoskHandle) FPlatformProcess::FreeDllHandle(DynamicLibVoskHandle);
40-
DynamicLibVoskHandle = nullptr;
41-
}
42-
43-
void UESpeechRecognitionModule::ShutdownPortAudio()
44-
{
45-
if (DynamicLibPortAudioHandle) FPlatformProcess::FreeDllHandle(DynamicLibPortAudioHandle);
46-
DynamicLibPortAudioHandle = nullptr;
23+
FPlatformProcess::FreeDllHandle(DynamicLibSpeechRecognitionModuleHandle);
24+
DynamicLibSpeechRecognitionModuleHandle = nullptr;
4725
}
4826

4927
void UESpeechRecognitionModule::StartupModule()
5028
{
51-
//StartupVosk();
52-
//StatupPortAudio();
53-
54-
if (!InializeModelAndRecognizer()) return;
55-
if (!InitializePortAudio()) return;
56-
if (!SetAudioDevice()) return;
57-
if (!OpenStream()) return;
58-
if (!StartStream()) return;
59-
}
60-
61-
void UESpeechRecognitionModule::ShutdownModule()
62-
{
63-
Pa_CloseStream(stream);
64-
vosk_recognizer_free(recognizer);
65-
vosk_model_free(model);
66-
67-
//if (DynamicLibVoskHandle) ShutdownVosk();
68-
//if (DynamicLibPortAudioHandle) ShutdownPortAudio();
69-
}
70-
71-
bool UESpeechRecognitionModule::InializeModelAndRecognizer()
72-
{
73-
std::string path = baseDir + "\\Models\\Vosk\\vosk-model-small-en-us-0.15";
74-
75-
model = vosk_model_new(path.c_str());
76-
if (!model)
77-
{
78-
UE_LOG(LogTemp, Display, TEXT("vosk_model_new: error"));
79-
return false;
80-
}
81-
82-
recognizer = vosk_recognizer_new(model, 16000.0);
83-
if (!recognizer)
84-
{
85-
UE_LOG(LogTemp, Display, TEXT("vosk_recognizer_new: error"));
86-
return false;
87-
}
88-
89-
return true;
90-
}
91-
92-
bool UESpeechRecognitionModule::InitializePortAudio()
93-
{
94-
PaError err = Pa_Initialize();
95-
if (err != paNoError)
96-
{
97-
UE_LOG(LogTemp, Display, TEXT("Pa_Initialize: "), Pa_GetErrorText(err));
98-
return false;
99-
}
100-
101-
return true;
102-
}
103-
104-
bool UESpeechRecognitionModule::SetAudioDevice()
105-
{
106-
inputParametrs.channelCount = 1;
107-
inputParametrs.sampleFormat = paInt16;
108-
inputParametrs.hostApiSpecificStreamInfo = nullptr;
109-
inputParametrs.device = Pa_GetDefaultInputDevice();
29+
StartupUESpeechRecognitionModule();
11030

111-
if (inputParametrs.device == paNoDevice)
112-
{
113-
UE_LOG(LogTemp, Display, TEXT("Pa_GetDefaultInputDevice: no device"));
114-
return false;
115-
}
116-
117-
return true;
118-
}
119-
120-
bool UESpeechRecognitionModule::OpenStream()
121-
{
122-
PaError err = Pa_OpenStream(&stream, &inputParametrs, nullptr, 16000.0, 8192, 0, nullptr, nullptr);
123-
if (err != paNoError)
124-
{
125-
UE_LOG(LogTemp, Display, TEXT("Pa_OpenStream: "), Pa_GetErrorText(err));
126-
return false;
127-
}
128-
129-
return true;
130-
}
131-
132-
bool UESpeechRecognitionModule::StartStream()
133-
{
134-
PaError err = Pa_StartStream(stream);
135-
if (err != paNoError)
136-
{
137-
UE_LOG(LogTemp, Display, TEXT("Pa_StartStream: "), Pa_GetErrorText(err));
138-
return false;
139-
}
31+
Initialize = (const char*(*)())(FPlatformProcess::GetDllExport(DynamicLibSpeechRecognitionModuleHandle, TEXT("Initialize")));
32+
Run = (const char* (*)())(FPlatformProcess::GetDllExport(DynamicLibSpeechRecognitionModuleHandle, TEXT("Run")));
33+
Shutdown = (void(*)())(FPlatformProcess::GetDllExport(DynamicLibSpeechRecognitionModuleHandle, TEXT("Shutdown")));
14034

141-
return true;
35+
// GetDllExport yields nullptr when the symbol is missing; calling through a null pointer would crash.
FString resInitialize(Initialize ? Initialize() : "Initialize export not found in SpeechRecognitionModule.dll");
36+
// %s expects a TCHAR*; FString must be dereferenced with '*' when passed through UE_LOG's varargs.
UE_LOG(LogTemp, Display, TEXT("%s"), *resInitialize);
14237
}
14338

144-
std::string UESpeechRecognitionModule::Run()
145-
{
146-
bool isCorrectRead = ReadDataFromStream();
147-
if (!isCorrectRead) return {};
148-
149-
std::string recognizedText = Recognize();
150-
return recognizedText;
151-
}
152-
153-
bool UESpeechRecognitionModule::ReadDataFromStream()
154-
{
155-
PaError err = Pa_ReadStream(stream, (void*)data, SPEECH_BUFFER_SIZE / 2);
156-
if (err != paNoError && err != paInputOverflowed)
157-
{
158-
UE_LOG(LogTemp, Display, TEXT("Pa_ReadStream: "), Pa_GetErrorText(err));
159-
return false;
160-
}
161-
162-
return true;
163-
}
164-
165-
std::string UESpeechRecognitionModule::Recognize()
39+
void UESpeechRecognitionModule::ShutdownModule()
16640
{
167-
if (vosk_recognizer_accept_waveform(recognizer, data, sizeof(data)) == -1)
168-
{
169-
UE_LOG(LogTemp, Display, TEXT("vosk_recognizer_accept_waveform: error"));
170-
return {};
171-
}
172-
173-
auto resRegonition(vosk_recognizer_result(recognizer));
174-
auto resJSON = json::jobject::parse(resRegonition);
175-
176-
FString textFString(resJSON.get("text").c_str());
177-
UE_LOG(LogTemp, Display, TEXT("TEXT: %s"), *textFString);
178-
179-
return resJSON.get("text");
41+
Shutdown();
42+
ShutdownUESpeechRecognitionModule();
18043
}
18144

18245
IMPLEMENT_MODULE(UESpeechRecognitionModule, UESpeechRecognition);

Source/UESpeechRecognition/UESpeechRecognitionModule.h

Lines changed: 6 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -6,48 +6,23 @@
66
#include "../ConfigData.h"
77
#include "../ISmartModule.h"
88

9-
#pragma comment(lib, "E:\\SmartCompanion\\ThirdParty\\PortAudio\\Lib\\Release\\portaudio_static_x64.lib")
10-
#pragma comment(lib, "E:\\SmartCompanion\\ThirdParty\\Vosk\\vosk-win64\\libvosk.lib")
11-
12-
#include "..\..\ThirdParty\PortAudio\Include\portaudio.h"
13-
#include "..\..\ThirdParty\Vosk\vosk-win64\vosk_api.h"
14-
159
#define SPEECHRECOGNITION_SINGLETON ((UESpeechRecognitionModule&)(FModuleManager::Get().LoadModuleChecked(TEXT("UESpeechRecognition"))))
1610

1711
class UESpeechRecognitionModule : public ISmartModule
1812
{
1913
private:
20-
VoskModel* model;
21-
VoskRecognizer* recognizer;
22-
PaStream* stream;
23-
PaStreamParameters inputParametrs;
24-
25-
char data[SPEECH_BUFFER_SIZE];
26-
27-
const FString BasePluginDir = "E:/SmartCompanion";
28-
29-
void* DynamicLibVoskHandle;
30-
void* DynamicLibPortAudioHandle;
14+
// Handle to the loaded SpeechRecognitionModule.dll; null until the DLL has been loaded.
void* DynamicLibSpeechRecognitionModuleHandle = nullptr;
3115

3216
private:
33-
bool InializeModelAndRecognizer();
34-
bool InitializePortAudio();
35-
bool SetAudioDevice();
36-
bool OpenStream();
37-
bool StartStream();
38-
39-
bool ReadDataFromStream();
40-
std::string Recognize();
41-
42-
void StartupVosk();
43-
void StatupPortAudio();
17+
// Function pointer to the DLL's "Initialize" export; null until it has been resolved.
const char* (*Initialize)() = nullptr;
18+
// Function pointer to the DLL's "Shutdown" export; null until it has been resolved.
void (*Shutdown)() = nullptr;
4419

45-
void ShutdownVosk();
46-
void ShutdownPortAudio();
20+
void StartupUESpeechRecognitionModule();
21+
void ShutdownUESpeechRecognitionModule();
4722

4823
public:
4924
UESPEECHRECOGNITION_API void StartupModule() override;
5025
UESPEECHRECOGNITION_API void ShutdownModule() override;
5126

52-
UESPEECHRECOGNITION_API std::string Run();
27+
// Function pointer to the DLL's "Run" export, which returns a C string; null until it has been resolved.
const char* (*Run)() = nullptr;
5328
};

0 commit comments

Comments
 (0)