Skip to content

Commit 484063c

Browse files
authored
Merge pull request #20 from Abandon-ht/dev
update vad_whisper api & demo
2 parents 947ea55 + 3d61ba7 commit 484063c

File tree

11 files changed

+364
-3
lines changed

11 files changed

+364
-3
lines changed
Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
/*
2+
* SPDX-FileCopyrightText: 2024 M5Stack Technology CO LTD
3+
*
4+
* SPDX-License-Identifier: MIT
5+
*/
6+
#include <Arduino.h>
7+
#include <M5Unified.h>
8+
#include <M5ModuleLLM.h>
9+
10+
M5ModuleLLM module_llm;
11+
12+
/* Must be capitalized */
13+
String wake_up_keyword = "HELLO";
14+
// String wake_up_keyword = "你好你好";
15+
String kws_work_id;
16+
String vad_work_id;
17+
String whisper_work_id;
18+
String language;
19+
20+
void setup()
21+
{
22+
M5.begin();
23+
M5.Display.setTextSize(2);
24+
M5.Display.setTextScroll(true);
25+
// M5.Display.setFont(&fonts::efontCN_12); // Support Chinese display
26+
// M5.Display.setFont(&fonts::efontJA_12); // Support Japanese display
27+
28+
language = "en_US";
29+
// language = "zh_CN";
30+
31+
/* Init module serial port */
32+
// int rxd = 16, txd = 17; // Basic
33+
// int rxd = 13, txd = 14; // Core2
34+
// int rxd = 18, txd = 17; // CoreS3
35+
int rxd = M5.getPin(m5::pin_name_t::port_c_rxd);
36+
int txd = M5.getPin(m5::pin_name_t::port_c_txd);
37+
Serial2.begin(115200, SERIAL_8N1, rxd, txd);
38+
39+
/* Init module */
40+
module_llm.begin(&Serial2);
41+
42+
/* Make sure module is connected */
43+
M5.Display.printf(">> Check ModuleLLM connection..\n");
44+
while (1) {
45+
if (module_llm.checkConnection()) {
46+
break;
47+
}
48+
}
49+
50+
/* Reset ModuleLLM */
51+
M5.Display.printf(">> Reset ModuleLLM..\n");
52+
module_llm.sys.reset();
53+
54+
/* Setup Audio module */
55+
M5.Display.printf(">> Setup audio..\n");
56+
module_llm.audio.setup();
57+
58+
/* Setup KWS module and save returned work id */
59+
M5.Display.printf(">> Setup kws..\n");
60+
m5_module_llm::ApiKwsSetupConfig_t kws_config;
61+
kws_config.kws = wake_up_keyword;
62+
kws_work_id = module_llm.kws.setup(kws_config, "kws_setup", language);
63+
64+
/* Setup VAD module and save returned work id */
65+
M5.Display.printf(">> Setup vad..\n");
66+
m5_module_llm::ApiVadSetupConfig_t vad_config;
67+
vad_config.input = {"sys.pcm", kws_work_id};
68+
vad_work_id = module_llm.vad.setup(vad_config, "vad_setup");
69+
70+
/* Setup Whisper module and save returned work id */
71+
M5.Display.printf(">> Setup whisper..\n");
72+
m5_module_llm::ApiWhisperSetupConfig_t whisper_config;
73+
whisper_config.input = {"sys.pcm", kws_work_id, vad_work_id};
74+
whisper_config.language = "en";
75+
// whisper_config.language = "zh";
76+
// whisper_config.language = "ja";
77+
whisper_work_id = module_llm.whisper.setup(whisper_config, "whisper_setup");
78+
79+
M5.Display.printf(">> Setup ok\n>> Say \"%s\" to wakeup\n", wake_up_keyword.c_str());
80+
}
81+
82+
void loop()
83+
{
84+
/* Update ModuleLLM */
85+
module_llm.update();
86+
87+
/* Handle module response messages */
88+
for (auto& msg : module_llm.msg.responseMsgList) {
89+
/* If KWS module message */
90+
if (msg.work_id == kws_work_id) {
91+
M5.Display.setTextColor(TFT_GREENYELLOW);
92+
M5.Display.printf(">> Keyword detected\n");
93+
}
94+
95+
/* If ASR module message */
96+
if (msg.work_id == whisper_work_id) {
97+
/* Check message object type */
98+
if (msg.object == "asr.utf-8") {
99+
/* Parse message json and get ASR result */
100+
JsonDocument doc;
101+
deserializeJson(doc, msg.raw_msg);
102+
String asr_result = doc["data"].as<String>();
103+
104+
M5.Display.setTextColor(TFT_YELLOW);
105+
M5.Display.printf(">> %s\n", asr_result.c_str());
106+
}
107+
}
108+
}
109+
110+
/* Clear handled messages */
111+
module_llm.msg.responseMsgList.clear();
112+
}

examples/YOLO/YOLO.ino

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,5 +122,8 @@ void loop()
122122
}
123123

124124
/* Clear handled messages */
125+
module_llm.msg.clearMsg("yolo_setup");
125126
module_llm.msg.responseMsgList.clear();
127+
128+
usleep(500000);
126129
}

library.properties

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
name=M5ModuleLLM
2-
version=1.0.0
2+
version=1.4.0
33
author=M5Stack
44
maintainer=M5Stack
55
sentence=M5ModuleLLM is a library for M5ModuleLLM

src/M5ModuleLLM.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ bool M5ModuleLLM::begin(Stream* serialPort)
2020
asr.init(&msg);
2121
yolo.init(&msg);
2222
camera.init(&msg);
23+
vad.init(&msg);
24+
whisper.init(&msg);
2325
depthanything.init(&msg);
2426
return true;
2527
}

src/M5ModuleLLM.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@
1818
#include "api/api_yolo.h"
1919
#include "api/api_depth_anything.h"
2020
#include "api/api_camera.h"
21+
#include "api/api_vad.h"
22+
#include "api/api_whisper.h"
2123
#include "api/api_version.h"
2224

2325
class M5ModuleLLM {
@@ -105,6 +107,18 @@ class M5ModuleLLM {
105107
*/
106108
m5_module_llm::ApiYolo yolo;
107109

110+
/**
111+
* @brief VAD module api set
112+
*
113+
*/
114+
m5_module_llm::ApiVad vad;
115+
116+
/**
117+
* @brief Whisper module api set
118+
*
119+
*/
120+
m5_module_llm::ApiWhisper whisper;
121+
108122
/**
109123
* @brief DepthAnything module api set
110124
*

src/api/api_llm.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ String ApiLlm::setup(ApiLlmSetupConfig_t config, String request_id)
4747
// Copy work id
4848
llm_work_id = msg.work_id;
4949
},
50-
20000);
50+
30000);
5151
return llm_work_id;
5252
}
5353

src/api/api_vad.cpp

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
/*
2+
* SPDX-FileCopyrightText: 2024 M5Stack Technology CO LTD
3+
*
4+
* SPDX-License-Identifier: MIT
5+
*/
6+
#include "api_vad.h"
7+
8+
using namespace m5_module_llm;
9+
10+
/**
 * @brief Remember the message handler used by setup() and exit().
 *
 * @param moduleMsg Pointer stored in _module_msg
 */
void ApiVad::init(ModuleMsg* moduleMsg)
{
    this->_module_msg = moduleMsg;
}
14+
15+
/**
 * @brief Send a "vad.setup" command and return the work id from the response.
 *
 * @param config     Values serialized into the command's "data" object
 * @param request_id Request id placed in the command and used to match the reply
 * @return String    Work id copied from the response message; empty if the
 *                   response callback never ran or init() has not been called
 */
String ApiVad::setup(ApiVadSetupConfig_t config, String request_id)
{
    /* _module_msg stays nullptr until init() is called; avoid dereferencing it */
    if (_module_msg == nullptr) {
        return String();
    }

    /* Build the setup command json */
    String cmd;
    {
        JsonDocument doc;
        doc["request_id"]              = request_id;
        doc["work_id"]                 = "vad";
        doc["action"]                  = "setup";
        doc["object"]                  = "vad.setup";
        doc["data"]["model"]           = config.model;
        doc["data"]["response_format"] = config.response_format;
        doc["data"]["enoutput"]        = config.enoutput;
        JsonArray inputArray           = doc["data"]["input"].to<JsonArray>();
        for (const String& str : config.input) {
            inputArray.add(str);
        }
        serializeJson(doc, cmd);
    }

    /* Send it and wait for the matching reply (30000 is presumably a timeout in ms) */
    String work_id;
    _module_msg->sendCmdAndWaitToTakeMsg(
        cmd.c_str(), request_id,
        [&work_id](ResponseMsg_t& msg) {
            // Copy work id
            work_id = msg.work_id;
        },
        30000);
    return work_id;
}
44+
45+
/**
 * @brief Send an "exit" command for the given VAD work id.
 *
 * @param work_id    Work id of the unit to exit
 * @param request_id Request id placed in the command and used to match the reply
 * @return String    Work id copied from the response message; the work_id
 *                   argument unchanged if the response callback never ran or
 *                   init() has not been called
 */
String ApiVad::exit(String work_id, String request_id)
{
    /* _module_msg stays nullptr until init() is called; avoid dereferencing it */
    if (_module_msg == nullptr) {
        return work_id;
    }

    /* Build the exit command json */
    String cmd;
    {
        JsonDocument doc;
        doc["request_id"] = request_id;
        doc["work_id"]    = work_id;
        doc["action"]     = "exit";
        serializeJson(doc, cmd);
    }

    /* Send it and wait for the matching reply (100 is presumably a timeout in ms) */
    _module_msg->sendCmdAndWaitToTakeMsg(
        cmd.c_str(), request_id,
        [&work_id](ResponseMsg_t& msg) {
            // Copy work id
            work_id = msg.work_id;
        },
        100);
    return work_id;
}

src/api/api_vad.h

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
/*
2+
* SPDX-FileCopyrightText: 2024 M5Stack Technology CO LTD
3+
*
4+
* SPDX-License-Identifier: MIT
5+
*/
6+
#pragma once
7+
#include "../utils/msg.h"
8+
#include <Arduino.h>
9+
10+
namespace m5_module_llm {
11+
12+
struct ApiVadSetupConfig_t {
13+
String model = "silero-vad";
14+
String response_format = "vad.bool";
15+
std::vector<String> input = {"sys.pcm"};
16+
bool enoutput = true;
17+
};
18+
19+
class ApiVad {
20+
public:
21+
void init(ModuleMsg* moduleMsg);
22+
23+
/**
24+
* @brief Setup module VAD, return VAD work_id
25+
*
26+
* @param config
27+
* @param request_id
28+
* @return String
29+
*/
30+
String setup(ApiVadSetupConfig_t config = ApiVadSetupConfig_t(), String request_id = "vad_setup");
31+
32+
/**
33+
* @brief Exit module VAD, return VAD work_id
34+
*
35+
* @param work_id
36+
* @param request_id
37+
* @return String
38+
*/
39+
String exit(String work_id, String request_id = "vad_exit");
40+
41+
private:
42+
ModuleMsg* _module_msg = nullptr;
43+
};
44+
45+
} // namespace m5_module_llm

src/api/api_whisper.cpp

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
/*
2+
* SPDX-FileCopyrightText: 2024 M5Stack Technology CO LTD
3+
*
4+
* SPDX-License-Identifier: MIT
5+
*/
6+
#include "api_whisper.h"
7+
8+
using namespace m5_module_llm;
9+
10+
/**
 * @brief Remember the message handler used by setup() and exit().
 *
 * @param moduleMsg Pointer stored in _module_msg
 */
void ApiWhisper::init(ModuleMsg* moduleMsg)
{
    this->_module_msg = moduleMsg;
}
14+
15+
/**
 * @brief Send a "whisper.setup" command and return the work id from the response.
 *
 * @param config     Values serialized into the command's "data" object,
 *                   including the recognition language (config.language)
 * @param request_id Request id placed in the command and used to match the reply
 * @param language   Not used by this implementation; the language sent to the
 *                   module is config.language. Kept for interface compatibility.
 * @return String    Work id copied from the response message; empty if the
 *                   response callback never ran or no message handler is set
 */
String ApiWhisper::setup(ApiWhisperSetupConfig_t config, String request_id, String language)
{
    /* Accepted for signature compatibility only; see config.language */
    (void)language;

    /* Nothing can be sent without a message handler (set through init()) */
    if (_module_msg == nullptr) {
        return String();
    }

    /* Build the setup command json */
    String cmd;
    {
        JsonDocument doc;
        doc["request_id"]              = request_id;
        doc["work_id"]                 = "whisper";
        doc["action"]                  = "setup";
        doc["object"]                  = "whisper.setup";
        doc["data"]["model"]           = config.model;
        doc["data"]["response_format"] = config.response_format;
        doc["data"]["language"]        = config.language;
        doc["data"]["enoutput"]        = config.enoutput;
        JsonArray inputArray           = doc["data"]["input"].to<JsonArray>();
        for (const String& str : config.input) {
            inputArray.add(str);
        }
        serializeJson(doc, cmd);
    }

    /* Send it and wait for the matching reply (10000 is presumably a timeout in ms) */
    String work_id;
    _module_msg->sendCmdAndWaitToTakeMsg(
        cmd.c_str(), request_id,
        [&work_id](ResponseMsg_t& msg) {
            // Copy work id
            work_id = msg.work_id;
        },
        10000);
    return work_id;
}
45+
46+
/**
 * @brief Send an "exit" command for the given Whisper work id.
 *
 * @param work_id    Work id of the unit to exit
 * @param request_id Request id placed in the command and used to match the reply
 * @return String    Work id copied from the response message; the work_id
 *                   argument unchanged if the response callback never ran or
 *                   no message handler is set
 */
String ApiWhisper::exit(String work_id, String request_id)
{
    /* Nothing can be sent without a message handler (set through init()) */
    if (_module_msg == nullptr) {
        return work_id;
    }

    /* Build the exit command json */
    String cmd;
    {
        JsonDocument doc;
        doc["request_id"] = request_id;
        doc["work_id"]    = work_id;
        doc["action"]     = "exit";
        serializeJson(doc, cmd);
    }

    /* Send it and wait for the matching reply (100 is presumably a timeout in ms) */
    _module_msg->sendCmdAndWaitToTakeMsg(
        cmd.c_str(), request_id,
        [&work_id](ResponseMsg_t& msg) {
            // Copy work id
            work_id = msg.work_id;
        },
        100);
    return work_id;
}

0 commit comments

Comments
 (0)