Skip to content
This repository was archived by the owner on Aug 2, 2020. It is now read-only.

Commit 269982d

Browse files
committed
Add config "convert_unicode_emoji"
1 parent 5ab902b commit 269982d

File tree

3 files changed

+75
-64
lines changed

3 files changed

+75
-64
lines changed

src/conf/config_struct.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,4 +43,5 @@ struct Config {
4343
bool auto_perform_update = false;
4444
size_t thread_pool_size = 4;
4545
size_t server_thread_pool_size = 1;
46+
bool convert_unicode_emoji = true;
4647
};

src/conf/loader.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,7 @@ optional<Config> load_configuration(const string &filepath) {
113113
GET_BOOL_CONFIG(auto_perform_update);
114114
GET_CONFIG(thread_pool_size, size_t);
115115
GET_CONFIG(server_thread_pool_size, size_t);
116+
GET_BOOL_CONFIG(convert_unicode_emoji);
116117
#undef GET_CONFIG
117118

118119
Log::i(TAG, u8"加载配置文件成功");

src/helpers.cpp

Lines changed: 73 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -182,88 +182,97 @@ bool is_emoji(const uint32_t codepoint) {
182182

183183
string string_to_coolq(const string &str) {
184184
// call CoolQ API
185-
string processed_str;
186-
187-
wstring_convert<codecvt_utf8<uint32_t>, uint32_t> uint32_conv;
188-
auto uint32_str = uint32_conv.from_bytes(str);
189-
190-
auto append_text = [&](const decltype(uint32_str.cbegin()) &begin,
191-
const decltype(uint32_str.cbegin()) &end) {
192-
decltype(uint32_str) uint32_part_str(begin, end);
193-
auto utf8_part_str = uint32_conv.to_bytes(uint32_part_str);
194-
processed_str += utf8_part_str;
195-
};
196-
197-
auto last_it = uint32_str.cbegin();
198-
for (auto it = uint32_str.cbegin(); it != uint32_str.cend(); ++it) {
199-
const auto codepoint = *it;
200-
if (is_emoji(codepoint)) {
201-
// is emoji
202-
append_text(last_it, it);
203-
processed_str += "[CQ:emoji,id=" + to_string(codepoint) + "]";
204-
last_it = it + 1;
185+
186+
if (config.convert_unicode_emoji) {
187+
string processed_str;
188+
189+
wstring_convert<codecvt_utf8<uint32_t>, uint32_t> uint32_conv;
190+
auto uint32_str = uint32_conv.from_bytes(str);
191+
192+
auto append_text = [&](const decltype(uint32_str.cbegin()) &begin,
193+
const decltype(uint32_str.cbegin()) &end) {
194+
decltype(uint32_str) uint32_part_str(begin, end);
195+
auto utf8_part_str = uint32_conv.to_bytes(uint32_part_str);
196+
processed_str += utf8_part_str;
197+
};
198+
199+
auto last_it = uint32_str.cbegin();
200+
for (auto it = uint32_str.cbegin(); it != uint32_str.cend(); ++it) {
201+
const auto codepoint = *it;
202+
if (is_emoji(codepoint)) {
203+
// is emoji
204+
append_text(last_it, it);
205+
processed_str += "[CQ:emoji,id=" + to_string(codepoint) + "]";
206+
last_it = it + 1;
207+
}
205208
}
209+
append_text(last_it, uint32_str.cend());
210+
211+
return iconv_string_encode(processed_str, "gb18030");
206212
}
207-
append_text(last_it, uint32_str.cend());
208213

209-
return iconv_string_encode(processed_str, "gb18030");
214+
return iconv_string_encode(str, "gb18030");
210215
}
211216

212217
string string_from_coolq(const string &str) {
213218
// handle CoolQ event or data
214219
auto utf8_str = iconv_string_decode(str, "gb18030");
215220

216-
smatch m;
221+
if (config.convert_unicode_emoji) {
222+
smatch m;
217223

218-
string processed_str_1;
219-
auto it_1 = utf8_str.cbegin();
220-
while (regex_search(it_1, utf8_str.cend(), m, regex(R"(\[CQ:emoji,\s*id=(\d+)\])"))) {
221-
processed_str_1 += string(it_1, it_1 + m.position());
224+
string processed_str_1;
225+
auto it_1 = utf8_str.cbegin();
226+
while (regex_search(it_1, utf8_str.cend(), m, regex(R"(\[CQ:emoji,\s*id=(\d+)\])"))) {
227+
processed_str_1 += string(it_1, it_1 + m.position());
222228

223-
const auto codepoint_str = m.str(1);
224-
u32string u32_str;
229+
const auto codepoint_str = m.str(1);
230+
u32string u32_str;
225231

226-
if (boost::starts_with(codepoint_str, "100000")) {
227-
// keycap # to keycap 9
228-
const auto codepoint = static_cast<char32_t>(stoul(codepoint_str.substr(strlen("100000"))));
229-
u32_str.append({codepoint, 0xFE0F, 0x20E3});
230-
} else {
231-
const auto codepoint = static_cast<char32_t>(stoul(codepoint_str));
232-
u32_str.append({codepoint});
233-
}
232+
if (boost::starts_with(codepoint_str, "100000")) {
233+
// keycap # to keycap 9
234+
const auto codepoint = static_cast<char32_t>(stoul(codepoint_str.substr(strlen("100000"))));
235+
u32_str.append({codepoint, 0xFE0F, 0x20E3});
236+
} else {
237+
const auto codepoint = static_cast<char32_t>(stoul(codepoint_str));
238+
u32_str.append({codepoint});
239+
}
234240

235-
const auto p = reinterpret_cast<const uint32_t *>(u32_str.data());
236-
wstring_convert<codecvt_utf8<uint32_t>, uint32_t> conv;
237-
const auto emoji_utf8_str = conv.to_bytes(p, p + u32_str.size());
238-
processed_str_1 += emoji_utf8_str;
241+
const auto p = reinterpret_cast<const uint32_t *>(u32_str.data());
242+
wstring_convert<codecvt_utf8<uint32_t>, uint32_t> conv;
243+
const auto emoji_utf8_str = conv.to_bytes(p, p + u32_str.size());
244+
processed_str_1 += emoji_utf8_str;
239245

240-
it_1 += m.position() + m.length();
241-
}
242-
processed_str_1 += string(it_1, utf8_str.cend());
243-
244-
// CoolQ sometimes uses "#\uFE0F" to represent "#\uFE0F\u20E3"
245-
// we should convert them into correct emoji codepoints here
246-
// \uFE0F == \xef\xb8\x8f
247-
// \u20E3 == \xe2\x83\xa3
248-
string processed_str_2;
249-
auto it_2 = processed_str_1.cbegin();
250-
while (regex_search(it_2, processed_str_1.cend(), m, regex("[#*0-9]\xef\xb8\x8f"))) {
251-
processed_str_2 += string(it_2, it_2 + m.position());
252-
253-
const auto pos = m.position();
254-
if (processed_str_1.cend() - (it_2 + pos) < strlen("\xef\xb8\x8f\xe2\x83\xa3")
255-
|| string(it_2 + pos + 4, it_2 + pos + 7) != "\xe2\x83\xa3") {
256-
// there is no "\u20E3" after this match
257-
processed_str_2 += m.str(0) + "\xe2\x83\xa3";
258-
} else {
259-
processed_str_2 += m.str(0);
246+
it_1 += m.position() + m.length();
247+
}
248+
processed_str_1 += string(it_1, utf8_str.cend());
249+
250+
// CoolQ sometimes uses "#\uFE0F" to represent "#\uFE0F\u20E3"
251+
// we should convert them into correct emoji codepoints here
252+
// \uFE0F == \xef\xb8\x8f
253+
// \u20E3 == \xe2\x83\xa3
254+
string processed_str_2;
255+
auto it_2 = processed_str_1.cbegin();
256+
while (regex_search(it_2, processed_str_1.cend(), m, regex("[#*0-9]\xef\xb8\x8f"))) {
257+
processed_str_2 += string(it_2, it_2 + m.position());
258+
259+
const auto pos = m.position();
260+
if (processed_str_1.cend() - (it_2 + pos) < strlen("\xef\xb8\x8f\xe2\x83\xa3")
261+
|| string(it_2 + pos + 4, it_2 + pos + 7) != "\xe2\x83\xa3") {
262+
// there is no "\u20E3" after this match
263+
processed_str_2 += m.str(0) + "\xe2\x83\xa3";
264+
} else {
265+
processed_str_2 += m.str(0);
266+
}
267+
268+
it_2 += m.position() + m.length();
260269
}
270+
processed_str_2 += string(it_2, processed_str_1.cend());
261271

262-
it_2 += m.position() + m.length();
272+
return processed_str_2;
263273
}
264-
processed_str_2 += string(it_2, processed_str_1.cend());
265274

266-
return processed_str_2;
275+
return utf8_str;
267276
}
268277

269278
unsigned random_int(const unsigned min, const unsigned max) {

0 commit comments

Comments
 (0)