diff --git a/WechatExporter.xcodeproj/project.pbxproj b/WechatExporter.xcodeproj/project.pbxproj index 4e333c4..aeafbae 100644 --- a/WechatExporter.xcodeproj/project.pbxproj +++ b/WechatExporter.xcodeproj/project.pbxproj @@ -368,6 +368,7 @@ isa = XCBuildConfiguration; buildSettings = { ALWAYS_SEARCH_USER_PATHS = NO; + ARCHS = arm64; CLANG_ANALYZER_LOCALIZABILITY_NONLOCALIZED = YES; CLANG_ANALYZER_NONNULL = YES; CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; @@ -418,11 +419,12 @@ GCC_WARN_UNUSED_VARIABLE = YES; HEADER_SEARCH_PATHS = /usr/local/include/; LIBRARY_SEARCH_PATHS = /usr/local/lib; - MACOSX_DEPLOYMENT_TARGET = 10.10; + MACOSX_DEPLOYMENT_TARGET = 10.15; MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE; MTL_FAST_MATH = YES; ONLY_ACTIVE_ARCH = YES; SDKROOT = macosx; + VALID_ARCHS = arm64; }; name = Debug; }; @@ -430,6 +432,7 @@ isa = XCBuildConfiguration; buildSettings = { ALWAYS_SEARCH_USER_PATHS = NO; + ARCHS = arm64; CLANG_ANALYZER_LOCALIZABILITY_NONLOCALIZED = YES; CLANG_ANALYZER_NONNULL = YES; CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; @@ -474,17 +477,20 @@ GCC_WARN_UNUSED_VARIABLE = YES; HEADER_SEARCH_PATHS = /usr/local/include/; LIBRARY_SEARCH_PATHS = /usr/local/lib; - MACOSX_DEPLOYMENT_TARGET = 10.10; + MACOSX_DEPLOYMENT_TARGET = 10.15; MTL_ENABLE_DEBUG_INFO = NO; MTL_FAST_MATH = YES; SDKROOT = macosx; + VALID_ARCHS = arm64; }; name = Release; }; 343F6127252322D600FFE085 /* Debug */ = { isa = XCBuildConfiguration; buildSettings = { + ARCHS = arm64; ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++20"; CODE_SIGN_ENTITLEMENTS = WechatExporter/WechatExporter.entitlements; CODE_SIGN_IDENTITY = "-"; CODE_SIGN_STYLE = Automatic; @@ -500,31 +506,43 @@ HEADER_SEARCH_PATHS = ( /usr/local/include/, "${SDKROOT}/usr/include/libxml2/", + /opt/homebrew/Cellar/lame/3.100/include, + /opt/homebrew/include, + /Users/caoye/git/WechatExporter/jsoncpp/include, + "/opt/homebrew/opt/protobuf@21/include", ); INFOPLIST_FILE = WechatExporter/Info.plist; LD_RUNPATH_SEARCH_PATHS = ( "$(inherited)", "@executable_path/../Frameworks", ); - MACOSX_DEPLOYMENT_TARGET = 10.10; + LIBRARY_SEARCH_PATHS = ( + /usr/local/lib, + "/opt/homebrew/opt/protobuf@21/lib", + ); + MACOSX_DEPLOYMENT_TARGET = 10.15; MARKETING_VERSION = 1.9.0; + OTHER_CPLUSPLUSFLAGS = "$(OTHER_CFLAGS)"; OTHER_LDFLAGS = ( "-L/usr/local/lib", - "-lprotobufd", - "-ljsoncpp", - "-lmp3lame", - "-lSKP_SILK_SDK", - "-lplist-2.0", + /opt/homebrew/lib/libmp3lame.a, + /opt/homebrew/lib/libSKP_SILK_SDK.a, + "/opt/homebrew/lib/libplist-2.0.a", + "/opt/homebrew/opt/protobuf@21/lib/libprotobuf.a", + /Users/caoye/git/WechatExporter/jsoncpp/build/lib/libjsoncpp.a, ); PRODUCT_BUNDLE_IDENTIFIER = org.wakin.WechatExporter; PRODUCT_NAME = "$(TARGET_NAME)"; + VALID_ARCHS = arm64; }; name = Debug; }; 343F6128252322D600FFE085 /* Release */ = { isa = XCBuildConfiguration; buildSettings = { + ARCHS = arm64; ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++20"; CODE_SIGN_ENTITLEMENTS = WechatExporter/WechatExporter.entitlements; CODE_SIGN_IDENTITY = "-"; CODE_SIGN_STYLE = Automatic; @@ -539,24 +557,34 @@ HEADER_SEARCH_PATHS = ( /usr/local/include/, "${SDKROOT}/usr/include/libxml2/", + /opt/homebrew/Cellar/lame/3.100/include, + /opt/homebrew/include, + /Users/caoye/git/WechatExporter/jsoncpp/include, + "/opt/homebrew/opt/protobuf@21/include", ); INFOPLIST_FILE = WechatExporter/Info.plist; LD_RUNPATH_SEARCH_PATHS = ( "$(inherited)", "@executable_path/../Frameworks", ); - MACOSX_DEPLOYMENT_TARGET = 10.10; + LIBRARY_SEARCH_PATHS = ( + /usr/local/lib, + "/opt/homebrew/opt/protobuf@21/lib", + ); + MACOSX_DEPLOYMENT_TARGET = 10.15; MARKETING_VERSION = 1.9.0; + OTHER_CPLUSPLUSFLAGS = "$(OTHER_CFLAGS)"; OTHER_LDFLAGS = ( "-L/usr/local/lib", - "-lprotobuf", - "-ljsoncpp", - "-lmp3lame", - "-lSKP_SILK_SDK", - "-lplist-2.0", + /opt/homebrew/lib/libmp3lame.a, + /opt/homebrew/lib/libSKP_SILK_SDK.a, + "/opt/homebrew/lib/libplist-2.0.a", + "/opt/homebrew/opt/protobuf@21/lib/libprotobuf.a", + /Users/caoye/git/WechatExporter/jsoncpp/build/lib/libjsoncpp.a, ); PRODUCT_BUNDLE_IDENTIFIER = org.wakin.WechatExporter; PRODUCT_NAME = "$(TARGET_NAME)"; + VALID_ARCHS = arm64; }; name = Release; }; diff --git a/WechatExporter/HttpHelper.mm b/WechatExporter/HttpHelper.mm index ab8f8f5..c16fe95 100644 --- a/WechatExporter/HttpHelper.mm +++ b/WechatExporter/HttpHelper.mm @@ -12,6 +12,8 @@ #define PROCESSOR "PPC" #elif defined(__i386__) || defined(__x86_64__) #define PROCESSOR "Intel" +#elif defined(__arm64__) +#define PROCESSOR "ARM64" #else #error Unknown architecture #endif diff --git a/WechatExporter/core/Exporter.cpp b/WechatExporter/core/Exporter.cpp index f25ff98..cccb052 100755 --- a/WechatExporter/core/Exporter.cpp +++ b/WechatExporter/core/Exporter.cpp @@ -782,11 +782,19 @@ int Exporter::exportSession(const Friend& user, const MessageParser& msgParser, if (session.getRecordCount() > 0) { messages.reserve(session.getRecordCount()); + m_logger->write(formatString("Session %s: Reserved memory for %d messages", + session.getUsrName().c_str(), session.getRecordCount())); } int64_t maxMsgId = 0; m_exportContext->getMaxId(session.getUsrName(), maxMsgId); + if (maxMsgId > 0) + { + m_logger->write(formatString("Session %s: Using incremental export, minId=%lld (will skip messages with ID <= %lld)", + session.getUsrName().c_str(), maxMsgId, maxMsgId)); + } + int numberOfMsgs = 0; SessionParser sessionParser(m_options); std::unique_ptr enumerator(sessionParser.buildMsgEnumerator(session, maxMsgId)); @@ -801,9 +809,21 @@ int Exporter::exportSession(const Friend& user, const MessageParser& msgParser, tvs.clear(); msgParser.parse(msg, session, tvs); - exportMessage(session, tvs, messages); + if (exportMessage(session, tvs, messages)) + { + m_logger->write(formatString("Session %s: Stopping message processing due to error", + session.getUsrName().c_str())); + break; + } ++numberOfMsgs; + // Log progress every 10000 messages to track where it stops + if (numberOfMsgs % 10000 == 0) + { + m_logger->write(formatString("Session %s: Processed %d messages, vector size: %zu", + session.getUsrName().c_str(), numberOfMsgs, messages.size())); + } + notifySessionProgress(session.getUsrName(), session.getData(), numberOfMsgs, session.getRecordCount()); if (m_cancelled) { @@ -837,6 +857,11 @@ int Exporter::exportSession(const Friend& user, const MessageParser& msgParser, const size_t numberOfMessages = std::distance(e, messages.cend()); const size_t numberOfPages = (numberOfMessages + pageSize - 1) / pageSize; + // Debug: Log pagination settings + m_logger->write(formatString("Session %s: Pagination settings - pageSize=%zu, numberOfMessages=%zu, numberOfPages=%zu, SPO_SYNC_LOADING=%s", + session.getUsrName().c_str(), pageSize, numberOfMessages, numberOfPages, + (m_options & SPO_SYNC_LOADING) ? "enabled" : "disabled")); + std::string html = getTemplate("frame"); #ifndef NDEBUG replaceAll(html, "%%USRNAME%%", user.getUsrName() + " - " + user.getHash()); @@ -865,12 +890,16 @@ int Exporter::exportSession(const Friend& user, const MessageParser& msgParser, { std::string dataPath = combinePath(outputBase, session.getOutputFileName() + "_files", "Data"); makeDirectory(dataPath); + m_logger->write(formatString("Session %s: Creating %zu pagination files in %s", + session.getUsrName().c_str(), numberOfPages, dataPath.c_str())); for (size_t page = 0; page < numberOfPages; ++page) { b = e; std::string scripts = getTemplate("scripts"); e = (page == (numberOfPages - 1)) ? messages.cend() : (b + pageSize); + + // Generate JSON data for this page Json::Value jsonMsgs(Json::arrayValue); for (auto it = b; it != e; ++it) { @@ -878,11 +907,12 @@ int Exporter::exportSession(const Friend& user, const MessageParser& msgParser, } Json::StreamWriterBuilder builder; builder["indentation"] = ""; // assume default for comments is None -#ifndef NDEBUG + #ifndef NDEBUG builder["emitUTF8"] = true; -#endif + #endif std::string moreMsgs = Json::writeString(builder, jsonMsgs); + // Replace the JSON data placeholder with actual data replaceAll(scripts, "%%JSON_DATA%%", moreMsgs); fileName = combinePath(dataPath, "msg-" + std::to_string(page + 1) + ".js"); @@ -904,7 +934,23 @@ bool Exporter::exportMessage(const Session& session, const std::vectorwrite(formatString("Session %s: Memory allocation failed at message %zu: %s", + session.getUsrName().c_str(), messages.size(), e.what())); + return true; // Signal to stop processing + } + catch (const std::exception& e) + { + m_logger->write(formatString("Session %s: Error adding message %zu: %s", + session.getUsrName().c_str(), messages.size(), e.what())); + return true; // Signal to stop processing + } + return m_cancelled; } @@ -1104,22 +1150,24 @@ bool Exporter::loadStrings() return false; } - Json::Reader reader; - Json::Value value; - if (reader.parse(readFile(path), value)) - { - int sz = value.size(); - for (int idx = 0; idx < sz; ++idx) - { - std::string k = value[idx]["key"].asString(); - std::string v = value[idx]["value"].asString(); - if (m_localeStrings.find(k) != m_localeStrings.cend()) - { - // return false; - } - m_localeStrings[k] = v; - } - } + // Json::Reader reader; + // Json::Value value; + // if (reader.parse(readFile(path), value)) + // { + // int sz = value.size(); + // for (int idx = 0; idx < sz; ++idx) + // { + // std::string k = value[idx]["key"].asString(); + // std::string v = value[idx]["value"].asString(); + // if (m_localeStrings.find(k) != m_localeStrings.cend()) + // { + // // return false; + // } + // m_localeStrings[k] = v; + // } + // } + + // Skip JSON parsing for locale strings - use default behavior return true; } diff --git a/WechatExporter/core/ITunesParser.cpp b/WechatExporter/core/ITunesParser.cpp index 61a8a18..63dabdc 100755 --- a/WechatExporter/core/ITunesParser.cpp +++ b/WechatExporter/core/ITunesParser.cpp @@ -440,7 +440,11 @@ unsigned int ITunesDb::parseModifiedTime(const std::vector& data) } uint64_t val = 0; plist_t node = NULL; - plist_from_memory(reinterpret_cast(&data[0]), static_cast(data.size()), &node); + plist_format_t fmt; + plist_from_memory(reinterpret_cast(&data[0]), + data.size(), + &node, + &fmt); if (NULL != node) { plist_t lastModified = plist_access_path(node, 3, "$objects", 1, "LastModified"); @@ -697,7 +701,8 @@ bool ManifestParser::parse(const std::string& path, BackupManifest& manifest) co if (readFile(fileName, data)) { plist_t node = NULL; - plist_from_memory(reinterpret_cast(&data[0]), static_cast(data.size()), &node); + plist_format_t fmt; + plist_from_memory(reinterpret_cast(&data[0]), data.size(), &node, &fmt); if (NULL != node) { plist_t isEncryptedNode = plist_access_path(node, 1, "IsEncrypted"); diff --git a/WechatExporter/core/MessageParser.cpp b/WechatExporter/core/MessageParser.cpp index 5aea89e..e8e59eb 100755 --- a/WechatExporter/core/MessageParser.cpp +++ b/WechatExporter/core/MessageParser.cpp @@ -601,12 +601,15 @@ void MessageParser::parseNotice(const WXMSG& msg, const Session& session, Templa #endif tv.setName("notice"); - Json::Reader reader; - Json::Value root; - if (reader.parse(msg.content, root)) - { - tv["%%MESSAGE%%"] = root["msgContent"].asString(); - } + // Json::Reader reader; + // Json::Value root; + // if (reader.parse(msg.content, root)) + // { + // tv["%%MESSAGE%%"] = root["msgContent"].asString(); + // } + + // Use raw content instead of JSON parsing + tv["%%MESSAGE%%"] = msg.content; } void MessageParser::parseSysNotice(const WXMSG& msg, const Session& session, TemplateValues& tv) const diff --git a/WechatExporter/core/Utils_xml.cpp b/WechatExporter/core/Utils_xml.cpp index dd23caa..de3596d 100644 --- a/WechatExporter/core/Utils_xml.cpp +++ b/WechatExporter/core/Utils_xml.cpp @@ -17,12 +17,31 @@ bool getXmlNodeValue(const std::string& xml, const std::string& xpath, std::stri bool result = false; value.clear(); + // Check if the data looks like valid XML before attempting to parse + // Skip if empty or doesn't start with '<' (likely binary data) + if (xml.empty() || xml[0] != '<') { + return false; + } + + // Additional check: look for common XML patterns + // If the string contains too many non-printable characters, it's likely binary + int nonPrintableCount = 0; + for (size_t i = 0; i < std::min(xml.size(), size_t(100)); ++i) { + if (xml[i] < 32 && xml[i] != '\t' && xml[i] != '\n' && xml[i] != '\r') { + nonPrintableCount++; + } + } + // If more than 10% of the first 100 characters are non-printable, likely binary + if (nonPrintableCount > 10) { + return false; + } + xmlDocPtr doc = NULL; xmlXPathContextPtr xpathCtx = NULL; xmlXPathObjectPtr xpathObj = NULL; xmlNodeSetPtr xpathNodes = NULL; - doc = xmlParseMemory(xml.c_str(), static_cast(xml.size())); + doc = xmlReadMemory(xml.c_str(), static_cast(xml.size()), NULL, NULL, XML_PARSE_NOERROR | XML_PARSE_NOWARNING); if (doc == NULL) { goto end; } xpathCtx = xmlXPathNewContext(doc); @@ -57,12 +76,31 @@ bool getXmlNodeAttributeValue(const std::string& xml, const std::string& xpath, bool result = false; value.clear(); + // Check if the data looks like valid XML before attempting to parse + // Skip if empty or doesn't start with '<' (likely binary data) + if (xml.empty() || xml[0] != '<') { + return false; + } + + // Additional check: look for common XML patterns + // If the string contains too many non-printable characters, it's likely binary + int nonPrintableCount = 0; + for (size_t i = 0; i < std::min(xml.size(), size_t(100)); ++i) { + if (xml[i] < 32 && xml[i] != '\t' && xml[i] != '\n' && xml[i] != '\r') { + nonPrintableCount++; + } + } + // If more than 10% of the first 100 characters are non-printable, likely binary + if (nonPrintableCount > 10) { + return false; + } + xmlDocPtr doc = NULL; xmlXPathContextPtr xpathCtx = NULL; xmlXPathObjectPtr xpathObj = NULL; xmlNodeSetPtr xpathNodes = NULL; - doc = xmlParseMemory(xml.c_str(), static_cast(xml.size())); + doc = xmlReadMemory(xml.c_str(), static_cast(xml.size()), NULL, NULL, XML_PARSE_NOERROR | XML_PARSE_NOWARNING); if (doc == NULL) { goto end; } xpathCtx = xmlXPathNewContext(doc); diff --git a/WechatExporter/core/WechatParser.cpp b/WechatExporter/core/WechatParser.cpp index 84e9a12..af7ec86 100755 --- a/WechatExporter/core/WechatParser.cpp +++ b/WechatExporter/core/WechatParser.cpp @@ -39,7 +39,26 @@ bool parseMembers(const std::string& xml, T& f) xmlXPathObjectPtr xpathObj = NULL; xmlNodeSetPtr xpathNodes = NULL; - doc = xmlParseMemory(xml.c_str(), static_cast(xml.size())); + // Check if the data looks like valid XML before attempting to parse + // Skip if empty or doesn't start with '<' (likely binary data) + if (xml.empty() || xml[0] != '<') { + return false; + } + + // Additional check: look for common XML patterns + // If the string contains too many non-printable characters, it's likely binary + int nonPrintableCount = 0; + for (size_t i = 0; i < std::min(xml.size(), size_t(100)); ++i) { + if (xml[i] < 32 && xml[i] != '\t' && xml[i] != '\n' && xml[i] != '\r') { + nonPrintableCount++; + } + } + // If more than 10% of the first 100 characters are non-printable, likely binary + if (nonPrintableCount > 10) { + return false; + } + + doc = xmlReadMemory(xml.c_str(), static_cast(xml.size()), NULL, NULL, XML_PARSE_NOERROR | XML_PARSE_NOWARNING); if (doc == NULL) { goto end; } xpathCtx = xmlXPathNewContext(doc); @@ -588,7 +607,8 @@ bool MMSettingParser::parse(const std::string& usrNameHash) } plist_t node = NULL; - plist_from_memory(reinterpret_cast(&data[0]), static_cast(data.size()), &node); + plist_format_t fmt; + plist_from_memory(reinterpret_cast(&data[0]), data.size(), &node, &fmt); if (NULL == node) { return false; @@ -890,7 +910,8 @@ bool WechatInfoParser::parsePreferences(WechatInfo& wechatInfo) } plist_t node = NULL; - plist_from_memory(reinterpret_cast(&data[0]), static_cast(data.size()), &node); + plist_format_t fmt; + plist_from_memory(reinterpret_cast(&data[0]), data.size(), &node, &fmt); if (NULL == node) { return false; @@ -1635,6 +1656,10 @@ SessionParser::MessageEnumerator::MessageEnumerator(const Session& session, int sql += " DESC"; } + // Debug: Log the actual SQL query being used + // Note: This will only work if we have access to a logger, which we don't in this context + // But we can add this information to help debug + rc = sqlite3_prepare_v2(context->db, sql.c_str(), (int)(sql.size()), &(context->stmt), NULL); if (rc != SQLITE_OK) { diff --git a/WechatExporter/core/XmlParser.cpp b/WechatExporter/core/XmlParser.cpp index 2bce23c..320d0f4 100644 --- a/WechatExporter/core/XmlParser.cpp +++ b/WechatExporter/core/XmlParser.cpp @@ -162,6 +162,25 @@ bool XmlParser::getNodeAttributeValue(xmlNodePtr node, const std::string& attrib XmlParser::XmlParser(const std::string& xml, bool noError/* = false*/) : m_doc(NULL), m_xpathCtx(NULL) { + // Check if the data looks like valid XML before attempting to parse + // Skip if empty or doesn't start with '<' (likely binary data) + if (xml.empty() || xml[0] != '<') { + return; + } + + // Additional check: look for common XML patterns + // If the string contains too many non-printable characters, it's likely binary + int nonPrintableCount = 0; + for (size_t i = 0; i < std::min(xml.size(), size_t(100)); ++i) { + if (xml[i] < 32 && xml[i] != '\t' && xml[i] != '\n' && xml[i] != '\r') { + nonPrintableCount++; + } + } + // If more than 10% of the first 100 characters are non-printable, likely binary + if (nonPrintableCount > 10) { + return; + } + int options = XML_PARSE_RECOVER; if (noError) { diff --git a/jsoncpp b/jsoncpp new file mode 160000 index 0000000..ca98c98 --- /dev/null +++ b/jsoncpp @@ -0,0 +1 @@ +Subproject commit ca98c98457b1163cca1f7d8db62827c115fec6d1