Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions BinaryData/Neuralyzer/Instructions.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ Step-by-step Instructions:
- Never assume document state; always fetch it first.
- Do not use tools for conceptual explanations.
- Do not suggest or recommend that the user use tools; only the model has access to them.
- You may fetch web pages from the internet (e.g. online plugin documentation, manuals or tutorials) when external information is required to fulfil the request; prefer the provided Partiels resources first.
3. Resolve document entities.
- Fetch the current document.
- Resolve user-referenced group and track names to stable UUIDs.
Expand Down
46 changes: 46 additions & 0 deletions BinaryData/Neuralyzer/tools_list.json
Original file line number Diff line number Diff line change
Expand Up @@ -1262,6 +1262,52 @@
}
}
}
},
{
"name": "fetch_web_page",
"description": "Fetch the content of a web page from the internet by its URL. Use this to consult online documentation, manuals, tutorials or other web resources. Returns the page content as readable plain text by default, or raw HTML.",
"inputSchema": {
"type": "object",
"properties": {
"url": {
"type": "string",
"description": "The absolute http or https URL of the web page to fetch"
},
"format": {
"type": "string",
"enum": ["text", "html"],
"description": "Return the page as readable plain text (default) or raw HTML",
"default": "text"
},
"maxLength": {
"type": "integer",
"description": "Maximum number of characters to return; the content is truncated beyond this length (defaults to 100000)",
"default": 100000
}
},
"required": ["url"]
},
"outputSchema": {
"type": "object",
"properties": {
"url": {
"type": "string",
"description": "The fetched URL"
},
"format": {
"type": "string",
"description": "The format of the returned content (text or html)"
},
"truncated": {
"type": "boolean",
"description": "Whether the content was truncated to maxLength"
},
"content": {
"type": "string",
"description": "The fetched page content"
}
}
}
}
]
}
164 changes: 164 additions & 0 deletions Source/Application/AnlApplicationNeuralyzerMcp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
#include "AnlApplicationTools.h"
#include <AnlNeuralyzerData.h>

#include <algorithm>
#include <cstdint>
#include <limits>
#include <regex>

ANALYSE_FILE_BEGIN

nlohmann::json Application::Neuralyzer::Mcp::createError(std::string const& what)
Expand Down Expand Up @@ -121,6 +123,152 @@ namespace Application::Neuralyzer::Mcp
}
}

// Converts an HTML document to readable plain text.
//
// The conversion runs in this order:
//  1. <script>/<style> blocks and HTML comments are replaced by a space.
//  2. Line-break tags and a fixed set of block-level closing tags become '\n'.
//  3. Every remaining tag is replaced by a space.
//  4. Runs of horizontal whitespace are collapsed to a single space.
//  5. A small set of named/numeric entities is decoded.
//  6. Each line is trimmed and runs of blank lines are collapsed to one.
//
// @param html  The markup to convert (taken by value and modified in place).
// @return      The plain-text rendering, trimmed of leading/trailing whitespace.
static std::string htmlToPlainText(std::string html)
{
    try
    {
        // Function-local statics: each pattern is compiled once and then reused.
        static std::regex const scriptStyle(R"(<(script|style)\b[^>]*>[\s\S]*?</\1>)", std::regex::icase | std::regex::optimize);
        html = std::regex_replace(html, scriptStyle, " ");
        static std::regex const comments(R"(<!--[\s\S]*?-->)", std::regex::optimize);
        html = std::regex_replace(html, comments, " ");
        static std::regex const blocks(R"(<\s*(br\s*/?|/p|/div|/h[1-6]|/li|/tr|/table|/section|/article|/header|/footer)\s*>)", std::regex::icase | std::regex::optimize);
        html = std::regex_replace(html, blocks, "\n");
        static std::regex const tags(R"(<[^>]+>)", std::regex::optimize);
        html = std::regex_replace(html, tags, " ");
        static std::regex const spaces(R"([ \t\f\v]+)", std::regex::optimize);
        html = std::regex_replace(html, spaces, " ");
    }
    catch(std::exception const&)
    {
        // If the regex engine throws (e.g. on extremely large input), keep whatever
        // the passes completed so far produced and skip the remaining ones.
        // NOTE(review): some standard-library regex engines may exhaust the stack on
        // very long lazy matches instead of throwing - confirm on all target platforms.
    }

    juce::String text = juce::String::fromUTF8(html.c_str());
    // Entities are decoded only after the tags have been stripped, so an escaped
    // "&lt;b&gt;" survives as literal text. "&amp;" must be decoded last: decoding
    // it first would turn an escaped entity such as "&amp;lt;" into "&lt;" and then
    // wrongly into "<" (double decoding).
    text = text.replace("&nbsp;", " ")
               .replace("&lt;", "<")
               .replace("&gt;", ">")
               .replace("&quot;", "\"")
               .replace("&#39;", "'")
               .replace("&apos;", "'")
               .replace("&amp;", "&");

    // Trim each line and collapse runs of blank lines into a single blank line.
    auto const lines = juce::StringArray::fromLines(text);
    juce::StringArray cleaned;
    auto blankCount = 0;
    for(auto const& rawLine : lines)
    {
        auto const line = rawLine.trim();
        if(line.isEmpty())
        {
            // Keep the first blank line of a run and drop the following ones.
            if(++blankCount > 1)
            {
                continue;
            }
        }
        else
        {
            blankCount = 0;
        }
        cleaned.add(line);
    }
    return cleaned.joinIntoString("\n").trim().toStdString();
}

// Fetches a web page over http(s) for the 'fetch_web_page' tool.
//
// This runs off the message thread because the network request is blocking and
// would otherwise freeze the UI; unlike the other tools it does not access the
// document model, so it is safe to call directly.
//
// Expected request shape: request["params"]["arguments"] is an object with
//  - "url"       (string, required): absolute http or https URL.
//  - "format"    (string, optional): "text" (default) or "html", case-insensitive.
//  - "maxLength" (integer, optional): maximum number of characters returned,
//                defaults to 100000. Values are clamped to [0, INT_MAX]; a value
//                of 0 (or a negative value) disables truncation.
//
// @return On success, a response with "isError" == false whose single text content
//         item holds a serialised JSON object {url, format, truncated, content}.
//         On invalid arguments or connection failure, the result of createError().
//         On a non-2xx HTTP status, a response with "isError" == true and a text
//         message naming the status code.
//
// NOTE(review): any well-formed http(s) URL is accepted, including hosts on the
// local network - confirm whether private/loopback addresses should be refused.
static nlohmann::json performWebFetch(nlohmann::json const& request)
{
    MiscDebug("Application::Neuralyzer::Mcp::Dispatcher", "Received MCP web fetch");
    if(!request.contains("params") || !request.at("params").is_object())
    {
        return createError("The 'params' field is required and must be an object.");
    }
    auto const& methodParams = request.at("params");
    if(!methodParams.contains("arguments") || !methodParams.at("arguments").is_object())
    {
        return createError("The 'arguments' field is required and must be an object.");
    }
    auto const& arguments = methodParams.at("arguments");
    if(!arguments.contains("url") || !arguments.at("url").is_string())
    {
        return createError("The 'url' argument is required and must be a string.");
    }
    if(arguments.contains("format") && !arguments.at("format").is_string())
    {
        return createError("The 'format' argument must be a string ('text' or 'html').");
    }
    if(arguments.contains("maxLength") && !arguments.at("maxLength").is_number_integer())
    {
        return createError("The 'maxLength' argument must be an integer.");
    }

    auto const urlString = juce::String(arguments.at("url").get<std::string>());
    auto const format = juce::String(arguments.value("format", std::string("text"))).toLowerCase();
    if(format != "text" && format != "html")
    {
        return createError("The 'format' argument must be either 'text' or 'html'.");
    }

    // Read maxLength at full 64-bit width and clamp it into the int range. Converting
    // the JSON number straight to int would silently wrap out-of-range values (for
    // instance 2^32 would become 0 and disable truncation altogether).
    auto maxLength = 100000;
    if(arguments.contains("maxLength"))
    {
        auto const& value = arguments.at("maxLength");
        constexpr auto upperBound = std::numeric_limits<int>::max();
        if(value.is_number_unsigned())
        {
            maxLength = static_cast<int>(std::min<std::uint64_t>(value.get<std::uint64_t>(), static_cast<std::uint64_t>(upperBound)));
        }
        else
        {
            maxLength = static_cast<int>(std::clamp<std::int64_t>(value.get<std::int64_t>(), 0, upperBound));
        }
    }

    juce::URL const url(urlString);
    auto const scheme = url.getScheme().toLowerCase();
    if(!url.isWellFormed() || (scheme != "http" && scheme != "https"))
    {
        return createError("The 'url' argument must be a well-formed http or https URL.");
    }

    int statusCode = 0;
    auto const options = juce::URL::InputStreamOptions(juce::URL::ParameterHandling::inAddress)
                             .withExtraHeaders("User-Agent: Partiels-Neuralyzer\r\nAccept: text/html,application/xhtml+xml,text/plain")
                             .withStatusCode(&statusCode)
                             .withConnectionTimeoutMs(15000)
                             .withNumRedirectsToFollow(5);
    auto stream = url.createInputStream(options);
    if(stream == nullptr)
    {
        return createError("Failed to connect to URL: " + urlString.toStdString());
    }

    nlohmann::json response;
    response["content"] = nlohmann::json::array();
    // Check the status before reading the body so that error pages are never
    // downloaded. A status code of 0 (none reported) is not treated as an error.
    if(statusCode != 0 && (statusCode < 200 || statusCode >= 300))
    {
        response["isError"] = true;
        nlohmann::json content;
        content["type"] = "text";
        content["text"] = "The server returned HTTP " + std::to_string(statusCode) + " when fetching " + urlString.toStdString();
        response["content"].push_back(std::move(content));
        return response;
    }
    response["isError"] = false;

    // NOTE(review): the whole body is read into memory before truncation - consider
    // a byte cap if very large downloads are a concern.
    auto const body = stream->readEntireStreamAsString();

    juce::String resultText = (format == "html") ? body : juce::String(htmlToPlainText(body.toStdString()));
    auto truncated = false;
    // Truncation is applied to the final representation (plain text or raw HTML).
    if(maxLength > 0 && resultText.length() > maxLength)
    {
        resultText = resultText.substring(0, maxLength);
        truncated = true;
    }

    nlohmann::json payload;
    payload["url"] = urlString.toStdString();
    payload["format"] = format.toStdString();
    payload["truncated"] = truncated;
    payload["content"] = resultText.toStdString();

    // The payload is returned serialised inside a single text content item.
    nlohmann::json content;
    content["type"] = "text";
    content["text"] = payload.dump();
    response["content"].push_back(std::move(content));
    return response;
}

static nlohmann::json performToolsCall(nlohmann::json const& request, nlohmann::json const& context)
{
MiscDebug("Application::Neuralyzer::Mcp::Dispatcher", "Received MCP tools/call");
Expand Down Expand Up @@ -2558,6 +2706,22 @@ nlohmann::json Application::Neuralyzer::Mcp::Dispatcher::callTools(nlohmann::jso
}
if(method == "tools/call")
{
// Network tools must run off the message thread (a blocking fetch would freeze
// the UI) and do not touch the document model, so handle them directly here
// instead of through the synchronous message-thread dispatch below.
if(request.contains("params") && request.at("params").is_object() &&
request.at("params").value("name", std::string{}) == "fetch_web_page")
{
try
{
return performWebFetch(request);
}
catch(std::exception const& e)
{
return createError(e.what());
}
}

nlohmann::json response;
juce::MessageManager::callSync([&]()
{
Expand Down
2 changes: 1 addition & 1 deletion Source/Application/AnlApplicationNeuralyzerSettings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -559,7 +559,7 @@ Application::Neuralyzer::SettingsContent::SettingsContent(Accessor& accessor)
mBackend.entry.setSelectedItemIndex(static_cast<int>(index), juce::NotificationType::dontSendNotification);
mRemoteUrl.setVisible(backend == AgentBackend::remote);
mBatchSize.setVisible(backend == AgentBackend::local);
//mPresencePenalty.setVisible(backend == AgentBackend::local);
// mPresencePenalty.setVisible(backend == AgentBackend::local);
mModelsDirectory.setVisible(backend == AgentBackend::local);
resized();
break;
Expand Down
Loading