Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -127,8 +127,16 @@ Cancel ctx

if (exporters.Contains(Exporter.Html))
{
var sitemapBuilder = new SitemapBuilder(navigation.NavigationItems, assembleContext.WriteFileSystem, assembleContext.OutputWithPathPrefixDirectory);
sitemapBuilder.Generate();
// Build-time sitemap uses current date as placeholder for backwards compatibility.
// Production sitemap with correct last_updated dates is generated via
// `assembler sitemap` after ES indexing, which overwrites this file.
var urls = navigation.NavigationItems
.SelectMany(SitemapNavigationHelper.Flatten)
.Select(n => n.Url)
.Distinct();
var now = DateTimeOffset.UtcNow;
var entries = urls.ToDictionary(u => u, _ => now);
SitemapBuilder.Generate(entries, assembleContext.WriteFileSystem, assembleContext.OutputWithPathPrefixDirectory);
}

if (exporters.Contains(Exporter.LLMText))
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
// Licensed to Elasticsearch B.V under one or more agreements.
// Elasticsearch B.V licenses this file to you under the Apache 2.0 License.
// See the LICENSE file in the project root for more information

using System.IO.Abstractions;
using Actions.Core.Services;
using Elastic.Documentation.Configuration;
using Elastic.Documentation.Configuration.Assembler;
using Elastic.Documentation.Diagnostics;
using Elastic.Documentation.Search;
using Elastic.Documentation.Services;
using Elastic.Markdown.Exporters.Elasticsearch;
using Microsoft.Extensions.Logging;

namespace Elastic.Documentation.Assembler.Building;

/// <summary>
/// Generates the assembler sitemap from the url / last_updated pairs stored in the Elasticsearch
/// index that belongs to the selected environment.
/// </summary>
public class AssemblerSitemapService(
    ILoggerFactory logFactory,
    AssemblyConfiguration assemblyConfiguration,
    IConfigurationContext configurationContext,
    ICoreService githubActionsService
) : IService
{
    private readonly ILogger _logger = logFactory.CreateLogger<AssemblerSitemapService>();

    /// <summary>
    /// Reads every sitemap entry from the environment's index and hands them to
    /// <c>SitemapBuilder.Generate</c> together with the assemble context's output directory.
    /// </summary>
    /// <param name="collector">Receives configuration errors and the "no documents" error.</param>
    /// <param name="fileSystem">Used as both read and write file system of the assemble context.</param>
    /// <param name="endpoint">Optional Elasticsearch endpoint override.</param>
    /// <param name="environment">
    /// Target environment. When <c>null</c>, the GitHub Actions input <c>environment</c> is used,
    /// and when that is empty as well, <c>dev</c>.
    /// </param>
    /// <param name="apiKey">Optional API key override.</param>
    /// <param name="username">Optional username override.</param>
    /// <param name="password">Optional password override.</param>
    /// <param name="debugMode">Optional debug mode override.</param>
    /// <param name="proxyAddress">Optional proxy address override.</param>
    /// <param name="proxyPassword">Optional proxy password override.</param>
    /// <param name="proxyUsername">Optional proxy username override.</param>
    /// <param name="disableSslVerification">Optional SSL verification override.</param>
    /// <param name="certificateFingerprint">Optional certificate fingerprint override.</param>
    /// <param name="certificatePath">Optional certificate path override.</param>
    /// <param name="certificateNotRoot">Optional "certificate is not a root" override.</param>
    /// <param name="ctx">Cancellation token flowed to the endpoint configuration and the index reads.</param>
    /// <returns>
    /// <c>true</c> once the sitemap has been generated; <c>false</c> when the collector holds errors
    /// after endpoint configuration or when the index yielded no entries.
    /// </returns>
    public async Task<bool> GenerateSitemapAsync(
        IDiagnosticsCollector collector,
        FileSystem fileSystem,
        string? endpoint = null,
        string? environment = null,
        string? apiKey = null,
        string? username = null,
        string? password = null,
        bool? debugMode = null,
        string? proxyAddress = null,
        string? proxyPassword = null,
        string? proxyUsername = null,
        bool? disableSslVerification = null,
        string? certificateFingerprint = null,
        string? certificatePath = null,
        bool? certificateNotRoot = null,
        Cancel ctx = default
    )
    {
        // Precedence: explicit argument, then the GitHub Actions input, then "dev".
        var actionInput = githubActionsService.GetInput("environment");
        environment ??= string.IsNullOrEmpty(actionInput) ? "dev" : actionInput;

        _logger.LogInformation("Generating sitemap from ES index for environment {Environment}", environment);

        var context = new AssembleContext(
            assemblyConfiguration,
            configurationContext,
            environment,
            collector,
            fileSystem,
            fileSystem,
            null,
            null
        );

        // Layer the per-invocation overrides on top of the configured Elasticsearch endpoint.
        var elasticsearchEndpoint = configurationContext.Endpoints.Elasticsearch;
        await ElasticsearchEndpointConfigurator.ApplyAsync(
            elasticsearchEndpoint,
            new ElasticsearchIndexOptions
            {
                Endpoint = endpoint,
                ApiKey = apiKey,
                Username = username,
                Password = password,
                DebugMode = debugMode,
                ProxyAddress = proxyAddress,
                ProxyPassword = proxyPassword,
                ProxyUsername = proxyUsername,
                DisableSslVerification = disableSslVerification,
                CertificateFingerprint = certificateFingerprint,
                CertificatePath = certificatePath,
                CertificateNotRoot = certificateNotRoot
            },
            collector,
            fileSystem,
            ctx
        );

        if (collector.Errors > 0)
            return false;

        var transport = ElasticsearchTransportFactory.Create(elasticsearchEndpoint);

        var mappingContext = DocumentationMappingContext.DocumentationDocument
            .CreateContext(type: "assembler", env: environment);
        var indexName = mappingContext.ResolveReadTarget();

        _logger.LogInformation("Querying index {Index} for sitemap entries", indexName);

        // Keyed by URL: when the reader yields the same URL twice, the later entry wins.
        var lastUpdatedByUrl = new Dictionary<string, DateTimeOffset>();
        var reader = new EsSitemapReader(transport, _logger, indexName);

        await foreach (var (url, lastUpdated) in reader.ReadAllAsync(ctx))
            lastUpdatedByUrl[url] = lastUpdated;

        _logger.LogInformation("Fetched {Count} sitemap entries from ES", lastUpdatedByUrl.Count);

        if (lastUpdatedByUrl.Count == 0)
        {
            collector.EmitGlobalError("No documents found in ES index — cannot generate sitemap");
            return false;
        }

        SitemapBuilder.Generate(lastUpdatedByUrl, context.WriteFileSystem, context.OutputWithPathPrefixDirectory);

        _logger.LogInformation("Sitemap written to {Path}", context.OutputWithPathPrefixDirectory.FullName);
        return true;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
// Licensed to Elasticsearch B.V under one or more agreements.
// Elasticsearch B.V licenses this file to you under the Apache 2.0 License.
// See the LICENSE file in the project root for more information

using System.Globalization;
using System.Runtime.CompilerServices;
using Elastic.Transport;
using Elastic.Transport.Products.Elasticsearch;
using Microsoft.Extensions.Logging;

namespace Elastic.Documentation.Assembler.Building;

/// <summary>A page URL paired with the timestamp at which that page was last updated.</summary>
public record SitemapEntry(string Url, DateTimeOffset LastUpdated);

/// <summary>Reads all url + last_updated pairs from the ES lexical index using search_after with PIT.</summary>
public class EsSitemapReader(DistributedTransport transport, ILogger logger, string indexName)
{
    private const int PageSize = 1000;
    private const string PitKeepAlive = "2m";

    /// <summary>
    /// Streams every non-hidden document of the index as a <see cref="SitemapEntry"/>, one page of
    /// <c>PageSize</c> hits at a time, ordered by url.
    /// </summary>
    /// <param name="ct">Cancels the PIT and search requests.</param>
    /// <returns>One entry per hit that carries both a <c>url</c> and a <c>last_updated</c> value.</returns>
    /// <exception cref="InvalidOperationException">
    /// The PIT could not be opened, a search request failed, or a full page came back without any
    /// sort values to continue from.
    /// </exception>
    /// <exception cref="FormatException">A hit carries a <c>last_updated</c> value that is not a valid timestamp.</exception>
    public async IAsyncEnumerable<SitemapEntry> ReadAllAsync([EnumeratorCancellation] Cancel ct = default)
    {
        var pitId = await OpenPitAsync(ct);
        try
        {
            object[]? lastSortValues = null;
            var page = 0;
            int hitCount;

            do
            {
                var body = BuildSearchBody(pitId, lastSortValues);
                var response = await transport.PostAsync<DynamicResponse>("/_search", PostData.String(body), ct);

                if (!response.ApiCallDetails.HasSuccessfulStatusCode)
                    throw new InvalidOperationException(
                        $"ES search failed (page {page}): {response.ApiCallDetails.HttpStatusCode} {response.ApiCallDetails.DebugInformation}");

                // Update PIT id — ES may return a new one on each response
                pitId = response.Body.Get<string>("pit_id") ?? pitId;

                var hits = response.Body.Get<object[]>("hits.hits");
                hitCount = hits?.Length ?? 0;
                var cursorAdvanced = false;

                if (hits is not null)
                {
                    foreach (var hit in hits)
                    {
                        if (hit is not IDictionary<string, object> dict)
                            continue;

                        // Advance the search_after cursor for EVERY hit, including the ones skipped
                        // below; otherwise skipped hits at the end of a page leave the cursor stale
                        // and the same documents are requested again.
                        if (dict.TryGetValue("sort", out var sortObj) && sortObj is object[] { Length: > 0 } sortValues)
                        {
                            lastSortValues = sortValues;
                            cursorAdvanced = true;
                        }

                        if (TryReadEntry(dict) is { } entry)
                            yield return entry;
                    }
                }

                page++;
                logger.LogInformation("Sitemap: fetched page {Page} ({Hits} hits)", page, hitCount);

                // A full page without a usable cursor would re-request the same page forever.
                if (hitCount == PageSize && !cursorAdvanced)
                    throw new InvalidOperationException(
                        $"ES search returned a full page (page {page}) without sort values; cannot continue paging with search_after");

            } while (hitCount == PageSize);
        }
        finally
        {
            await ClosePitAsync(pitId, ct);
        }
    }

    /// <summary>
    /// Extracts url and last_updated from a hit's <c>_source</c>. Returns <c>null</c> when the hit has
    /// no <c>_source</c> object or either field is missing or null.
    /// </summary>
    private static SitemapEntry? TryReadEntry(IDictionary<string, object> hit)
    {
        // TryGetValue instead of the indexer: an absent key must skip the hit, not throw.
        if (!hit.TryGetValue("_source", out var sourceObj) || sourceObj is not IDictionary<string, object> source)
            return null;

        var url = source.TryGetValue("url", out var urlObj) ? urlObj?.ToString() : null;
        var lastUpdatedStr = source.TryGetValue("last_updated", out var lastUpdatedObj) ? lastUpdatedObj?.ToString() : null;

        if (url is null || lastUpdatedStr is null)
            return null;

        // AssumeUniversal: a timestamp without an offset is read as UTC instead of machine-local time.
        var lastUpdated = DateTimeOffset.Parse(lastUpdatedStr, CultureInfo.InvariantCulture, DateTimeStyles.AssumeUniversal);
        return new SitemapEntry(url, lastUpdated);
    }

    /// <summary>Opens a point-in-time on the index and returns its id.</summary>
    /// <exception cref="InvalidOperationException">The request failed or the response carried no id.</exception>
    private async Task<string> OpenPitAsync(Cancel ct)
    {
        var response = await transport.PostAsync<DynamicResponse>(
            $"/{indexName}/_pit?keep_alive={PitKeepAlive}", PostData.Empty, ct);

        if (!response.ApiCallDetails.HasSuccessfulStatusCode)
            throw new InvalidOperationException(
                $"Failed to open PIT on {indexName}: {response.ApiCallDetails.HttpStatusCode} {response.ApiCallDetails.DebugInformation}");

        var pitId = response.Body.Get<string>("id");
        if (string.IsNullOrEmpty(pitId))
            throw new InvalidOperationException("PIT response did not contain an id");

        // Only a short prefix of the id is logged.
        logger.LogInformation("Opened PIT on {Index}: {PitId}", indexName, pitId[..Math.Min(20, pitId.Length)] + "...");
        return pitId;
    }

    /// <summary>Closes the point-in-time. Best effort: failures are logged as warnings and never thrown.</summary>
    private async Task ClosePitAsync(string pitId, Cancel ct)
    {
        try
        {
            var body = $$"""{"id":"{{EscapeJson(pitId)}}"}""";
            var response = await transport.DeleteAsync<DynamicResponse>("/_pit", default!, PostData.String(body), ct);

            // Only report success when the request actually succeeded.
            if (response.ApiCallDetails.HasSuccessfulStatusCode)
                logger.LogInformation("Closed PIT");
            else
                logger.LogWarning("Failed to close PIT (non-fatal): {StatusCode}", response.ApiCallDetails.HttpStatusCode);
        }
        catch (Exception ex)
        {
            logger.LogWarning(ex, "Failed to close PIT (non-fatal)");
        }
    }

    /// <summary>Builds the JSON body of one search page.</summary>
    /// <param name="pitId">Id of the open point-in-time.</param>
    /// <param name="searchAfter">
    /// Sort values of the last hit of the previous page; <c>null</c> or empty for the first page.
    /// Every value is sent as a JSON string.
    /// </param>
    internal static string BuildSearchBody(string pitId, object[]? searchAfter)
    {
        var searchAfterClause = "";
        if (searchAfter is { Length: > 0 })
        {
            // Invariant formatting keeps numeric sort values independent of the machine's culture.
            var values = string.Join(",", searchAfter.Select(v =>
                $"\"{EscapeJson(Convert.ToString(v, CultureInfo.InvariantCulture) ?? "")}\""));
            searchAfterClause = $",\"search_after\":[{values}]";
        }

        return $$"""
            {
                "size": {{PageSize}},
                "_source": ["url", "last_updated"],
                "query": { "bool": { "must_not": [{ "term": { "hidden": true } }] } },
                "pit": { "id": "{{EscapeJson(pitId)}}", "keep_alive": "{{PitKeepAlive}}" },
                "sort": [{ "url": "asc" }]{{searchAfterClause}}
            }
            """;
    }

    /// <summary>
    /// Escapes <paramref name="value"/> for use inside a JSON string literal. Quotes and backslashes
    /// become <c>\"</c> and <c>\\</c>; control characters are escaped too, so the result is always valid JSON.
    /// </summary>
    private static string EscapeJson(string value) =>
        global::System.Text.Json.JsonEncodedText
            .Encode(value, global::System.Text.Encodings.Web.JavaScriptEncoder.UnsafeRelaxedJsonEscaping)
            .ToString();
}
Original file line number Diff line number Diff line change
Expand Up @@ -12,78 +12,58 @@

namespace Elastic.Documentation.Assembler.Building;

// TODO rewrite as real exporter
public class SitemapBuilder(
IReadOnlyCollection<INavigationItem> navigationItems,
IFileSystem fileSystem,
IDirectoryInfo pathPrefixedOutputFolder
)
public static class SitemapBuilder
{
private static readonly Uri BaseUri = new("https://www.elastic.co");

public void Generate()
/// <summary>Generates sitemap.xml with per-URL last_updated dates.</summary>
public static void Generate(
IReadOnlyDictionary<string, DateTimeOffset> entries,
IFileSystem fileSystem,
IDirectoryInfo outputFolder
)
{
var flattenedNavigationItems = GetNavigationItems(navigationItems);

var doc = new XDocument
{
Declaration = new XDeclaration("1.0", "utf-8", "yes")
};

XNamespace ns = "http://www.sitemaps.org/schemas/sitemap/0.9";

var currentDate = DateTime.UtcNow.ToString("o", CultureInfo.InvariantCulture);
var root = new XElement(
ns + "urlset",
new XAttribute("xmlns", "http://www.sitemaps.org/schemas/sitemap/0.9"),
flattenedNavigationItems
.Select(n => n switch
{
INodeNavigationItem<INavigationModel, INavigationItem> group => (group.Url, NavigationItem: group),
ILeafNavigationItem<INavigationModel> file => (file.Url, NavigationItem: file as INavigationItem),
_ => throw new Exception($"{nameof(SitemapBuilder)}.{nameof(Generate)}: Unhandled navigation item type: {n.GetType()}")
})
.Select(n => n.Url)
.Distinct()
.Select(u => new Uri(BaseUri, u))
.Select(u => new XElement(ns + "url", [
new XElement(ns + "loc", u),
new XElement(ns + "lastmod", currentDate)
entries
.OrderBy(e => e.Key, StringComparer.Ordinal)
.Select(e => new XElement(ns + "url", [
new XElement(ns + "loc", new Uri(BaseUri, e.Key)),
new XElement(ns + "lastmod", e.Value.ToString("o", CultureInfo.InvariantCulture))
]))
);

doc.Add(root);

using var fileStream = fileSystem.File.Create(fileSystem.Path.Combine(pathPrefixedOutputFolder.FullName, "sitemap.xml"));
if (!outputFolder.Exists)
_ = fileSystem.Directory.CreateDirectory(outputFolder.FullName);

using var fileStream = fileSystem.File.Create(fileSystem.Path.Combine(outputFolder.FullName, "sitemap.xml"));
doc.Save(fileStream);
}
}

private static IReadOnlyCollection<INavigationItem> GetNavigationItems(IReadOnlyCollection<INavigationItem> items)
{
var result = new List<INavigationItem>();
foreach (var item in items)
/// <summary>Extracts URLs from navigation items for sitemap generation.</summary>
public static class SitemapNavigationHelper
{
public static IEnumerable<INavigationItem> Flatten(INavigationItem item) =>
item switch
{
switch (item)
{
case ILeafNavigationItem<CrossLinkModel>:
case ILeafNavigationItem<DetectionRuleFile>:
case ILeafNavigationItem<INavigationModel> { Hidden: true }:
continue;
case ILeafNavigationItem<INavigationModel> file:
result.Add(file);
break;
case INodeNavigationItem<INavigationModel, INavigationItem> group:
if (item.Hidden)
continue;

result.AddRange(GetNavigationItems(group.NavigationItems));
result.Add(group);
break;
default:
throw new Exception($"{nameof(SitemapBuilder)}.{nameof(GetNavigationItems)}: Unhandled navigation item type: {item.GetType()}");
}
}

return result;
}
ILeafNavigationItem<CrossLinkModel> => [],
ILeafNavigationItem<DetectionRuleFile> => [],
ILeafNavigationItem<INavigationModel> { Hidden: true } => [],
ILeafNavigationItem<INavigationModel> file => [file],
INodeNavigationItem<INavigationModel, INavigationItem> { Hidden: true } => [],
INodeNavigationItem<INavigationModel, INavigationItem> group =>
group.NavigationItems.SelectMany(Flatten).Append(group),
_ => []
};
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,12 @@
<Nullable>enable</Nullable>
</PropertyGroup>

<ItemGroup>
<AssemblyAttribute Include="System.Runtime.CompilerServices.InternalsVisibleTo">
<_Parameter1>Elastic.Documentation.Build.Tests</_Parameter1>
</AssemblyAttribute>
</ItemGroup>

<ItemGroup>
<PackageReference Include="GitHub.Actions.Core" />
<PackageReference Include="ModelContextProtocol" />
Expand Down
Loading
Loading