Skip to content
Open
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
d0662c2
Add crawl-indexer CLI tool for site and guide crawling/indexing (cher…
Mpdreamz Mar 5, 2026
133b196
Cherry-pick crawl-indexer stage commit with translation discovery
Mpdreamz Mar 5, 2026
e56eafb
Update crawl-indexer to use IncrementalSyncOrchestrator and Elastic.M…
Mpdreamz Mar 5, 2026
9805d05
Extract shared IDocument interface and common mapping/analysis config
Mpdreamz Mar 5, 2026
f7a4dba
Add --unchanged flag and fix site indexer for incremental runs
Mpdreamz Mar 5, 2026
1d165aa
Add subcommands for site/guide: index, clean, ai
Mpdreamz Mar 5, 2026
d79f97a
Add VHS recording of site index dry-run
Mpdreamz Mar 5, 2026
bd16fbe
Fix --max-run-docs: pass through to AiEnrichmentOptions.MaxEnrichment…
Mpdreamz Mar 5, 2026
a7aebce
Fix AI enrichment: don't break async enumerable before cache/backfill…
Mpdreamz Mar 5, 2026
b1a9779
Fix CodeQL findings in crawl-indexer
Mpdreamz Mar 5, 2026
dc415e4
Bump Elastic.Ingest.Elasticsearch and Elastic.Mapping to 0.35.0
Mpdreamz Mar 5, 2026
09724dc
Bump Elastic.Mapping and Elastic.Ingest.Elasticsearch to 0.37.0
Mpdreamz Mar 5, 2026
ad47d66
Apply suggestions from code review
Mpdreamz Mar 13, 2026
8004d16
Merge branch 'main' into feature/crawl-indexer-v2
Mpdreamz Mar 13, 2026
6a2a447
Introduce a configuration option to set changelog naming scheme (#2884)
cotti Mar 13, 2026
3419918
Update link to agent skills in landing page (#2899)
florent-leborgne Mar 13, 2026
a825e55
Fix x-state in API explorer (#2894)
lcawl Mar 13, 2026
00adec6
[Automation] Bump product version numbers (#2895)
elastic-observability-automation[bot] Mar 13, 2026
e0eac30
update to latest ingest
Mpdreamz Mar 15, 2026
ec15cd7
clean up namespaces
Mpdreamz Mar 15, 2026
0bd58a7
Replace translation URL probing with language sitemaps
Mpdreamz Mar 15, 2026
4b1999e
Code review cleanup for crawl-indexer-v2
Mpdreamz Mar 15, 2026
5a8c867
Fix GuideHtmlExtractor interface implementation
Mpdreamz Mar 15, 2026
5385f8d
Fix remaining CA1725/CA1816/CA1852 build errors in crawl-indexer
Mpdreamz Mar 15, 2026
4119dc3
Fix });); syntax error in GuideCommand and commit remaining format/us…
Mpdreamz Mar 15, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added .vhs/site-index-dry-run.gif
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
13 changes: 13 additions & 0 deletions .vhs/site-index-dry-run.tape
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# VHS tape: records a terminal session running the crawl-indexer `site index`
# command in dry-run mode and writes the recording to the GIF named below.
Output .vhs/site-index-dry-run.gif

# Terminal appearance and simulated typing speed for the recording.
Set Shell "zsh"
Set FontSize 14
Set Width 1400
Set Height 900
Set Padding 20
Set Theme "Catppuccin Mocha"
Set TypingSpeed 40ms

# Type the command, submit it, then keep recording for 120 seconds.
Type "dotnet run --project src/tooling/crawl-indexer/ -- site index --fair --dry-run --max-pages 100 --rps 10 --no-ai"
Enter
Sleep 120s
2 changes: 2 additions & 0 deletions Directory.Packages.props
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
<PackageVersion Include="AWSSDK.SQS" Version="4.0.2.14" />
<PackageVersion Include="AWSSDK.S3" Version="4.0.18.6" />
<PackageVersion Include="Elastic.OpenTelemetry" Version="1.1.0" />
<PackageVersion Include="Microsoft.Extensions.Configuration.EnvironmentVariables" Version="10.0.0" />
<PackageVersion Include="Microsoft.Extensions.Configuration.UserSecrets" Version="10.0.0" />
<PackageVersion Include="Microsoft.Extensions.Configuration.Abstractions" Version="10.0.3" />
<PackageVersion Include="Microsoft.Extensions.Telemetry.Abstractions" Version="10.0.0" />
Expand Down Expand Up @@ -73,6 +74,7 @@
<PackageVersion Include="Crayon" Version="2.0.69" />
<PackageVersion Include="DotNet.Glob" Version="3.1.3" />
<PackageVersion Include="Errata" Version="0.15.0" />
<PackageVersion Include="Spectre.Console" Version="0.54.0" />
<PackageVersion Include="Github.Actions.Core" Version="9.0.0" />
<PackageVersion Include="Microsoft.Extensions.Logging" Version="10.0.0" />
<PackageVersion Include="Microsoft.Extensions.Logging.Console" Version="10.0.0" />
Expand Down
2 changes: 2 additions & 0 deletions docs-builder.slnx
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
<Project Path="src/Elastic.Documentation.Svg/Elastic.Documentation.Svg.csproj" />
<Project Path="src/Elastic.Documentation/Elastic.Documentation.csproj" />
<Project Path="src/Elastic.Markdown/Elastic.Markdown.csproj" />
<Project Path="src/Elastic.Documentation.Tooling/Elastic.Documentation.Tooling.csproj" />
</Folder>
<Folder Name="/src/api/">
<Project Path="src/api/Elastic.Documentation.Api.Core/Elastic.Documentation.Api.Core.csproj" />
Expand All @@ -80,6 +81,7 @@
</Folder>
<Folder Name="/src/tooling/">
<File Path="src/tooling/Directory.Build.props" />
<Project Path="src/tooling/crawl-indexer/crawl-indexer.csproj" />
<Project Path="src/tooling/docs-builder/docs-builder.csproj" />
</Folder>
<Folder Name="/tests-integration/">
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
<Project Sdk="Microsoft.NET.Sdk">

<!-- Project with the Elastic.Documentation.Tooling root namespace, targeting net10.0. -->
<PropertyGroup>
<TargetFramework>net10.0</TargetFramework>
<RootNamespace>Elastic.Documentation.Tooling</RootNamespace>
</PropertyGroup>

<!-- Package references carry no Version attribute here; NOTE(review): versions are presumably
     supplied centrally by Directory.Packages.props - confirm when adding a new package. -->
<ItemGroup>
<PackageReference Include="Github.Actions.Core" />
<PackageReference Include="Microsoft.Extensions.Configuration.EnvironmentVariables" />
<PackageReference Include="Microsoft.Extensions.Configuration.UserSecrets" />
<PackageReference Include="Microsoft.Extensions.Hosting.Abstractions" />
<PackageReference Include="Microsoft.Extensions.ServiceDiscovery" />
</ItemGroup>

<!-- Sibling projects this project builds against. -->
<ItemGroup>
<ProjectReference Include="..\Elastic.Documentation\Elastic.Documentation.csproj" />
<ProjectReference Include="..\Elastic.Documentation.Configuration\Elastic.Documentation.Configuration.csproj" />
</ItemGroup>

</Project>
177 changes: 177 additions & 0 deletions src/Elastic.Documentation.Tooling/ToolingDefaults.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
// Licensed to Elasticsearch B.V under one or more agreements.
// Elasticsearch B.V licenses this file to you under the Apache 2.0 License.
// See the LICENSE file in the project root for more information

using System.Diagnostics.CodeAnalysis;
using System.Net.Sockets;
using Actions.Core.Extensions;
using Actions.Core.Services;
using Elastic.Documentation.Configuration;
using Elastic.Documentation.Configuration.LegacyUrlMappings;
using Elastic.Documentation.Configuration.Products;
using Elastic.Documentation.Configuration.Search;
using Elastic.Documentation.Configuration.Versions;
using Elastic.Documentation.Diagnostics;
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.Configuration.UserSecrets;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.DependencyInjection.Extensions;
using Microsoft.Extensions.Hosting;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.ServiceDiscovery;

namespace Elastic.Documentation.Tooling;

/// <summary>
/// Service-collection defaults for CLI applications: diagnostics, service discovery,
/// Elasticsearch endpoint resolution and the <see cref="IConfigurationContext"/> registration.
/// </summary>
public static class ToolingDefaults
{
    /// <summary>User secrets ID used to read locally stored parameters (Aspire user secrets ID).</summary>
    private const string UserSecretsId = "72f50f33-6fb9-4d08-bff3-39568fe370b3";

    /// <summary>
    /// Adds common tooling defaults for CLI applications.
    /// </summary>
    /// <remarks>
    /// Registers GitHub Actions core services, a <see cref="DiagnosticsChannel"/>, service discovery
    /// (also applied to the default HTTP client configuration), the supplied diagnostics collector
    /// factory, a <see cref="DocumentationEndpoints"/> singleton (only when none is registered yet)
    /// and an <see cref="IConfigurationContext"/>. The configuration types that the
    /// <see cref="IConfigurationContext"/> factory resolves are not registered by this method.
    /// </remarks>
    /// <param name="builder">The host application builder</param>
    /// <param name="diagnosticsCollectorFactory">Factory to create the diagnostics collector</param>
    /// <returns>The same <paramref name="builder"/> instance, to allow chaining.</returns>
    public static TBuilder AddToolingDefaults<TBuilder>(
        this TBuilder builder,
        Func<IServiceProvider, IDiagnosticsCollector> diagnosticsCollectorFactory
    ) where TBuilder : IHostApplicationBuilder
    {
        // Build configuration with user secrets support (Aspire user secrets ID).
        // Environment variables are added last so they override user secrets.
        var configBuilder = new ConfigurationBuilder();
        _ = configBuilder.AddUserSecrets(UserSecretsId);
        _ = configBuilder.AddEnvironmentVariables();
        var secretsConfig = configBuilder.Build();

        _ = builder.Services
            .AddGitHubActionsCore()
            .AddSingleton<DiagnosticsChannel>()
            .AddServiceDiscovery()
            .ConfigureHttpClientDefaults(static client =>
            {
                _ = client.AddServiceDiscovery();
            })
            .AddSingleton(diagnosticsCollectorFactory);

        // TryAdd: a host that already registered its own endpoints keeps them.
        builder.Services.TryAddSingleton(sp =>
        {
            var resolver = sp.GetRequiredService<ServiceEndpointResolver>();
            return CreateDocumentationEndpoints(resolver, secretsConfig);
        });

        _ = builder.Services
            .AddSingleton<IConfigurationContext>(sp =>
            {
                var endpoints = sp.GetRequiredService<DocumentationEndpoints>();
                var configurationFileProvider = sp.GetRequiredService<ConfigurationFileProvider>();
                var versionsConfiguration = sp.GetRequiredService<VersionsConfiguration>();
                var products = sp.GetRequiredService<ProductsConfiguration>();
                var legacyUrlMappings = sp.GetRequiredService<LegacyUrlMappingConfiguration>();
                var search = sp.GetRequiredService<SearchConfiguration>();
                return new ConfigurationContext
                {
                    ConfigurationFileProvider = configurationFileProvider,
                    VersionsConfiguration = versionsConfiguration,
                    Endpoints = endpoints,
                    ProductsConfiguration = products,
                    LegacyUrlMappings = legacyUrlMappings,
                    SearchConfiguration = search
                };
            });

        return builder;
    }

    /// <summary>
    /// Builds the <see cref="DocumentationEndpoints"/> from service discovery, falling back to
    /// configuration / environment variables and finally to <c>http://localhost:9200</c>.
    /// </summary>
    /// <param name="resolver">Service discovery resolver used to look up the elasticsearch endpoint.</param>
    /// <param name="secretsConfig">Configuration holding user secrets and environment variables.</param>
    /// <returns>Endpoints with the Elasticsearch URI and credentials populated.</returns>
    private static DocumentationEndpoints CreateDocumentationEndpoints(ServiceEndpointResolver resolver, IConfiguration secretsConfig)
    {
        var elasticsearchUri = ResolveServiceEndpoint(
            resolver,
            () => TryConfigOrEnvVars(
                secretsConfig,
                "http://localhost:9200",
                "Parameters:DocumentationElasticUrl",
                "DOCUMENTATION_ELASTIC_URL",
                "CONNECTIONSTRINGS__ELASTICSEARCH"
            )
        );

        // Credentials embedded in the URI ("user:password@host") take precedence over configuration.
        // Split on the FIRST ':' only so passwords containing ':' stay intact, and unescape because
        // Uri.UserInfo returns the percent-encoded form.
        string? uriUser = null;
        string? uriPassword = null;
        if (elasticsearchUri.UserInfo is { } userInfo && userInfo.Contains(':'))
        {
            var parts = userInfo.Split(':', 2);
            uriUser = Uri.UnescapeDataString(parts[0]);
            uriPassword = Uri.UnescapeDataString(parts[1]);
        }

        var elasticsearchPassword = uriPassword
            ?? TryConfigOrEnvVarsOptional(secretsConfig, "Parameters:DocumentationElasticPassword", "DOCUMENTATION_ELASTIC_PASSWORD");

        var elasticsearchUser = uriUser
            ?? TryConfigOrEnvVars(secretsConfig, "elastic", "Parameters:DocumentationElasticUsername", "DOCUMENTATION_ELASTIC_USERNAME");

        var elasticsearchApiKey = TryConfigOrEnvVarsOptional(
            secretsConfig,
            "Parameters:DocumentationElasticApiKey",
            "DOCUMENTATION_ELASTIC_APIKEY"
        );

        return new DocumentationEndpoints
        {
            Elasticsearch = new ElasticsearchEndpoint
            {
                Uri = elasticsearchUri,
                Password = elasticsearchPassword,
                ApiKey = elasticsearchApiKey,
                Username = elasticsearchUser
            },
        };
    }

    /// <summary>
    /// Returns the first non-empty value found for <paramref name="keys"/> (in order), or
    /// <paramref name="fallback"/> when none of them yields a value.
    /// </summary>
    /// <param name="config">Configuration to query first for each key.</param>
    /// <param name="fallback">Value returned when no key resolves.</param>
    /// <param name="keys">Configuration keys and/or environment variable names, highest priority first.</param>
    private static string TryConfigOrEnvVars(IConfiguration config, string fallback, params string[] keys) =>
        TryConfigOrEnvVarsOptional(config, keys) ?? fallback;

    /// <summary>
    /// Returns the first non-empty value found for <paramref name="keys"/> (in order), or
    /// <c>null</c> when none of them yields a value.
    /// </summary>
    /// <param name="config">Configuration to query first for each key.</param>
    /// <param name="keys">Configuration keys and/or environment variable names, highest priority first.</param>
    private static string? TryConfigOrEnvVarsOptional(IConfiguration config, params string[] keys)
    {
        foreach (var key in keys)
        {
            // Try configuration first (user secrets)
            var configValue = config[key];
            if (!string.IsNullOrEmpty(configValue))
                return configValue;

            // Try environment variable (for keys that look like env vars):
            // anything containing '_' or written entirely in upper case.
            if (key.Contains('_') || string.Equals(key, key.ToUpperInvariant(), StringComparison.Ordinal))
            {
                var envValue = Environment.GetEnvironmentVariable(key);
                if (!string.IsNullOrEmpty(envValue))
                    return envValue;
            }
        }
        return null;
    }

    /// <summary>
    /// Synchronously resolves the <c>elasticsearch</c> service endpoint through service discovery.
    /// </summary>
    /// <param name="resolver">Service discovery resolver.</param>
    /// <param name="fallback">
    /// Produces the URI string to use when discovery returns no endpoint, or one whose address
    /// family is unknown/unspecified. Only invoked when needed.
    /// </param>
    /// <returns>The resolved endpoint URI, or the URI built from <paramref name="fallback"/>.</returns>
    /// <exception cref="InvalidOperationException">The first endpoint has no string representation.</exception>
    [SuppressMessage("Reliability", "CA2012:Use ValueTasks correctly")]
    private static Uri ResolveServiceEndpoint(ServiceEndpointResolver resolver, Func<string> fallback)
    {
        var get = resolver.GetEndpointsAsync("https+http://elasticsearch", Cancel.None);
        // Blocking directly on an incomplete ValueTask is unsupported; convert it to a Task first.
        var endpoint = get.IsCompletedSuccessfully ? get.Result : get.AsTask().GetAwaiter().GetResult();
        if (endpoint.Endpoints.Count == 0)
            return new Uri(fallback());

        var first = endpoint.Endpoints[0];
        if (first.EndPoint.AddressFamily is AddressFamily.Unknown or AddressFamily.Unspecified)
            return new Uri(fallback());

        return new Uri(first.ToString() ?? throw new InvalidOperationException("No 'elasticsearch' endpoints found"));
    }
}
27 changes: 26 additions & 1 deletion src/Elastic.Documentation/Search/DocumentationDocument.cs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ public record ParentDocument
public required string Url { get; set; }
}

public record DocumentationDocument
public record DocumentationDocument : IDocument
{
[AiInput]
[JsonPropertyName("title")]
Expand Down Expand Up @@ -150,4 +150,29 @@ public record DocumentationDocument
[JsonPropertyName("ai_use_cases")]
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
public string[]? AiUseCases { get; set; }

/// <summary>
/// Hash of the LLM prompt templates used to generate AI fields.
/// Used to detect stale enrichments when prompts change.
/// </summary>
[Keyword]
[JsonPropertyName("enrichment_prompt_hash")]
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
public string? EnrichmentPromptHash { get; set; }

// HTTP caching fields for incremental sync

/// <summary>
/// ETag header from last crawl - used for conditional HTTP requests.
/// </summary>
[JsonPropertyName("http_etag")]
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
public string? HttpEtag { get; set; }

/// <summary>
/// Last-Modified header from last crawl - used for conditional HTTP requests.
/// </summary>
[JsonPropertyName("http_last_modified")]
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
public DateTimeOffset? HttpLastModified { get; set; }
}
Loading
Loading