Skip to content

Commit 850a61c

Browse files
frarees and yufeih authored
feat!: search improvements (#10113)
* feat!: search improvements
* Make it opt-in via _searchIndexUseMetadata
---------
Co-authored-by: Yufei Huang <[email protected]>
1 parent b0f5472 commit 850a61c

File tree

8 files changed

+225
-22
lines changed

8 files changed

+225
-22
lines changed
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
4+
using System.Collections.Immutable;
5+
using System.Composition;
6+
using Docfx.Build.Common;
7+
using Docfx.DataContracts.ManagedReference;
8+
using Docfx.Plugins;
9+
10+
namespace Docfx.Build.ManagedReference;
11+
12+
/// <summary>
/// Build step, exported under the <see cref="ManagedReferenceDocumentProcessor"/> contract name,
/// that copies data from the first item of each article page into the model's manifest properties.
/// </summary>
/// <remarks>
/// NOTE(review): the IsMRef / Title / Summary values written here are presumably what
/// ExtractSearchIndex later reads from the manifest item metadata — confirm against the manifest writer.
/// </remarks>
[Export(nameof(ManagedReferenceDocumentProcessor), typeof(IDocumentBuildStep))]
public class FillMetadata : BaseDocumentBuildStep
{
    public override string Name => nameof(FillMetadata);

    public override int BuildOrder => 0x30;

    /// <summary>
    /// Visits every model of type <see cref="DocumentType.Article"/> and fills its manifest properties.
    /// Models of any other type are left untouched.
    /// </summary>
    /// <param name="models">The models produced by the build.</param>
    /// <param name="host">The host service (not used by this step).</param>
    public override void Postbuild(ImmutableList<FileModel> models, IHostService host)
    {
        // Iterating an empty list is a no-op, so no separate emptiness check is needed.
        foreach (var fileModel in models)
        {
            if (fileModel.Type == DocumentType.Article)
            {
                FillFromFirstItem(fileModel);
            }
        }
    }

    /// <summary>
    /// Clears the Uid manifest property and, when the page has at least one item,
    /// marks the model as IsMRef and sets Title / Summary from the first item.
    /// </summary>
    private static void FillFromFirstItem(FileModel fileModel)
    {
        // Uid is reset before the content is inspected, so it is cleared even for pages without items.
        fileModel.ManifestProperties.Uid = null;

        var page = (PageViewModel)fileModel.Content;
        if (page.Items.Count == 0)
        {
            return;
        }

        var primaryItem = page.Items[0];
        fileModel.ManifestProperties.IsMRef = true;
        fileModel.ManifestProperties.Title = primaryItem.FullName;
        fileModel.ManifestProperties.Summary = primaryItem.Summary;
    }
}

src/Docfx.Build/PostProcessors/ExtractSearchIndex.cs

Lines changed: 81 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Licensed to the .NET Foundation under one or more agreements.
1+
// Licensed to the .NET Foundation under one or more agreements.
22
// The .NET Foundation licenses this file to you under the MIT license.
33

44
using System.Collections.Immutable;
@@ -18,6 +18,8 @@ partial class ExtractSearchIndex : IPostProcessor
1818
[GeneratedRegex(@"\s+")]
1919
private static partial Regex s_regexWhiteSpace();
2020

21+
private static readonly Regex s_regexCase = new(@"[a-z0-9]+|[A-Z0-9]+[a-z0-9]*|[0-9]+", RegexOptions.Compiled);
22+
2123
private static readonly HashSet<string> s_htmlInlineTags = new(StringComparer.OrdinalIgnoreCase)
2224
{
2325
"a", "area", "del", "ins", "link", "map", "meta", "abbr", "audio", "b", "bdo", "button", "canvas", "cite", "code", "command", "data",
@@ -29,12 +31,20 @@ partial class ExtractSearchIndex : IPostProcessor
2931
public string Name => nameof(ExtractSearchIndex);
3032
public const string IndexFileName = "index.json";
3133

34+
internal bool UseMetadata { get; set; } = false;
35+
internal bool UseMetadataTitle { get; set; } = true;
36+
3237
/// <summary>
/// Ensures <c>_enableSearch</c> is present in the metadata and reads the search-index options
/// <c>_searchIndexUseMetadata</c> (defaults to false) and <c>_searchIndexUseMetadataTitle</c> (defaults to true).
/// </summary>
/// <param name="metadata">The incoming metadata dictionary.</param>
/// <returns>The metadata, with <c>_enableSearch = true</c> added when the key was absent.</returns>
/// <exception cref="InvalidCastException">
/// Thrown by the unboxing casts when either option is present but its value is not a boxed <see cref="bool"/>.
/// </exception>
public ImmutableDictionary<string, object> PrepareMetadata(ImmutableDictionary<string, object> metadata)
{
    // Only add the flag when missing; an existing value (even false) is kept.
    metadata = metadata.ContainsKey("_enableSearch")
        ? metadata
        : metadata.Add("_enableSearch", true);

    // Opt-in: absent key means metadata is not used.
    var useMetadata = false;
    if (metadata.TryGetValue("_searchIndexUseMetadata", out var rawUseMetadata))
    {
        useMetadata = (bool)rawUseMetadata;
    }
    UseMetadata = useMetadata;

    // Opt-out: absent key means the metadata title is used.
    var useMetadataTitle = true;
    if (metadata.TryGetValue("_searchIndexUseMetadataTitle", out var rawUseMetadataTitle))
    {
        useMetadataTitle = (bool)rawUseMetadataTitle;
    }
    UseMetadataTitle = useMetadataTitle;

    Logger.LogInfo($"{Name}: {nameof(UseMetadata)} = {UseMetadata}, {nameof(UseMetadataTitle)} = {UseMetadataTitle}");
    return metadata;
}
4050

@@ -49,14 +59,15 @@ public Manifest Process(Manifest manifest, string outputFolder, CancellationToke
4959
var htmlFiles = (from item in manifest.Files ?? Enumerable.Empty<ManifestItem>()
5060
from output in item.Output
5161
where item.Type != "Toc" && output.Key.Equals(".html", StringComparison.OrdinalIgnoreCase)
52-
select output.Value.RelativePath).ToList();
62+
select (output.Value.RelativePath, item.Metadata)).ToList();
63+
5364
if (htmlFiles.Count == 0)
5465
{
5566
return manifest;
5667
}
5768

5869
Logger.LogInfo($"Extracting index data from {htmlFiles.Count} html files");
59-
foreach (var relativePath in htmlFiles)
70+
foreach ((string relativePath, Dictionary<string, object> metadata) in htmlFiles)
6071
{
6172
cancellationToken.ThrowIfCancellationRequested();
6273

@@ -76,7 +87,7 @@ from output in item.Output
7687
Logger.LogWarning($"Warning: Can't load content from {filePath}: {ex.Message}");
7788
continue;
7889
}
79-
var indexItem = ExtractItem(html, relativePath);
90+
var indexItem = ExtractItem(html, relativePath, metadata);
8091
if (indexItem != null)
8192
{
8293
indexData[relativePath] = indexItem;
@@ -99,7 +110,7 @@ from output in item.Output
99110
return manifest;
100111
}
101112

102-
internal SearchIndexItem ExtractItem(HtmlDocument html, string href)
113+
internal SearchIndexItem ExtractItem(HtmlDocument html, string href, Dictionary<string, object> metadata = null)
103114
{
104115
var contentBuilder = new StringBuilder();
105116

@@ -117,10 +128,37 @@ internal SearchIndexItem ExtractItem(HtmlDocument html, string href)
117128
ExtractTextFromNode(node, contentBuilder);
118129
}
119130

120-
var content = NormalizeContent(contentBuilder.ToString());
121-
var title = ExtractTitleFromHtml(html);
131+
string title;
132+
string summary = null;
133+
string keywords = null;
122134

123-
return new SearchIndexItem { Href = href, Title = title, Keywords = content };
135+
var isMRef = metadata != null && metadata.TryGetValue("IsMRef", out var isMRefMetadata) && (bool)isMRefMetadata;
136+
if (UseMetadata && isMRef)
137+
{
138+
title = UseMetadataTitle
139+
? (string)metadata["Title"] ?? ExtractTitleFromHtml(html)
140+
: ExtractTitleFromHtml(html);
141+
142+
var htmlSummary = (string)metadata["Summary"];
143+
if (!string.IsNullOrEmpty(htmlSummary))
144+
{
145+
var htmlDocument = new HtmlDocument();
146+
htmlDocument.LoadHtml(htmlSummary);
147+
var htmlRootNode = htmlDocument.DocumentNode.FirstChild;
148+
var summaryBuilder = new StringBuilder();
149+
ExtractTextFromNode(htmlRootNode, summaryBuilder);
150+
summary = NormalizeContent(summaryBuilder.ToString());
151+
}
152+
153+
keywords = string.Join(' ', title.Split(' ').Select(word => string.Join(' ', GetStemAggregations(word.Split('.')[^1]))));
154+
}
155+
else
156+
{
157+
title = ExtractTitleFromHtml(html);
158+
summary = NormalizeContent(contentBuilder.ToString());
159+
}
160+
161+
return new SearchIndexItem { Href = href, Title = title, Summary = summary, Keywords = keywords };
124162
}
125163

126164
private static string ExtractTitleFromHtml(HtmlDocument html)
@@ -140,6 +178,41 @@ private static string NormalizeContent(string str)
140178
return s_regexWhiteSpace().Replace(str, " ").Trim();
141179
}
142180

181+
/// <summary>
/// Splits a word into its case-delimited parts using <c>s_regexCase</c>
/// (e.g. a PascalCase identifier yields one stem per capitalized segment).
/// The input is HTML-decoded before matching.
/// </summary>
/// <param name="str">The word to split; may be null or empty.</param>
/// <returns>
/// The matched stems in order of appearance, or a single empty string when
/// <paramref name="str"/> is null or empty.
/// </returns>
private static string[] GetStems(string str)
{
    if (string.IsNullOrEmpty(str))
    {
        return [string.Empty];
    }

    var matches = s_regexCase.Matches(WebUtility.HtmlDecode(str));
    var stems = new string[matches.Count];
    for (var i = 0; i < stems.Length; i++)
    {
        stems[i] = matches[i].Value;
    }
    return stems;
}
190+
191+
/// <summary>
/// Produces every order-preserving combination of the stems of <paramref name="str"/>,
/// each combination concatenated without a separator.
/// </summary>
/// <remarks>
/// For n stems the result holds 2^n - 1 entries (one per non-empty subsequence),
/// listed in depth-first order: a combination is emitted before any longer combination that extends it.
/// </remarks>
/// <param name="str">The word whose stems are combined.</param>
/// <returns>The list of concatenated stem combinations.</returns>
private static List<string> GetStemAggregations(string str)
{
    var combinations = new List<string>();
    Expand(GetStems(str), string.Empty, 0, combinations);
    return combinations;

    // Extends `prefix` with each stem at or after `start`, records the result,
    // then recurses so that later stems can be appended to it in turn.
    static void Expand(string[] stems, string prefix, int start, List<string> output)
    {
        for (var position = start; position < stems.Length; position++)
        {
            var combined = prefix + stems[position];
            output.Add(combined);
            Expand(stems, combined, position + 1, output);
        }
    }
}
215+
143216
private static void ExtractTextFromNode(HtmlNode node, StringBuilder contentBuilder)
144217
{
145218
if (node == null)

src/Docfx.Build/PostProcessors/SearchIndexItem.cs

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,10 @@ class SearchIndexItem
2020
[JsonPropertyName("keywords")]
2121
public string Keywords { get; set; }
2222

23+
[JsonProperty("summary")]
24+
[JsonPropertyName("summary")]
25+
public string Summary { get; set; }
26+
2327
public override bool Equals(object obj)
2428
{
2529
return Equals(obj as SearchIndexItem);
@@ -35,11 +39,17 @@ public bool Equals(SearchIndexItem other)
3539
{
3640
return true;
3741
}
38-
return string.Equals(Title, other.Title) && string.Equals(Href, other.Href) && string.Equals(Keywords, other.Keywords);
42+
return string.Equals(Title, other.Title) &&
43+
string.Equals(Href, other.Href) &&
44+
string.Equals(Summary, other.Summary) &&
45+
string.Equals(Keywords, other.Keywords);
3946
}
4047

4148
/// <summary>
/// Computes a hash code from <c>Title</c>, <c>Href</c>, <c>Summary</c> and <c>Keywords</c>,
/// the same fields compared by <c>Equals</c>.
/// </summary>
/// <returns>The XOR of the individual string hash codes, with a null string contributing 0.</returns>
public override int GetHashCode()
{
    // ExtractItem leaves Keywords null for pages that are not indexed from metadata and
    // Summary null when no summary is available, so every field must be hashed null-safely.
    return (Title?.GetHashCode() ?? 0) ^
           (Href?.GetHashCode() ?? 0) ^
           (Summary?.GetHashCode() ?? 0) ^
           (Keywords?.GetHashCode() ?? 0);
}
4555
}

templates/default/src/search-worker.js

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@
4040
var results = [];
4141
hits.forEach(function (hit) {
4242
var item = searchData[hit.ref];
43-
results.push({ 'href': item.href, 'title': item.title, 'keywords': item.keywords });
43+
results.push({ 'href': item.href, 'title': item.title, 'summary': item.summary, 'keywords': item.keywords });
4444
});
4545
postMessage({ e: 'query-ready', q: q, d: results });
4646
}
@@ -51,7 +51,8 @@
5151
this.pipeline.remove(lunr.stopWordFilter);
5252
this.ref('href');
5353
this.field('title', { boost: 50 });
54-
this.field('keywords', { boost: 20 });
54+
this.field('keywords', { boost: 40 });
55+
this.field('summary', { boost: 20 });
5556

5657
for (var prop in searchData) {
5758
if (searchData.hasOwnProperty(prop)) {

templates/default/styles/docfx.js

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,9 @@ $(function () {
250250
}
251251

252252
function extractContentBrief(content) {
253+
if (!content) {
254+
return
255+
}
253256
var briefOffset = 512;
254257
var words = query.split(/\s+/g);
255258
var queryIndex = content.indexOf(words[0]);
@@ -285,7 +288,7 @@ $(function () {
285288
var itemRawHref = relativeUrlToAbsoluteUrl(currentUrl, relHref + hit.href);
286289
var itemHref = relHref + hit.href + "?q=" + query;
287290
var itemTitle = hit.title;
288-
var itemBrief = extractContentBrief(hit.keywords);
291+
var itemBrief = extractContentBrief(hit.summary || '');
289292

290293
var itemNode = $('<div>').attr('class', 'sr-item');
291294
var itemTitleNode = $('<div>').attr('class', 'item-title').append($('<a>').attr('href', itemHref).attr("target", "_blank").attr("rel", "noopener noreferrer").text(itemTitle));

templates/modern/src/search-worker.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import { get, set, createStore } from 'idb-keyval'
1010
type SearchHit = {
1111
href: string
1212
title: string
13+
summary: string
1314
keywords: string
1415
}
1516

@@ -47,7 +48,8 @@ async function loadIndex({ lunrLanguages }: { lunrLanguages?: string[] }) {
4748

4849
this.ref('href')
4950
this.field('title', { boost: 50 })
50-
this.field('keywords', { boost: 20 })
51+
this.field('keywords', { boost: 40 })
52+
this.field('summary', { boost: 20 })
5153

5254
if (lunrLanguages && lunrLanguages.length > 0) {
5355
this.use(lunr.multiLanguage(...lunrLanguages))

templates/modern/src/search.ts

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import { classMap } from 'lit-html/directives/class-map.js'
88
type SearchHit = {
99
href: string
1010
title: string
11+
summary: string
1112
keywords: string
1213
}
1314

@@ -34,6 +35,11 @@ export async function enableSearch() {
3435
case 'index-ready':
3536
searchQuery.disabled = false
3637
searchQuery.addEventListener('input', onSearchQueryInput)
38+
// Suppress the default action of the Enter key in the search box.
// NOTE(review): presumably this stops Enter from submitting an enclosing form — confirm against the template markup.
// Uses the handler's own event argument rather than the deprecated global `window.event`.
searchQuery.addEventListener('keypress', function(e) {
  if (e.key === 'Enter') {
    e.preventDefault()
  }
})
3743
window.docfx.searchReady = true
3844
break
3945
case 'query-ready':
@@ -56,7 +62,8 @@ export async function enableSearch() {
5662
if (query === '') {
5763
document.body.removeAttribute('data-search')
5864
} else {
59-
worker.postMessage({ q: query })
65+
const additiveQuery = query.replace(/\s+/g, ' ').split(' ').map(w => '+' + w).join(' ')
66+
worker.postMessage({ q: additiveQuery })
6067
}
6168
}
6269

@@ -108,7 +115,7 @@ export async function enableSearch() {
108115
const currentUrl = window.location.href
109116
const itemRawHref = relativeUrlToAbsoluteUrl(currentUrl, relHref + hit.href)
110117
const itemHref = relHref + hit.href + '?q=' + query
111-
const itemBrief = extractContentBrief(hit.keywords)
118+
const itemBrief = hit.summary ? extractContentBrief(hit.summary) : ''
112119
113120
return html`
114121
<div class="sr-item">

0 commit comments

Comments
 (0)