smithy-lang · TrevorBurnham · Mar 4, 2026
@@ -0,0 +1,5 @@
+---
+"@smithy/service-client-documentation-generator": minor
+---
+
+Strip HTML tags from Smithy documentation traits during TypeScript codegen, producing clean plaintext JSDoc comments instead of raw HTML. This improves hover-doc readability in editors like VS Code and Neovim.
@@ -0,0 +1,183 @@
+/*
+ * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+package software.amazon.smithy.typescript.codegen;
+
+import java.util.Locale;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import software.amazon.smithy.utils.SmithyUnstableApi;
+
+/**
+ * Converts HTML documentation strings from Smithy model {@code @documentation}
+ * traits into plain-text suitable for JSDoc comments.
+ *
+ * <p>The Smithy documentation trait values often contain HTML markup (e.g.
+ * {@code <p>}, {@code <a>}, {@code <code>}, {@code <ul>/<li>}). This class
+ * strips that markup while preserving readable formatting so that IDE hover
+ * docs are clean and legible.
+ */
+@SmithyUnstableApi
+final class DocumentationConverter {
+
+    // Block-level elements that should produce paragraph breaks.
+    private static final Pattern BLOCK_BREAK = Pattern.compile(
+        "<\\s*/?(p|br|h[1-6]|div|section|article|header|footer|nav|aside|main|blockquote|pre|hr|table|thead|tbody|tfoot|tr)\\b[^>]*/?>",
+        Pattern.CASE_INSENSITIVE
+    );
+
+    // List items get a leading dash for readability.
+    private static final Pattern LIST_ITEM_OPEN = Pattern.compile(
+        "<\\s*li\\b[^>]*>",
+        Pattern.CASE_INSENSITIVE
+    );
+
+    // Closing list item tags.
+    private static final Pattern LIST_ITEM_CLOSE = Pattern.compile(
+        "<\\s*/li\\s*>",
+        Pattern.CASE_INSENSITIVE
+    );
+
+    // <ul>, <ol>, <dl> open/close tags — just remove them.
+    private static final Pattern LIST_WRAPPER = Pattern.compile(
+        "<\\s*/?(ul|ol|dl)\\b[^>]*>",
+        Pattern.CASE_INSENSITIVE
+    );
+
+    // <dt> becomes a newline + bold-ish label, <dd> becomes indented.
+    private static final Pattern DT_TAG = Pattern.compile(
+        "<\\s*dt\\b[^>]*>",
+        Pattern.CASE_INSENSITIVE
+    );
+    private static final Pattern DT_CLOSE = Pattern.compile(
+        "<\\s*/dt\\s*>",
+        Pattern.CASE_INSENSITIVE
+    );
+    private static final Pattern DD_TAG = Pattern.compile(
+        "<\\s*dd\\b[^>]*>",
+        Pattern.CASE_INSENSITIVE
+    );
+    private static final Pattern DD_CLOSE = Pattern.compile(
+        "<\\s*/dd\\s*>",
+        Pattern.CASE_INSENSITIVE
+    );
+
+    // <code> and <pre> content is wrapped in backticks.
+    private static final Pattern CODE_OPEN = Pattern.compile(
+        "<\\s*(code|pre)\\b[^>]*>",
+        Pattern.CASE_INSENSITIVE
+    );
+    private static final Pattern CODE_CLOSE = Pattern.compile(
+        "<\\s*/(code|pre)\\s*>",
+        Pattern.CASE_INSENSITIVE
+    );
+
+    // <b>, <strong>, <i>, <em> — just strip them (no markdown equivalent in JSDoc).
+    private static final Pattern INLINE_FORMAT = Pattern.compile(
+        "<\\s*/?(b|strong|i|em|u|s|strike|del|ins|sub|sup|small|big|span|font|mark|abbr|cite|dfn|kbd|samp|var|wbr)\\b[^>]*>",
+        Pattern.CASE_INSENSITIVE
+    );
+
+    // Anchor tags: extract the link text, drop the URL.
+    private static final Pattern ANCHOR = Pattern.compile(
+        "<\\s*a\\b[^>]*>(.*?)<\\s*/a\\s*>",
+        Pattern.CASE_INSENSITIVE | Pattern.DOTALL
+    );
+
+    // <th> and <td> — separate cells with a tab-like space.
+    private static final Pattern TABLE_CELL = Pattern.compile(
+        "<\\s*/?(th|td)\\b[^>]*>",
+        Pattern.CASE_INSENSITIVE
+    );
+
+    // Any remaining HTML tags.
+    private static final Pattern ANY_TAG = Pattern.compile("<[^>]+>");
+
+    // HTML entities.
+    private static final Pattern ENTITY_AMP = Pattern.compile("&amp;", Pattern.CASE_INSENSITIVE);
+    private static final Pattern ENTITY_LT = Pattern.compile("&lt;", Pattern.CASE_INSENSITIVE);
+    private static final Pattern ENTITY_GT = Pattern.compile("&gt;", Pattern.CASE_INSENSITIVE);
+    private static final Pattern ENTITY_QUOT = Pattern.compile("&quot;", Pattern.CASE_INSENSITIVE);
+    private static final Pattern ENTITY_APOS = Pattern.compile("&#39;|&apos;", Pattern.CASE_INSENSITIVE);
+    private static final Pattern ENTITY_NBSP = Pattern.compile("&nbsp;", Pattern.CASE_INSENSITIVE);
+    private static final Pattern ENTITY_NUMERIC = Pattern.compile("&#(\\d+);");
+    private static final Pattern ENTITY_HEX = Pattern.compile("&#x([0-9a-fA-F]+);");
+
+    // Collapse runs of blank lines into at most two newlines (one blank line).
+    private static final Pattern EXCESS_NEWLINES = Pattern.compile("\\n{3,}");
+    // Collapse runs of spaces/tabs on a single line.
+    private static final Pattern EXCESS_SPACES = Pattern.compile("[ \\t]{2,}");
+    // Trailing whitespace on each line.
+    private static final Pattern TRAILING_WS = Pattern.compile("[ \\t]+$", Pattern.MULTILINE);
+
+    private DocumentationConverter() {}
+
+    /**
+     * Converts an HTML documentation string to plain text suitable for JSDoc.
+     *
+     * @param html the raw HTML documentation value from a Smithy model
+     * @return a plain-text version with HTML tags removed and basic formatting preserved
+     */
+    static String htmlToPlainText(String html) {
+        if (html == null || html.isEmpty()) {
+            return html;
+        }
+
+        String s = html;
+
+        // Anchors — keep link text only.
+        s = ANCHOR.matcher(s).replaceAll("$1");
+
+        // <code>/<pre> → backtick-wrapped.
+        s = CODE_OPEN.matcher(s).replaceAll("`");
+        s = CODE_CLOSE.matcher(s).replaceAll("`");
+
+        // List items → newline + dash.
+        s = LIST_ITEM_OPEN.matcher(s).replaceAll("\n -  ");
+        s = LIST_ITEM_CLOSE.matcher(s).replaceAll("");
+
+        // Definition list elements.
+        s = DT_TAG.matcher(s).replaceAll("\n");
+        s = DT_CLOSE.matcher(s).replaceAll(" - ");
+        s = DD_TAG.matcher(s).replaceAll("   ");
+        s = DD_CLOSE.matcher(s).replaceAll("");
+
+        // List wrappers.
+        s = LIST_WRAPPER.matcher(s).replaceAll("\n");
+
+        // Table cells — add spacing.
+        s = TABLE_CELL.matcher(s).replaceAll("  ");
+
+        // Block-level elements → paragraph break.
+        s = BLOCK_BREAK.matcher(s).replaceAll("\n\n");
+
+        // Inline formatting tags — just remove.
+        s = INLINE_FORMAT.matcher(s).replaceAll("");
+
+        // Any remaining tags.
+        s = ANY_TAG.matcher(s).replaceAll("");
+
+        // Decode HTML entities.
+        s = ENTITY_NBSP.matcher(s).replaceAll(" ");
+        s = ENTITY_LT.matcher(s).replaceAll("<");
+        s = ENTITY_GT.matcher(s).replaceAll(">");
+        s = ENTITY_QUOT.matcher(s).replaceAll("\"");
+        s = ENTITY_APOS.matcher(s).replaceAll("'");
+        s = ENTITY_HEX.matcher(s).replaceAll(mr -> {
+            int codePoint = Integer.parseInt(mr.group(1), 16);
+            return String.valueOf((char) codePoint);
+        });
+        s = ENTITY_NUMERIC.matcher(s).replaceAll(mr -> {
+            int codePoint = Integer.parseInt(mr.group(1));
+            return String.valueOf((char) codePoint);
+        });
+        // &amp; must be last to avoid double-decoding.
+        s = ENTITY_AMP.matcher(s).replaceAll("&");
+
+        // Normalize whitespace.
+        s = TRAILING_WS.matcher(s).replaceAll("");
+        s = EXCESS_SPACES.matcher(s).replaceAll(" ");
+        s = EXCESS_NEWLINES.matcher(s).replaceAll("\n\n");
+
+        return s.trim();
+    }
+}
@@ -280,6 +280,7 @@ boolean writeShapeDocs(Shape shape, UnaryOperator<String> preprocessor) {
         if (hasDocumentation || hasDeprecation) {
             String docs =
                 hasDocumentation ? shape.getTrait(DocumentationTrait.class).get().getValue() : "";
+            docs = DocumentationConverter.htmlToPlainText(docs);
             docs = docs.replace("{", "\\{").replace("}", "\\}");
 
             if (hasDeprecation) {
@@ -330,6 +331,7 @@ boolean writeMemberDocs(Model model, MemberShape member) {
         if (hasDocumentation || hasDeprecation) {
             String docs =
                 hasDocumentation ? member.getMemberTrait(model, DocumentationTrait.class).get().getValue() : "";
+            docs = DocumentationConverter.htmlToPlainText(docs);
             docs = docs.replace("{", "\\{").replace("}", "\\}");
 
             if (hasDeprecation) {

@@ -0,0 +1,145 @@
+/*
+ * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+package software.amazon.smithy.typescript.codegen;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNull;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import org.junit.jupiter.api.Test;
+
+/**
+ * Unit tests for {@link DocumentationConverter#htmlToPlainText(String)}.
+ */
+public class DocumentationConverterTest {
+
+    // Shorthand for the conversion under test.
+    private static String plain(String html) {
+        return DocumentationConverter.htmlToPlainText(html);
+    }
+
+    @Test
+    public void returnsNullForNull() {
+        assertNull(plain(null));
+    }
+
+    @Test
+    public void returnsEmptyForEmpty() {
+        assertEquals("", plain(""));
+    }
+
+    @Test
+    public void passesPlainTextThrough() {
+        assertEquals("Hello world.", plain("Hello world."));
+    }
+
+    @Test
+    public void stripsParagraphTags() {
+        assertEquals(
+            "First paragraph.\n\nSecond paragraph.",
+            plain("<p>First paragraph.</p><p>Second paragraph.</p>")
+        );
+    }
+
+    @Test
+    public void stripsAnchorTagsKeepsText() {
+        assertEquals(
+            "See the docs for details.",
+            plain("See <a href=\"https://example.com\">the docs</a> for details.")
+        );
+    }
+
+    @Test
+    public void convertsCodeTagsToBackticks() {
+        assertEquals("Use the `FooClient` class.", plain("Use the <code>FooClient</code> class."));
+    }
+
+    @Test
+    public void convertsUnorderedList() {
+        String text = plain("<p>Options:</p><ul><li>Option A</li><li>Option B</li></ul>");
+        // The heading comes first, followed by one dash-prefixed line per item.
+        assertTrue(text.startsWith("Options:"), "Should start with 'Options:', got: " + text);
+        assertTrue(text.contains("- Option A"), "Should contain dash-prefixed Option A, got: " + text);
+        assertTrue(text.contains("- Option B"), "Should contain dash-prefixed Option B, got: " + text);
+    }
+
+    @Test
+    public void stripsInlineFormattingTags() {
+        assertEquals(
+            "This is bold and italic and strong.",
+            plain("This is <b>bold</b> and <i>italic</i> and <strong>strong</strong>.")
+        );
+    }
+
+    @Test
+    public void decodesHtmlEntities() {
+        assertEquals(
+            "A & B < C > D \"E\" F's",
+            plain("A &amp; B &lt; C &gt; D &quot;E&quot; F&#39;s")
+        );
+    }
+
+    @Test
+    public void decodesNumericEntities() {
+        assertEquals("\u00A9 2024", plain("&#169; 2024"));
+    }
+
+    @Test
+    public void decodesHexEntities() {
+        assertEquals("\u00A9 2024", plain("&#xA9; 2024"));
+    }
+
+    @Test
+    public void handlesBrTags() {
+        assertEquals(
+            "Line one.\n\nLine two.\n\nLine three.",
+            plain("Line one.<br/>Line two.<br>Line three.")
+        );
+    }
+
+    @Test
+    public void collapsesExcessiveWhitespace() {
+        assertEquals(
+            "First.\n\nSecond.\n\nThird.",
+            plain("<p>First.</p>\n\n\n<p>Second.</p>\n\n\n\n<p>Third.</p>")
+        );
+    }
+
+    @Test
+    public void handlesNestedHtml() {
+        assertEquals(
+            "Use `MyApi` to call the service.",
+            plain("<p>Use <a href=\"https://example.com\"><code>MyApi</code></a> to call the service.</p>")
+        );
+    }
+
+    @Test
+    public void handlesDefinitionList() {
+        String text = plain("<dl><dt>Term</dt><dd>Definition</dd></dl>");
+        assertTrue(text.contains("Term"), "Should contain the term");
+        assertTrue(text.contains("Definition"), "Should contain the definition");
+        assertTrue(text.contains("-"), "Should contain a separator");
+    }
+
+    @Test
+    public void handlesNbsp() {
+        assertEquals("Hello world", plain("Hello&nbsp;world"));
+    }
+
+    @Test
+    public void handlesRealWorldEcsExample() {
+        // Simplified version of the ECS RegisterTaskDefinition docs from the issue.
+        final String input = "<p>Registers a new task definition from the supplied <code>family</code> and "
+            + "<code>containerDefinitions</code>. Optionally, you can add data volumes to your containers "
+            + "with the <code>volumes</code> parameter. For more information about task definition parameters "
+            + "and defaults, see <a href=\"https://docs.aws.amazon.com/AmazonECS/latest/developerguide/"
+            + "task_defintions.html\">Amazon ECS Task Definitions</a> in the "
+            + "<i>Amazon Elastic Container Service Developer Guide</i>.</p>";
+        final String expected = "Registers a new task definition from the supplied `family` and "
+            + "`containerDefinitions`. Optionally, you can add data volumes to your containers "
+            + "with the `volumes` parameter. For more information about task definition parameters "
+            + "and defaults, see Amazon ECS Task Definitions in the "
+            + "Amazon Elastic Container Service Developer Guide.";
+
+        assertEquals(expected, plain(input));
+    }
+}
@@ -190,4 +190,22 @@ public void buildDramaticDeprecationAnnotation() {
         String result = TypeScriptWriter.buildDeprecationAnnotation(trait);
         assertEquals("@deprecated Noo!!!", result);
     }
+
+    @Test
+    public void writeShapeDocsStripsHtmlTags() {
+        // Documentation containing a paragraph, inline code and a link.
+        DocumentationTrait htmlDocs = new DocumentationTrait(
+            "<p>Use the <code>FooClient</code> to call <a href=\"https://example.com\">the API</a>.</p>");
+        StringShape documented = StringShape.builder()
+            .id(ShapeId.from("com.example#MyString"))
+            .addTrait(htmlDocs)
+            .build();
+
+        TypeScriptWriter tsWriter = new TypeScriptWriter("foo");
+        tsWriter.writeShapeDocs(documented);
+        String emitted = tsWriter.toString();
+
+        // The readable text is kept while none of the markup is written out.
+        assertThat(emitted, containsString("Use the `FooClient` to call the API."));
+        for (String markup : new String[] {"<p>", "<code>", "<a "}) {
+            assertThat(emitted, not(containsString(markup)));
+        }
+    }
 }