Skip to content

Commit 3d68c09

Browse files
Merge pull request #95 from Financial-Times/fix/UPPSF-6401-transform-body-tree-schema-to-xml
transforms body-tree format to external bodyXML in Go
2 parents 86445db + 757d86a commit 3d68c09

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

42 files changed

+4593
-58
lines changed

libraries/to-external-bodyxml/go/transform.go

Lines changed: 44 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -10,22 +10,58 @@ import (
1010
contenttree "github.com/Financial-Times/content-tree"
1111
)
1212

13-
// Transform converts content from the content tree format, provided as unmarshalled JSON (json.RawMessage),
14-
// into an "external" XHTML-formatted version of the same content.
13+
type Schema interface {
14+
fmt.Stringer
15+
}
16+
17+
type schema string
18+
19+
func (s schema) String() string { return string(s) }
20+
21+
var (
22+
TransitTree Schema = schema("transit-tree")
23+
BodyTree Schema = schema("body-tree")
24+
)
25+
26+
var (
27+
ErrUnknownKind = errors.New("unknown tree kind")
28+
)
29+
30+
// Transform converts content from a content tree representation into an external XHTML-formatted version.
31+
//
32+
// The tree is provided as raw, not-yet-decoded JSON (json.RawMessage) and must conform to one of the
33+
// supported Schema kinds: TransitTree or BodyTree.
34+
//
35+
// The Schema interface is used to distinguish which type of content tree should be unmarshalled
36+
// and transformed. The predefined Schema values (TransitTree, BodyTree) serve as markers to select
37+
// the appropriate unmarshal/transform logic.
1538
//
1639
// The XHTML output is intended for distribution to consumers that only support widely recognized formats like HTML
1740
// or those that should not receive internal-specific details contained in the content tree format.
1841
// Such consumers may be external (non-FT) users, automated systems processing HTML-based content,
1942
// republishing platforms, and more.
20-
func Transform(root json.RawMessage) (string, error) {
21-
tree := contenttree.Root{}
43+
func Transform(tree json.RawMessage, s Schema) (string, error) {
44+
switch s {
45+
case TransitTree:
46+
{
47+
n := contenttree.Root{}
48+
return unmarshalAndTransform(tree, &n)
49+
}
50+
case BodyTree:
51+
{
52+
n := contenttree.Body{}
53+
return unmarshalAndTransform(tree, &n)
54+
}
55+
default:
56+
return "", fmt.Errorf("%w: %q (expected %q or %q)", ErrUnknownKind, s, TransitTree, BodyTree)
57+
}
58+
}
2259

23-
err := json.Unmarshal(root, &tree)
24-
if err != nil {
60+
func unmarshalAndTransform(tree json.RawMessage, n contenttree.Node) (string, error) {
61+
if err := json.Unmarshal(tree, n); err != nil {
2562
return "", fmt.Errorf("failed to instantiate content tree: %w", err)
2663
}
27-
28-
return transformNode(&tree)
64+
return transformNode(n)
2965
}
3066

3167
func transformNode(n contenttree.Node) (string, error) {

libraries/to-external-bodyxml/go/transform_test.go

Lines changed: 27 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -3,31 +3,35 @@ package toexternalbodyxml
33
import (
44
"encoding/json"
55
"errors"
6+
"fmt"
67
"os"
78
"path/filepath"
89
"strings"
910
"testing"
1011
)
1112

1213
func TestTransform(t *testing.T) {
13-
for _, test := range getTestCases(t) {
14-
t.Run(test.name, func(t *testing.T) {
15-
got, err := Transform(test.input)
16-
17-
if err != nil && !test.wantErr {
18-
t.Errorf("Failed with unexpected error: %v", err)
19-
}
20-
if err != nil && test.wantErr {
21-
return
22-
}
23-
24-
want := strings.TrimSpace(test.output)
25-
got = strings.TrimSpace(got)
26-
27-
if got != want {
28-
t.Errorf("got: %s\n\n want: %s\n", got, want)
29-
}
30-
})
14+
schemas := []Schema{BodyTree, TransitTree}
15+
for _, s := range schemas {
16+
for _, test := range getTestCases(t, s) {
17+
t.Run(test.name, func(t *testing.T) {
18+
got, err := Transform(test.input, s)
19+
20+
if err != nil && !test.wantErr {
21+
t.Errorf("Failed with unexpected error: %v", err)
22+
}
23+
if err != nil && test.wantErr {
24+
return
25+
}
26+
27+
want := strings.TrimSpace(test.output)
28+
got = strings.TrimSpace(got)
29+
30+
if got != want {
31+
t.Errorf("got: %s\n\n want: %s\n", got, want)
32+
}
33+
})
34+
}
3135
}
3236
}
3337

@@ -38,11 +42,11 @@ type TestCase struct {
3842
wantErr bool
3943
}
4044

41-
func getTestCases(t *testing.T) []TestCase {
45+
func getTestCases(t *testing.T, s Schema) []TestCase {
4246
t.Helper()
4347

44-
inputPath := "../../../tests/content-tree-to-external-bodyxml/input"
45-
outputPath := "../../../tests/content-tree-to-external-bodyxml/output"
48+
inputPath := fmt.Sprintf("../../../tests/%s-to-external-bodyxml/input", s)
49+
outputPath := fmt.Sprintf("../../../tests/%s-to-external-bodyxml/output", s)
4650

4751
entries, err := os.ReadDir(inputPath)
4852
if err != nil {
@@ -68,7 +72,7 @@ func getTestCases(t *testing.T) []TestCase {
6872

6973
if _, err := os.Stat(outputFile); errors.Is(err, os.ErrNotExist) {
7074
testCases = append(testCases, TestCase{
71-
name: caseName,
75+
name: fmt.Sprintf("%s-%s", s, caseName),
7276
input: input,
7377
output: "",
7478
wantErr: true,
@@ -80,7 +84,7 @@ func getTestCases(t *testing.T) []TestCase {
8084
}
8185

8286
testCases = append(testCases, TestCase{
83-
name: caseName,
87+
name: fmt.Sprintf("%s-%s", s, caseName),
8488
input: input,
8589
output: string(output),
8690
wantErr: false,

libraries/to-string/go/transform.go

Lines changed: 33 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,18 +10,44 @@ import (
1010
contenttree "github.com/Financial-Times/content-tree"
1111
)
1212

13+
type Schema interface {
14+
fmt.Stringer
15+
}
16+
17+
type schema string
18+
19+
func (s schema) String() string { return string(s) }
20+
21+
var (
22+
TransitTree Schema = schema("transit-tree")
23+
BodyTree Schema = schema("body-tree")
24+
)
25+
26+
var (
27+
ErrUnknownKind = errors.New("unknown tree kind")
28+
)
29+
1330
var toSeparate = []string{contenttree.HeadingType, contenttree.ParagraphType}
1431

1532
// Transform extracts and returns plain text from a content tree provided as raw JSON (json.RawMessage) that conforms to the given Schema (TransitTree or BodyTree).
16-
func Transform(root json.RawMessage) (string, error) {
17-
tree := contenttree.Root{}
33+
func Transform(tree json.RawMessage, s Schema) (string, error) {
34+
switch s {
35+
case TransitTree:
36+
n := contenttree.Root{}
37+
return unmarshalAndTransform(tree, &n)
38+
case BodyTree:
39+
n := contenttree.Body{}
40+
return unmarshalAndTransform(tree, &n)
41+
default:
42+
return "", fmt.Errorf("%w: %q (expected %q or %q)", ErrUnknownKind, s, TransitTree, BodyTree)
43+
}
44+
}
1845

19-
err := json.Unmarshal(root, &tree)
20-
if err != nil {
46+
func unmarshalAndTransform(tree json.RawMessage, n contenttree.Node) (string, error) {
47+
if err := json.Unmarshal(tree, n); err != nil {
2148
return "", fmt.Errorf("failed to instantiate content tree: %w", err)
2249
}
23-
24-
text, err := transformNode(&tree)
50+
text, err := transformNode(n)
2551
if err != nil {
2652
return "", fmt.Errorf("failed to transform tree to string: %w", err)
2753
}
@@ -33,6 +59,7 @@ func Transform(root json.RawMessage) (string, error) {
3359
text = strings.TrimSpace(text)
3460

3561
return text, nil
62+
3663
}
3764

3865
func transformNode(n contenttree.Node) (string, error) {

libraries/to-string/go/transform_test.go

Lines changed: 25 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -3,29 +3,33 @@ package tostring
33
import (
44
"encoding/json"
55
"errors"
6+
"fmt"
67
"os"
78
"path/filepath"
89
"strings"
910
"testing"
1011
)
1112

1213
func TestTransform(t *testing.T) {
13-
for _, test := range getTestCases(t) {
14-
t.Run(test.name, func(t *testing.T) {
15-
got, err := Transform(test.input)
16-
17-
if err != nil && !test.wantErr {
18-
t.Errorf("Failed with unexpected error: %v", err)
19-
}
20-
21-
if err != nil && test.wantErr {
22-
return
23-
}
24-
25-
if got != strings.TrimSpace(test.output) {
26-
t.Errorf("got: <%v>\n want: <%v>\n", got, test.output)
27-
}
28-
})
14+
schemas := []Schema{BodyTree, TransitTree}
15+
for _, s := range schemas {
16+
for _, test := range getTestCases(t, s) {
17+
t.Run(test.name, func(t *testing.T) {
18+
got, err := Transform(test.input, s)
19+
20+
if err != nil && !test.wantErr {
21+
t.Errorf("Failed with unexpected error: %v", err)
22+
}
23+
24+
if err != nil && test.wantErr {
25+
return
26+
}
27+
28+
if got != strings.TrimSpace(test.output) {
29+
t.Errorf("got: <%v>\n want: <%v>\n", got, test.output)
30+
}
31+
})
32+
}
2933
}
3034
}
3135

@@ -36,11 +40,11 @@ type TestCase struct {
3640
wantErr bool
3741
}
3842

39-
func getTestCases(t *testing.T) []TestCase {
43+
func getTestCases(t *testing.T, s Schema) []TestCase {
4044
t.Helper()
4145

42-
inputPath := "../../../tests/content-tree-to-string/input"
43-
outputPath := "../../../tests/content-tree-to-string/output"
46+
inputPath := fmt.Sprintf("../../../tests/%s-to-string/input", s)
47+
outputPath := fmt.Sprintf("../../../tests/%s-to-string/output", s)
4448

4549
entries, err := os.ReadDir(inputPath)
4650
if err != nil {
@@ -66,7 +70,7 @@ func getTestCases(t *testing.T) []TestCase {
6670

6771
if _, err := os.Stat(outputFile); errors.Is(err, os.ErrNotExist) {
6872
testCases = append(testCases, TestCase{
69-
name: caseName,
73+
name: fmt.Sprintf("%s-%s", s, caseName),
7074
input: input,
7175
output: "",
7276
wantErr: true,
@@ -78,7 +82,7 @@ func getTestCases(t *testing.T) []TestCase {
7882
}
7983

8084
testCases = append(testCases, TestCase{
81-
name: caseName,
85+
name: fmt.Sprintf("%s-%s", s, caseName),
8286
input: input,
8387
output: string(output),
8488
wantErr: false,

0 commit comments

Comments
 (0)