Skip to content

Commit 38e7308

Browse files
committed
feat: Add version-based data parsing
This commit introduces the capability to parse data files from GitHub releases based on their semantic version tag. This is crucial for handling evolving data schemas from upstream sources like `web-features`. Key changes: - **`lib/gh`**: `DownloadFileFromRelease` now returns a `*gh.File` struct containing both the file contents and the release's semver tag. - **`web_feature_consumer`**: A new `parseByVersion` method uses the release tag to select the appropriate parser. It currently supports routing to V2 and V3 parsers for `web-features` data, ensuring backward compatibility and readiness for the new schema. - **`bcd_consumer`**: Updated to use the new `*gh.File` struct, though it does not yet perform version-based parsing. - **Testing**: New tests have been added to validate the version-routing logic in the `web_feature_consumer`.
1 parent b0a4dfa commit 38e7308

File tree

9 files changed

+298
-45
lines changed

9 files changed

+298
-45
lines changed

lib/gh/download_file_from_release.go

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import (
2121
"net/http"
2222

2323
"github.com/google/go-github/v73/github"
24+
"golang.org/x/mod/semver"
2425
)
2526

2627
var (
@@ -31,11 +32,21 @@ var (
3132
ErrFatalError = errors.New("fatal error using github")
3233
)
3334

35+
type File struct {
36+
Contents io.ReadCloser
37+
Info ReleaseInfo
38+
}
39+
40+
type ReleaseInfo struct {
41+
// If the tag is valid, the will be non null.
42+
Tag *string
43+
}
44+
3445
func (c *Client) DownloadFileFromRelease(
3546
ctx context.Context,
3647
owner, repo string,
3748
httpClient *http.Client,
38-
filePattern string) (io.ReadCloser, error) {
49+
filePattern string) (*File, error) {
3950
release, _, err := c.repoClient.GetLatestRelease(ctx, owner, repo)
4051
if err != nil {
4152
// nolint: exhaustruct // WONTFIX. This is an external package. Cannot control it.
@@ -84,5 +95,16 @@ func (c *Client) DownloadFileFromRelease(
8495
return nil, errors.Join(ErrUnableToDownloadAsset, err)
8596
}
8697

87-
return resp.Body, nil
98+
tagName := release.GetTagName()
99+
var tagNamePtr *string
100+
if semver.IsValid(tagName) {
101+
tagNamePtr = &tagName
102+
}
103+
104+
return &File{
105+
Contents: resp.Body,
106+
Info: ReleaseInfo{
107+
Tag: tagNamePtr,
108+
},
109+
}, nil
88110
}

lib/gh/download_file_from_release_test.go

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,12 @@ func checkIfFileIsReadable(t *testing.T, file io.Reader) {
3434
}
3535
}
3636

37+
func checkIfTagIsPopulated(t *testing.T, file *File) {
38+
if file.Info.Tag == nil || *file.Info.Tag == "" {
39+
t.Error("tag is empty")
40+
}
41+
}
42+
3743
func TestDownloadFileFromReleaseWebFeatures(t *testing.T) {
3844
t.Skip("Used for debugging purposes.")
3945
client := NewClient("")
@@ -44,14 +50,14 @@ func TestDownloadFileFromReleaseWebFeatures(t *testing.T) {
4450
t.Errorf("unexpected error: %s", err.Error())
4551
} else {
4652
// Close to be safe at the end.
47-
defer file.Close()
48-
checkIfFileIsReadable(t, file)
53+
defer file.Contents.Close()
54+
checkIfFileIsReadable(t, file.Contents)
55+
checkIfTagIsPopulated(t, file)
4956
}
5057
}
5158

5259
func TestDownloadFileFromReleaseBrowserCompatData(t *testing.T) {
5360
t.Skip("Used for debugging purposes.")
54-
t.Skip("Cannot remove until https://github.com/mdn/browser-compat-data/issues/22675 is fixed")
5561
client := NewClient("")
5662
ctx := context.Background()
5763
httpClient := http.DefaultClient
@@ -60,8 +66,9 @@ func TestDownloadFileFromReleaseBrowserCompatData(t *testing.T) {
6066
t.Errorf("unexpected error: %s", err.Error())
6167
} else {
6268
// Close to be safe at the end.
63-
defer file.Close()
64-
checkIfFileIsReadable(t, file)
69+
defer file.Contents.Close()
70+
checkIfFileIsReadable(t, file.Contents)
71+
checkIfTagIsPopulated(t, file)
6572
}
6673
}
6774

lib/go.mod

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ require (
6868
go.opentelemetry.io/otel/exporters/stdout/stdoutlog v0.13.0 // indirect
6969
go.opentelemetry.io/otel/log v0.13.0 // indirect
7070
go.opentelemetry.io/otel/sdk/log v0.13.0 // indirect
71+
golang.org/x/mod v0.27.0 // indirect
7172
google.golang.org/appengine/v2 v2.0.6 // indirect
7273
)
7374

lib/go.sum

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1253,6 +1253,8 @@ golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91
12531253
golang.org/x/mod v0.7.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
12541254
golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
12551255
golang.org/x/mod v0.9.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
1256+
golang.org/x/mod v0.27.0 h1:kb+q2PyFnEADO2IEF935ehFUXlWiNjJWtRNgBLSfbxQ=
1257+
golang.org/x/mod v0.27.0/go.mod h1:rWI627Fq0DEoudcK+MBkNkCe0EetEaDSwJJkCcjpazc=
12561258
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
12571259
golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
12581260
golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=

workflows/steps/services/bcd_consumer/pkg/workflow/bcd_releases_worker.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import (
2020
"net/http"
2121

2222
"github.com/GoogleChrome/webstatus.dev/lib/gcpspanner/spanneradapters/bcdconsumertypes"
23+
"github.com/GoogleChrome/webstatus.dev/lib/gh"
2324
"github.com/GoogleChrome/webstatus.dev/workflows/steps/services/bcd_consumer/pkg/data"
2425
)
2526

@@ -45,7 +46,7 @@ type DataGetter interface {
4546
ctx context.Context,
4647
owner, repo string,
4748
httpClient *http.Client,
48-
filePattern string) (io.ReadCloser, error)
49+
filePattern string) (*gh.File, error)
4950
}
5051

5152
// DataParser describes the behavior to read raw bytes into the expected BCDData struct.
@@ -106,7 +107,7 @@ func (p BCDJobProcessor) Process(
106107
}
107108

108109
// Step 2. Parse the file.
109-
data, err := p.dataParser.Parse(file)
110+
data, err := p.dataParser.Parse(file.Contents)
110111
if err != nil {
111112
return err
112113
}

workflows/steps/services/bcd_consumer/pkg/workflow/bcd_releases_worker_test.go

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ import (
2727

2828
"github.com/GoogleChrome/webstatus.dev/lib/gcpspanner/spanneradapters/bcdconsumertypes"
2929
"github.com/GoogleChrome/webstatus.dev/lib/gen/jsonschema/mdn__browser_compat_data"
30+
"github.com/GoogleChrome/webstatus.dev/lib/gh"
3031
"github.com/GoogleChrome/webstatus.dev/workflows/steps/services/bcd_consumer/pkg/data"
3132
)
3233

@@ -56,7 +57,7 @@ type mockDownloadFileFromReleaseConfig struct {
5657
repoOwner string
5758
repoName string
5859
filePattern string
59-
fakeFile io.ReadCloser
60+
fakeFile *gh.File
6061
err error
6162
}
6263

@@ -69,7 +70,7 @@ func (m *MockDataGetter) DownloadFileFromRelease(
6970
_ context.Context,
7071
owner, repo string,
7172
_ *http.Client,
72-
filePattern string) (io.ReadCloser, error) {
73+
filePattern string) (*gh.File, error) {
7374
if m.mockDownloadFileFromReleaseCfg.repoOwner != owner ||
7475
m.mockDownloadFileFromReleaseCfg.repoName != repo ||
7576
m.mockDownloadFileFromReleaseCfg.filePattern != filePattern {
@@ -97,7 +98,8 @@ func (m *MockDataParser) Parse(in io.ReadCloser) (*data.BCDData, error) {
9798
m.t.Errorf("unable to read file")
9899
}
99100
if m.mockParseCfg.expectedFileContents != string(fileContents) {
100-
m.t.Error("unexpected file contents")
101+
m.t.Errorf("unexpected file contents. want: %s, got: %s",
102+
m.mockParseCfg.expectedFileContents, string(fileContents))
101103
}
102104

103105
return m.mockParseCfg.ret, m.mockParseCfg.err
@@ -205,6 +207,16 @@ func getSampleReleases() []bcdconsumertypes.BrowserRelease {
205207
}
206208

207209
func TestProcess(t *testing.T) {
210+
// Create a function to generate a file because Contents can only be read once
211+
testFileFn := func() *gh.File {
212+
return &gh.File{
213+
Contents: io.NopCloser(strings.NewReader("success")),
214+
Info: gh.ReleaseInfo{
215+
Tag: nil,
216+
},
217+
}
218+
}
219+
208220
testCases := []processWorkflowTest{
209221
{
210222
name: "successful process",
@@ -215,7 +227,7 @@ func TestProcess(t *testing.T) {
215227
repoOwner: repoOwner,
216228
repoName: repoName,
217229
filePattern: filePattern,
218-
fakeFile: io.NopCloser(strings.NewReader("success")),
230+
fakeFile: testFileFn(),
219231
err: nil,
220232
},
221233
mockParseCfg: &mockParseConfig{
@@ -244,7 +256,7 @@ func TestProcess(t *testing.T) {
244256
repoOwner: repoOwner,
245257
repoName: repoName,
246258
filePattern: filePattern,
247-
fakeFile: io.NopCloser(strings.NewReader("success")),
259+
fakeFile: testFileFn(),
248260
err: errTestGetter,
249261
},
250262
mockParseCfg: nil,
@@ -261,7 +273,7 @@ func TestProcess(t *testing.T) {
261273
repoOwner: repoOwner,
262274
repoName: repoName,
263275
filePattern: filePattern,
264-
fakeFile: io.NopCloser(strings.NewReader("success")),
276+
fakeFile: testFileFn(),
265277
err: nil,
266278
},
267279
mockParseCfg: &mockParseConfig{
@@ -282,7 +294,7 @@ func TestProcess(t *testing.T) {
282294
repoOwner: repoOwner,
283295
repoName: repoName,
284296
filePattern: filePattern,
285-
fakeFile: io.NopCloser(strings.NewReader("success")),
297+
fakeFile: testFileFn(),
286298
err: nil,
287299
},
288300
mockParseCfg: &mockParseConfig{
@@ -308,7 +320,7 @@ func TestProcess(t *testing.T) {
308320
repoOwner: repoOwner,
309321
repoName: repoName,
310322
filePattern: filePattern,
311-
fakeFile: io.NopCloser(strings.NewReader("success")),
323+
fakeFile: testFileFn(),
312324
err: nil,
313325
},
314326
mockParseCfg: &mockParseConfig{

workflows/steps/services/web_feature_consumer/cmd/job/main.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ func main() {
9292
spanneradapters.NewWebFeatureGroupsConsumer(spannerClient),
9393
spanneradapters.NewWebFeatureSnapshotsConsumer(spannerClient),
9494
data.Parser{},
95+
data.V3Parser{},
9596
)
9697

9798
// Job Generation

workflows/steps/services/web_feature_consumer/pkg/workflow/web_features_worker.go

Lines changed: 71 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,16 @@ package workflow
1616

1717
import (
1818
"context"
19+
"errors"
1920
"io"
2021
"log/slog"
2122
"net/http"
2223
"time"
2324

2425
"github.com/GoogleChrome/webstatus.dev/lib/gen/jsonschema/web_platform_dx__web_features"
26+
"github.com/GoogleChrome/webstatus.dev/lib/gh"
2527
"github.com/GoogleChrome/webstatus.dev/lib/webdxfeaturetypes"
28+
"golang.org/x/mod/semver"
2629
)
2730

2831
// AssetGetter describes the behavior to get a certain asset from a github release.
@@ -31,14 +34,19 @@ type AssetGetter interface {
3134
ctx context.Context,
3235
owner, repo string,
3336
httpClient *http.Client,
34-
filePattern string) (io.ReadCloser, error)
37+
filePattern string) (*gh.File, error)
3538
}
3639

3740
// AssetParser describes the behavior to parse the io.ReadCloser from AssetGetter into the expected data type.
3841
type AssetParser interface {
3942
Parse(io.ReadCloser) (*webdxfeaturetypes.ProcessedWebFeaturesData, error)
4043
}
4144

45+
var (
46+
ErrUnknownAssetVersion = errors.New("unknown asset version")
47+
ErrUnsupportedAssetVersion = errors.New("unsupported asset version")
48+
)
49+
4250
// WebFeatureStorer describes the logic to insert the web features that were returned by the AssetParser.
4351
type WebFeatureStorer interface {
4452
InsertWebFeatures(
@@ -86,25 +94,73 @@ func NewWebFeaturesJobProcessor(assetGetter AssetGetter,
8694
metadataStorer WebFeatureMetadataStorer,
8795
groupStorer WebDXGroupStorer,
8896
snapshotStorer WebDXSnapshotStorer,
89-
webFeaturesDataParser AssetParser,
97+
webFeaturesDataV2Parser AssetParser,
98+
webFeaturesDataV3Parser AssetParser,
9099
) WebFeaturesJobProcessor {
91100
return WebFeaturesJobProcessor{
92-
assetGetter: assetGetter,
93-
storer: storer,
94-
metadataStorer: metadataStorer,
95-
groupStorer: groupStorer,
96-
snapshotStorer: snapshotStorer,
97-
webFeaturesDataParser: webFeaturesDataParser,
101+
assetGetter: assetGetter,
102+
storer: storer,
103+
metadataStorer: metadataStorer,
104+
groupStorer: groupStorer,
105+
snapshotStorer: snapshotStorer,
106+
webFeaturesDataV2Parser: webFeaturesDataV2Parser,
107+
webFeaturesDataV3Parser: webFeaturesDataV3Parser,
98108
}
99109
}
100110

101111
type WebFeaturesJobProcessor struct {
102-
assetGetter AssetGetter
103-
storer WebFeatureStorer
104-
metadataStorer WebFeatureMetadataStorer
105-
groupStorer WebDXGroupStorer
106-
snapshotStorer WebDXSnapshotStorer
107-
webFeaturesDataParser AssetParser
112+
assetGetter AssetGetter
113+
storer WebFeatureStorer
114+
metadataStorer WebFeatureMetadataStorer
115+
groupStorer WebDXGroupStorer
116+
snapshotStorer WebDXSnapshotStorer
117+
webFeaturesDataV2Parser AssetParser
118+
webFeaturesDataV3Parser AssetParser
119+
}
120+
121+
const (
122+
// According to https://pkg.go.dev/golang.org/x/mod/semver, the version must start with "v".
123+
v2 = "v2.0.0"
124+
v3 = "v3.0.0"
125+
v4 = "v4.0.0"
126+
)
127+
128+
func (p WebFeaturesJobProcessor) parseByVersion(ctx context.Context, file *gh.File) (
129+
*webdxfeaturetypes.ProcessedWebFeaturesData, error) {
130+
if file.Info.Tag == nil {
131+
slog.ErrorContext(ctx, "unknown version", "version", "nil")
132+
133+
return nil, ErrUnknownAssetVersion
134+
}
135+
136+
if semver.Compare(*file.Info.Tag, v3) == -1 {
137+
// If less than version 3, use default v2 parser
138+
slog.InfoContext(ctx, "using v2 parser", "version", *file.Info.Tag)
139+
data, err := p.webFeaturesDataV2Parser.Parse(file.Contents)
140+
if err != nil {
141+
slog.ErrorContext(ctx, "unable to parse v2 data", "error", err)
142+
143+
return nil, err
144+
}
145+
146+
return data, nil
147+
148+
} else if semver.Compare(*file.Info.Tag, v4) == -1 {
149+
// If version 3, use v3 parser
150+
slog.InfoContext(ctx, "using v3 parser", "version", *file.Info.Tag)
151+
data, err := p.webFeaturesDataV3Parser.Parse(file.Contents)
152+
if err != nil {
153+
slog.ErrorContext(ctx, "unable to parse v3 data", "error", err)
154+
155+
return nil, err
156+
}
157+
158+
return data, nil
159+
}
160+
161+
slog.ErrorContext(ctx, "unsupported version", "version", *file.Info.Tag)
162+
163+
return nil, ErrUnsupportedAssetVersion
108164
}
109165

110166
func (p WebFeaturesJobProcessor) Process(ctx context.Context, job JobArguments) error {
@@ -120,7 +176,7 @@ func (p WebFeaturesJobProcessor) Process(ctx context.Context, job JobArguments)
120176
return err
121177
}
122178

123-
data, err := p.webFeaturesDataParser.Parse(file)
179+
data, err := p.parseByVersion(ctx, file)
124180
if err != nil {
125181
slog.ErrorContext(ctx, "unable to parse data", "error", err)
126182

0 commit comments

Comments
 (0)