Skip to content

Commit 4cbb39d

Browse files
authored
feat: Add version-based data parsing (#1746)
* feat: Add version-based data parsing This commit introduces the capability to parse data files from GitHub releases based on their semantic version tag. This is crucial for handling evolving data schemas from upstream sources like `web-features`. Key changes: - **`lib/gh`**: `DownloadFileFromRelease` now returns a `*gh.File` struct containing both the file contents and the release's semver tag. - **`web_feature_consumer`**: A new `parseByVersion` method uses the release tag to select the appropriate parser. It currently supports routing to V2 and V3 parsers for `web-features` data, ensuring backward compatibility and readiness for the new schema. - **`bcd_consumer`**: Updated to use the new `*gh.File` struct, though it does not yet perform version-based parsing. - **Testing**: New tests have been added to validate the version-routing logic in the `web_feature_consumer`. * address feedback
1 parent ece0338 commit 4cbb39d

File tree

9 files changed

+366
-46
lines changed

9 files changed

+366
-46
lines changed

lib/gh/download_file_from_release.go

Lines changed: 35 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,12 @@ import (
1818
"context"
1919
"errors"
2020
"io"
21+
"log/slog"
2122
"net/http"
23+
"strings"
2224

2325
"github.com/google/go-github/v73/github"
26+
"golang.org/x/mod/semver"
2427
)
2528

2629
var (
@@ -31,11 +34,22 @@ var (
3134
ErrFatalError = errors.New("fatal error using github")
3235
)
3336

37+
// ReleaseFile represents a file in a given Github release.
38+
type ReleaseFile struct {
39+
Contents io.ReadCloser
40+
Info ReleaseInfo
41+
}
42+
43+
type ReleaseInfo struct {
44+
// If the tag is valid, the will be non null.
45+
Tag *string
46+
}
47+
3448
func (c *Client) DownloadFileFromRelease(
3549
ctx context.Context,
3650
owner, repo string,
3751
httpClient *http.Client,
38-
filePattern string) (io.ReadCloser, error) {
52+
filePattern string) (*ReleaseFile, error) {
3953
release, _, err := c.repoClient.GetLatestRelease(ctx, owner, repo)
4054
if err != nil {
4155
// nolint: exhaustruct // WONTFIX. This is an external package. Cannot control it.
@@ -84,5 +98,24 @@ func (c *Client) DownloadFileFromRelease(
8498
return nil, errors.Join(ErrUnableToDownloadAsset, err)
8599
}
86100

87-
return resp.Body, nil
101+
// Returns a tag or empty string if not found.
102+
tagName := release.GetTagName()
103+
// In the event the tag is missing the prefix "v", add it.
104+
// According https://pkg.go.dev/golang.org/x/mod/semver, the version must start with v
105+
if len(tagName) > 0 && !strings.HasPrefix(tagName, "v") {
106+
tagName = "v" + tagName
107+
}
108+
var tagNamePtr *string
109+
if semver.IsValid(tagName) {
110+
tagNamePtr = &tagName
111+
} else {
112+
slog.WarnContext(ctx, "invalid tag. it will not be used", "tag", tagName)
113+
}
114+
115+
return &ReleaseFile{
116+
Contents: resp.Body,
117+
Info: ReleaseInfo{
118+
Tag: tagNamePtr,
119+
},
120+
}, nil
88121
}

lib/gh/download_file_from_release_test.go

Lines changed: 65 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import (
2121
"net/http"
2222
"testing"
2323

24+
"github.com/google/go-cmp/cmp"
2425
"github.com/google/go-github/v73/github"
2526
)
2627

@@ -34,6 +35,12 @@ func checkIfFileIsReadable(t *testing.T, file io.Reader) {
3435
}
3536
}
3637

38+
func checkIfTagIsPopulated(t *testing.T, file *ReleaseFile) {
39+
if file.Info.Tag == nil || *file.Info.Tag == "" {
40+
t.Error("tag is empty")
41+
}
42+
}
43+
3744
func TestDownloadFileFromReleaseWebFeatures(t *testing.T) {
3845
t.Skip("Used for debugging purposes.")
3946
client := NewClient("")
@@ -44,14 +51,14 @@ func TestDownloadFileFromReleaseWebFeatures(t *testing.T) {
4451
t.Errorf("unexpected error: %s", err.Error())
4552
} else {
4653
// Close to be safe at the end.
47-
defer file.Close()
48-
checkIfFileIsReadable(t, file)
54+
defer file.Contents.Close()
55+
checkIfFileIsReadable(t, file.Contents)
56+
checkIfTagIsPopulated(t, file)
4957
}
5058
}
5159

5260
func TestDownloadFileFromReleaseBrowserCompatData(t *testing.T) {
5361
t.Skip("Used for debugging purposes.")
54-
t.Skip("Cannot remove until https://github.com/mdn/browser-compat-data/issues/22675 is fixed")
5562
client := NewClient("")
5663
ctx := context.Background()
5764
httpClient := http.DefaultClient
@@ -60,8 +67,9 @@ func TestDownloadFileFromReleaseBrowserCompatData(t *testing.T) {
6067
t.Errorf("unexpected error: %s", err.Error())
6168
} else {
6269
// Close to be safe at the end.
63-
defer file.Close()
64-
checkIfFileIsReadable(t, file)
70+
defer file.Contents.Close()
71+
checkIfFileIsReadable(t, file.Contents)
72+
checkIfTagIsPopulated(t, file)
6573
}
6674
}
6775

@@ -113,6 +121,7 @@ func TestMockDownloadFileFromRelease(t *testing.T) {
113121
cfg mockGetLatestReleaseConfig
114122
roundTripCfg *mockRoundTripperConfig
115123
expectedError error
124+
expectedFile *ReleaseFile
116125
}{
117126
{
118127
name: "successful download",
@@ -127,6 +136,7 @@ func TestMockDownloadFileFromRelease(t *testing.T) {
127136
BrowserDownloadURL: valuePtr("http://example.com/file.txt"),
128137
},
129138
},
139+
TagName: valuePtr("v1.0.0"),
130140
},
131141
err: nil,
132142
},
@@ -135,10 +145,51 @@ func TestMockDownloadFileFromRelease(t *testing.T) {
135145
//nolint: exhaustruct
136146
resp: &http.Response{
137147
StatusCode: http.StatusOK,
148+
Body: io.NopCloser(nil),
138149
},
139150
err: nil,
140151
},
141152
expectedError: nil,
153+
expectedFile: &ReleaseFile{
154+
Contents: io.NopCloser(nil),
155+
Info: ReleaseInfo{
156+
Tag: valuePtr("v1.0.0"),
157+
},
158+
},
159+
},
160+
{
161+
name: "successful download without leading v in tag",
162+
cfg: mockGetLatestReleaseConfig{
163+
expectedOwner: "owner",
164+
expectedRepo: "repo",
165+
//nolint: exhaustruct
166+
release: &github.RepositoryRelease{
167+
Assets: []*github.ReleaseAsset{
168+
{
169+
Name: valuePtr("file.txt"),
170+
BrowserDownloadURL: valuePtr("http://example.com/file.txt"),
171+
},
172+
},
173+
TagName: valuePtr("2.0.0"),
174+
},
175+
err: nil,
176+
},
177+
roundTripCfg: &mockRoundTripperConfig{
178+
expectedURL: "http://example.com/file.txt",
179+
//nolint: exhaustruct
180+
resp: &http.Response{
181+
StatusCode: http.StatusOK,
182+
Body: io.NopCloser(nil),
183+
},
184+
err: nil,
185+
},
186+
expectedError: nil,
187+
expectedFile: &ReleaseFile{
188+
Contents: io.NopCloser(nil),
189+
Info: ReleaseInfo{
190+
Tag: valuePtr("v2.0.0"),
191+
},
192+
},
142193
},
143194
{
144195
name: "rate limit with github api",
@@ -151,6 +202,7 @@ func TestMockDownloadFileFromRelease(t *testing.T) {
151202
},
152203
roundTripCfg: nil,
153204
expectedError: ErrRateLimit,
205+
expectedFile: nil,
154206
},
155207
{
156208
name: "unknown error with github api",
@@ -162,6 +214,7 @@ func TestMockDownloadFileFromRelease(t *testing.T) {
162214
},
163215
roundTripCfg: nil,
164216
expectedError: ErrFatalError,
217+
expectedFile: nil,
165218
},
166219
{
167220
name: "missing asset",
@@ -174,6 +227,7 @@ func TestMockDownloadFileFromRelease(t *testing.T) {
174227
},
175228
roundTripCfg: nil,
176229
expectedError: ErrAssetNotFound,
230+
expectedFile: nil,
177231
},
178232
{
179233
name: "request.DO() fails",
@@ -198,6 +252,7 @@ func TestMockDownloadFileFromRelease(t *testing.T) {
198252
err: errors.New("something went wrong"),
199253
},
200254
expectedError: ErrUnableToDownloadAsset,
255+
expectedFile: nil,
201256
},
202257
{
203258
name: "failed to download",
@@ -224,6 +279,7 @@ func TestMockDownloadFileFromRelease(t *testing.T) {
224279
err: nil,
225280
},
226281
expectedError: ErrUnableToDownloadAsset,
282+
expectedFile: nil,
227283
},
228284
}
229285
for _, tc := range testCases {
@@ -241,7 +297,7 @@ func TestMockDownloadFileFromRelease(t *testing.T) {
241297

242298
httpClient := http.DefaultClient
243299
httpClient.Transport = &rt
244-
_, err := client.DownloadFileFromRelease(
300+
file, err := client.DownloadFileFromRelease(
245301
context.Background(),
246302
"owner",
247303
"repo",
@@ -251,6 +307,9 @@ func TestMockDownloadFileFromRelease(t *testing.T) {
251307
if !errors.Is(err, tc.expectedError) {
252308
t.Errorf("unexpected error expected: %v received: %v", tc.expectedError, err)
253309
}
310+
if diff := cmp.Diff(tc.expectedFile, file); diff != "" {
311+
t.Errorf("unexpected file (-want +got):\n%s", diff)
312+
}
254313
})
255314
}
256315
}

lib/go.mod

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ require (
6868
go.opentelemetry.io/otel/exporters/stdout/stdoutlog v0.13.0 // indirect
6969
go.opentelemetry.io/otel/log v0.13.0 // indirect
7070
go.opentelemetry.io/otel/sdk/log v0.13.0 // indirect
71+
golang.org/x/mod v0.27.0 // indirect
7172
google.golang.org/appengine/v2 v2.0.6 // indirect
7273
)
7374

lib/go.sum

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1253,6 +1253,8 @@ golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91
12531253
golang.org/x/mod v0.7.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
12541254
golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
12551255
golang.org/x/mod v0.9.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
1256+
golang.org/x/mod v0.27.0 h1:kb+q2PyFnEADO2IEF935ehFUXlWiNjJWtRNgBLSfbxQ=
1257+
golang.org/x/mod v0.27.0/go.mod h1:rWI627Fq0DEoudcK+MBkNkCe0EetEaDSwJJkCcjpazc=
12561258
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
12571259
golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
12581260
golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=

workflows/steps/services/bcd_consumer/pkg/workflow/bcd_releases_worker.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import (
2020
"net/http"
2121

2222
"github.com/GoogleChrome/webstatus.dev/lib/gcpspanner/spanneradapters/bcdconsumertypes"
23+
"github.com/GoogleChrome/webstatus.dev/lib/gh"
2324
"github.com/GoogleChrome/webstatus.dev/workflows/steps/services/bcd_consumer/pkg/data"
2425
)
2526

@@ -45,7 +46,7 @@ type DataGetter interface {
4546
ctx context.Context,
4647
owner, repo string,
4748
httpClient *http.Client,
48-
filePattern string) (io.ReadCloser, error)
49+
filePattern string) (*gh.ReleaseFile, error)
4950
}
5051

5152
// DataParser describes the behavior to read raw bytes into the expected BCDData struct.
@@ -106,7 +107,7 @@ func (p BCDJobProcessor) Process(
106107
}
107108

108109
// Step 2. Parse the file.
109-
data, err := p.dataParser.Parse(file)
110+
data, err := p.dataParser.Parse(file.Contents)
110111
if err != nil {
111112
return err
112113
}

workflows/steps/services/bcd_consumer/pkg/workflow/bcd_releases_worker_test.go

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ import (
2727

2828
"github.com/GoogleChrome/webstatus.dev/lib/gcpspanner/spanneradapters/bcdconsumertypes"
2929
"github.com/GoogleChrome/webstatus.dev/lib/gen/jsonschema/mdn__browser_compat_data"
30+
"github.com/GoogleChrome/webstatus.dev/lib/gh"
3031
"github.com/GoogleChrome/webstatus.dev/workflows/steps/services/bcd_consumer/pkg/data"
3132
)
3233

@@ -56,7 +57,7 @@ type mockDownloadFileFromReleaseConfig struct {
5657
repoOwner string
5758
repoName string
5859
filePattern string
59-
fakeFile io.ReadCloser
60+
fakeFile *gh.ReleaseFile
6061
err error
6162
}
6263

@@ -69,7 +70,7 @@ func (m *MockDataGetter) DownloadFileFromRelease(
6970
_ context.Context,
7071
owner, repo string,
7172
_ *http.Client,
72-
filePattern string) (io.ReadCloser, error) {
73+
filePattern string) (*gh.ReleaseFile, error) {
7374
if m.mockDownloadFileFromReleaseCfg.repoOwner != owner ||
7475
m.mockDownloadFileFromReleaseCfg.repoName != repo ||
7576
m.mockDownloadFileFromReleaseCfg.filePattern != filePattern {
@@ -97,7 +98,8 @@ func (m *MockDataParser) Parse(in io.ReadCloser) (*data.BCDData, error) {
9798
m.t.Errorf("unable to read file")
9899
}
99100
if m.mockParseCfg.expectedFileContents != string(fileContents) {
100-
m.t.Error("unexpected file contents")
101+
m.t.Errorf("unexpected file contents. want: %s, got: %s",
102+
m.mockParseCfg.expectedFileContents, string(fileContents))
101103
}
102104

103105
return m.mockParseCfg.ret, m.mockParseCfg.err
@@ -205,6 +207,16 @@ func getSampleReleases() []bcdconsumertypes.BrowserRelease {
205207
}
206208

207209
func TestProcess(t *testing.T) {
210+
// Create a function to generate a file because Contents can only be read once
211+
testFileFn := func() *gh.ReleaseFile {
212+
return &gh.ReleaseFile{
213+
Contents: io.NopCloser(strings.NewReader("success")),
214+
Info: gh.ReleaseInfo{
215+
Tag: nil,
216+
},
217+
}
218+
}
219+
208220
testCases := []processWorkflowTest{
209221
{
210222
name: "successful process",
@@ -215,7 +227,7 @@ func TestProcess(t *testing.T) {
215227
repoOwner: repoOwner,
216228
repoName: repoName,
217229
filePattern: filePattern,
218-
fakeFile: io.NopCloser(strings.NewReader("success")),
230+
fakeFile: testFileFn(),
219231
err: nil,
220232
},
221233
mockParseCfg: &mockParseConfig{
@@ -244,7 +256,7 @@ func TestProcess(t *testing.T) {
244256
repoOwner: repoOwner,
245257
repoName: repoName,
246258
filePattern: filePattern,
247-
fakeFile: io.NopCloser(strings.NewReader("success")),
259+
fakeFile: testFileFn(),
248260
err: errTestGetter,
249261
},
250262
mockParseCfg: nil,
@@ -261,7 +273,7 @@ func TestProcess(t *testing.T) {
261273
repoOwner: repoOwner,
262274
repoName: repoName,
263275
filePattern: filePattern,
264-
fakeFile: io.NopCloser(strings.NewReader("success")),
276+
fakeFile: testFileFn(),
265277
err: nil,
266278
},
267279
mockParseCfg: &mockParseConfig{
@@ -282,7 +294,7 @@ func TestProcess(t *testing.T) {
282294
repoOwner: repoOwner,
283295
repoName: repoName,
284296
filePattern: filePattern,
285-
fakeFile: io.NopCloser(strings.NewReader("success")),
297+
fakeFile: testFileFn(),
286298
err: nil,
287299
},
288300
mockParseCfg: &mockParseConfig{
@@ -308,7 +320,7 @@ func TestProcess(t *testing.T) {
308320
repoOwner: repoOwner,
309321
repoName: repoName,
310322
filePattern: filePattern,
311-
fakeFile: io.NopCloser(strings.NewReader("success")),
323+
fakeFile: testFileFn(),
312324
err: nil,
313325
},
314326
mockParseCfg: &mockParseConfig{

workflows/steps/services/web_feature_consumer/cmd/job/main.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ func main() {
9292
spanneradapters.NewWebFeatureGroupsConsumer(spannerClient),
9393
spanneradapters.NewWebFeatureSnapshotsConsumer(spannerClient),
9494
data.Parser{},
95+
data.V3Parser{},
9596
)
9697

9798
// Job Generation

0 commit comments

Comments
 (0)