Skip to content

Commit 236b703

Browse files
yel0kReverseTM
authored andcommitted
Changed string template
1 parent 30e6d42 commit 236b703

File tree

16 files changed

+375
-31
lines changed

16 files changed

+375
-31
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,3 +38,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
3838
- Data partitioning
3939
- Ability to continue generation
4040
- Ability to ignore some models for generation
41+
42+
### Changed
43+
44+
- String templates replaced with Jinja-like templates

doc/ru/usage.md

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -160,8 +160,10 @@ open_ai:
160160
- `min_length`: Минимальная длина строки. По умолчанию `1`.
161161
- `max_length`: Максимальная длина строки. По умолчанию `32`.
162162
- `logical_type`: Логический тип строки. Поддерживаемые значения: `first_name`, `last_name`, `phone`, `text`.
163-
- `template`: Шаблон для генерации строки. Символ `A` - любая большая буква, символ `a` - любая маленькая буква,
164-
символ `0` - любая цифра, символ `#` - любой символ. Остальные символы остаются как есть.
163+
- `template`: Jinja-подобный шаблон для генерации строки. Позволяет использовать любые поля генерируемой модели и
164+
задавать паттерн строки с помощью функции `pattern`, где символ `A` - любая большая буква, символ `a` - любая маленькая буква,
165+
символ `0` - любая цифра, символ `#` - любой символ, а остальные символы остаются как есть.
166+
Также поддерживается использование фильтров, таких как `upper` и `lower`.
165167
- `locale`: Локаль для генерации строк. Поддерживаемые значения: `ru`, `en`. По умолчанию `en`.
166168
- `without_large_letters`: Флаг, указывающий, исключать ли большие буквы из строки.
167169
- `without_small_letters`: Флаг, указывающий, исключать ли маленькие буквы из строки.
@@ -307,9 +309,13 @@ models:
307309
- name: passport
308310
type: string
309311
type_params:
310-
template: AA 00 000 000
312+
template: "{{ pattern('AA 00 000 000') }}"
311313
distinct_percentage: 1
312314
ordered: true
315+
- name: email
316+
type: string
317+
type_params:
318+
template: "{{ first_name_en | lower }}.{{ id }}@example.com"
313319
- name: rating
314320
type: float
315321
type_params:

go.mod

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,9 @@ require (
3636
github.com/charmbracelet/x/term v0.2.1 // indirect
3737
github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e // indirect
3838
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
39+
github.com/emirpasic/gods v1.18.1 // indirect
3940
github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f // indirect
41+
github.com/flosch/pongo2 v0.0.0-20200913210552-0d938eb266f3 // indirect
4042
github.com/goccy/go-json v0.10.5 // indirect
4143
github.com/golang/snappy v0.0.4 // indirect
4244
github.com/google/flatbuffers v25.2.10+incompatible // indirect
@@ -58,6 +60,7 @@ require (
5860
github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6 // indirect
5961
github.com/muesli/cancelreader v0.2.2 // indirect
6062
github.com/muesli/termenv v0.15.3-0.20240618155329-98d742f6907a // indirect
63+
github.com/otaviokr/topological-sort v1.1.0 // indirect
6164
github.com/pierrec/lz4/v4 v4.1.22 // indirect
6265
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
6366
github.com/rivo/uniseg v0.4.7 // indirect

go.sum

Lines changed: 85 additions & 0 deletions
Large diffs are not rendered by default.

internal/generator/models/common.go

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,12 @@ package models
22

33
import (
44
"encoding/json"
5+
"github.com/otaviokr/topological-sort/toposort"
56
"io"
67
"os"
78
"path/filepath"
89
"reflect"
10+
"regexp"
911
"strings"
1012

1113
"github.com/ilyakaznacheev/cleanenv"
@@ -119,3 +121,46 @@ func parseErrsToString(errs []error) string {
119121

120122
return sb.String()
121123
}
124+
125+
func TopologicalSort(columns []*Column) ([]string, error) {
126+
graph := make(map[string][]string)
127+
for _, c := range columns {
128+
graph[c.Name] = make([]string, 0)
129+
130+
if c.Type != "string" {
131+
continue
132+
}
133+
134+
for _, r := range c.Ranges {
135+
if r.StringParams.Template == "" {
136+
continue
137+
}
138+
139+
graph[c.Name] = extractValuesFromTemplate(r.StringParams.Template)
140+
}
141+
}
142+
143+
sortedVertexes, err := toposort.ReverseTarjan(graph)
144+
if err != nil {
145+
return nil, err
146+
}
147+
148+
return sortedVertexes, nil
149+
}
150+
151+
// Regular expressions used by extractValuesFromTemplate. They are compiled
// once at package scope instead of on every call.
var (
	// templateExprRe captures the body of each `{{ ... }}` expression.
	templateExprRe = regexp.MustCompile(`{{\s*([^}]+)\s*}}`)
	// templateTokenSepRe splits an expression body on filter pipes and whitespace.
	templateTokenSepRe = regexp.MustCompile(`\s*\|\s*|\s+`)
)

// extractValuesFromTemplate returns the leading token of every `{{ ... }}`
// expression found in template, in order of appearance.
//
// Only the first token of an expression is considered; anything after a `|`
// or whitespace is ignored. An expression is skipped when its first token
// contains "(" (a function call such as `pattern('AA 00')`) or starts with a
// quote (a string literal such as `'text'`), because neither names a field.
//
// The result is nil when no expression yields a value.
func extractValuesFromTemplate(template string) []string {
	var values []string

	for _, match := range templateExprRe.FindAllStringSubmatch(template, -1) {
		// Split with n = -1 always yields at least one element.
		first := templateTokenSepRe.Split(match[1], -1)[0]
		if first == "" || strings.Contains(first, "(") {
			continue
		}

		// A quoted literal is a constant, not a reference to another field.
		if first[0] == '\'' || first[0] == '"' {
			continue
		}

		values = append(values, first)
	}

	return values
}
Lines changed: 163 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
package models
2+
3+
import (
4+
"github.com/stretchr/testify/require"
5+
"testing"
6+
)
7+
8+
func TestExtractValuesFromTemplate(t *testing.T) {
9+
type testCase struct {
10+
name string
11+
template string
12+
expected []string
13+
}
14+
15+
testCases := []testCase{
16+
{
17+
name: "Empty template",
18+
template: "",
19+
expected: nil,
20+
},
21+
{
22+
name: "Valid template",
23+
template: "{{ foo }}.{{boo}}",
24+
expected: []string{"foo", "boo"},
25+
},
26+
{
27+
name: "Template with filters",
28+
template: "{{ foo | upper | lower }}",
29+
expected: []string{"foo"},
30+
},
31+
{
32+
name: "Template with functions",
33+
template: "{{ upper('foo') | lower }}@{{ boo }}",
34+
expected: []string{"boo"},
35+
},
36+
{
37+
name: "Invalid template",
38+
template: "{_{ foo }}",
39+
expected: nil,
40+
},
41+
}
42+
43+
testFunc := func(t *testing.T, tc testCase) {
44+
t.Helper()
45+
46+
actual := extractValuesFromTemplate(tc.template)
47+
require.Equal(t, tc.expected, actual)
48+
}
49+
50+
for _, tc := range testCases {
51+
t.Run(tc.name, func(t *testing.T) { testFunc(t, tc) })
52+
}
53+
}
54+
55+
func TestTopologicalSort(t *testing.T) {
56+
type testCase struct {
57+
name string
58+
columns []*Column
59+
wantErr bool
60+
expected []string
61+
}
62+
63+
testCases := []testCase{
64+
{
65+
name: "Empty columns",
66+
columns: []*Column{},
67+
wantErr: false,
68+
expected: []string{},
69+
},
70+
{
71+
name: "Columns with dependencies",
72+
columns: []*Column{
73+
{
74+
Name: "1",
75+
Type: "string",
76+
Ranges: []*Params{
77+
{
78+
StringParams: &ColumnStringParams{
79+
Template: "{{ 3 }}",
80+
},
81+
},
82+
},
83+
},
84+
{
85+
Name: "2",
86+
Type: "string",
87+
Ranges: []*Params{
88+
{
89+
StringParams: &ColumnStringParams{
90+
Template: "{{ 4 }}",
91+
},
92+
},
93+
},
94+
},
95+
{
96+
Name: "3",
97+
Type: "string",
98+
Ranges: []*Params{
99+
{
100+
StringParams: &ColumnStringParams{
101+
Template: "{{ 2 }}",
102+
},
103+
},
104+
},
105+
},
106+
{
107+
Name: "4",
108+
Type: "string",
109+
Ranges: []*Params{
110+
{
111+
StringParams: &ColumnStringParams{
112+
Template: "",
113+
},
114+
},
115+
},
116+
},
117+
},
118+
wantErr: false,
119+
expected: []string{"4", "2", "3", "1"},
120+
},
121+
{
122+
name: "Columns with cycle dependencies",
123+
columns: []*Column{
124+
{
125+
Name: "1",
126+
Type: "string",
127+
Ranges: []*Params{
128+
{
129+
StringParams: &ColumnStringParams{
130+
Template: "{{ 2 }}",
131+
},
132+
},
133+
},
134+
},
135+
{
136+
Name: "2",
137+
Type: "string",
138+
Ranges: []*Params{
139+
{
140+
StringParams: &ColumnStringParams{
141+
Template: "{{ 1 }}",
142+
},
143+
},
144+
},
145+
},
146+
},
147+
wantErr: true,
148+
expected: nil,
149+
},
150+
}
151+
152+
testFunc := func(t *testing.T, tc testCase) {
153+
t.Helper()
154+
155+
actual, err := TopologicalSort(tc.columns)
156+
require.Equal(t, tc.wantErr, err != nil)
157+
require.Equal(t, tc.expected, actual)
158+
}
159+
160+
for _, tc := range testCases {
161+
t.Run(tc.name, func(t *testing.T) { testFunc(t, tc) })
162+
}
163+
}

internal/generator/usecase/general/generator/generator.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -200,7 +200,7 @@ type valueID struct {
200200
type BatchGenerator struct {
201201
numbers []valueID
202202
nextNumber int
203-
valuer func(number valueID) (any, error)
203+
valuer func(number valueID, generatedValues map[string]any) (any, error)
204204
}
205205

206206
func (cg *ColumnGenerator) NewBatchGenerator(batchSize uint64) *BatchGenerator {
@@ -226,14 +226,14 @@ func (cg *ColumnGenerator) NewBatchGenerator(batchSize uint64) *BatchGenerator {
226226
}
227227
}
228228

229-
valuer := func(id valueID) (any, error) {
229+
valuer := func(id valueID, generatedValues map[string]any) (any, error) {
230230
vg := cg.rangeGenerators[id.generatorIndex]
231231

232232
if vg.nullPercentage > 0 && fastRandomFloat(cg.dataColumnSeed+uint64(id.number)) < vg.nullPercentage {
233233
return nil, nil //nolint:nilnil
234234
}
235235

236-
return vg.generator.Value(id.number)
236+
return vg.generator.Value(id.number, generatedValues)
237237
}
238238

239239
return &BatchGenerator{
@@ -243,8 +243,8 @@ func (cg *ColumnGenerator) NewBatchGenerator(batchSize uint64) *BatchGenerator {
243243
}
244244

245245
// Value returns random value for described column.
246-
func (g *BatchGenerator) Value() (any, error) {
247-
res, err := g.valuer(g.numbers[g.nextNumber])
246+
func (g *BatchGenerator) Value(generatedValues map[string]any) (any, error) {
247+
res, err := g.valuer(g.numbers[g.nextNumber], generatedValues)
248248
g.nextNumber++
249249
g.nextNumber %= len(g.numbers)
250250

internal/generator/usecase/general/generator/value/datetime.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ func (g *DateTimeGenerator) SetTotalCount(totalValuesCount uint64) error {
2626
}
2727

2828
// Value returns n-th date from range.
29-
func (g *DateTimeGenerator) Value(number float64) (any, error) {
29+
func (g *DateTimeGenerator) Value(number float64, _ map[string]any) (any, error) {
3030
fromSec := g.From.Unix()
3131
toSec := g.To.Unix()
3232

internal/generator/usecase/general/generator/value/enum.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ func (g *EnumGenerator) SetTotalCount(totalValuesCount uint64) error {
3131
return nil
3232
}
3333

34-
func (g *EnumGenerator) Value(number float64) (any, error) {
34+
func (g *EnumGenerator) Value(number float64, _ map[string]any) (any, error) {
3535
idx := int(math.Floor(number)) / g.rowsPerValue
3636

3737
return g.Values[idx], nil

internal/generator/usecase/general/generator/value/float.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ func (g *FloatGenerator) SetTotalCount(totalValuesCount uint64) error {
2626
}
2727

2828
// Value returns n-th float number from range.
29-
func (g *FloatGenerator) Value(number float64) (any, error) {
29+
func (g *FloatGenerator) Value(number float64, _ map[string]any) (any, error) {
3030
value := orderedFloat64(g.From, g.To, number, g.totalValuesCount)
3131

3232
if g.BitWidth == 32 { //nolint:mnd

0 commit comments

Comments
 (0)