Skip to content

Commit 0ff30db

Browse files
committed
Discard changes to .gitignore
1 parent 836ff57 commit 0ff30db

29 files changed

+2242
-421
lines changed

.gitignore

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,3 @@
11
*venv*
22
*.ipynb_checkpoints*
3-
.vscode
4-
5-
node_modules
6-
test-results
3+
.vscode

beta_use_cases/README.md

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# Use Case Datasets
2+
3+
This directory includes datasets for use on the "use cases" landing pages for mathesar.org.
4+
5+
## Loading into Mathesar
6+
7+
Each dataset has a `schema.sql` and `generated_data.sql` file which can be loaded into Mathesar. Each `schema.sql` file will drop an existing schema with the same name and create a new one.
8+
9+
Here's an example of loading these into a locally-running Mathesar instance.
10+
11+
```shell
12+
# (Optional) Generate the data
13+
python {use_case_name}/generate_data.py
14+
15+
# First load the schema and tables
16+
docker exec -i mathesar_dev_db bash -c 'psql -U mathesar' < {use_case_name}/schema.sql
17+
# Then the sample data
18+
docker exec -i mathesar_dev_db bash -c 'psql -U mathesar' < {use_case_name}/generated_data.sql
19+
```
20+
21+
## Philosophy
22+
23+
These datasets use a mix of "low fidelity" faker data and more domain-specific hardcoded strings to create fake, but plausible, datasets for various Mathesar use cases.
24+
25+
Timestamp columns that would be used for auditing, soft deletes, and so on have been omitted to reduce clutter.
26+
27+
ID columns are always `BIGINT PRIMARY KEY GENERATED ALWAYS AS IDENTITY`.
28+
29+
## Development
30+
31+
The only requirement is to install dependencies with `pip install -r requirements.txt`.
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
# Bike Shop sample data
2+
3+
This sample dataset represents a bicycle shop managing their customer service requests.
4+
5+
```mermaid
6+
%% https://mermaid.js.org/syntax/entityRelationshipDiagram.html
7+
8+
erDiagram
9+
Customers {
10+
BIGINT id PK
11+
TEXT first_name "NOT NULL"
12+
TEXT last_name "NOT NULL"
13+
TEXT email
14+
TEXT phone
15+
}
16+
EquipmentTypes {
17+
BIGINT id PK
18+
TEXT name "NOT NULL"
19+
}
20+
Equipment {
21+
BIGINT id PK
22+
TEXT serial_number "NOT NULL UNIQUE"
23+
TEXT notes
24+
BIGINT type_id FK
25+
}
26+
Mechanics {
27+
BIGINT id PK
28+
TEXT first_name "NOT NULL"
29+
TEXT last_name "NOT NULL"
30+
}
31+
ServiceStatuses {
32+
BIGINT id PK
33+
TEXT name "NOT NULL UNIQUE"
34+
}
35+
ServiceRequests {
36+
BIGINT id PK
37+
BIGINT customer_id FK
38+
BIGINT equipment_id FK
39+
BIGINT mechanic_id FK
40+
TEXT request_description "NOT NULL"
41+
NUMERIC_10_2 cost
42+
TIMESTAMP time_in
43+
TIMESTAMP time_out
44+
}
45+
ServiceMilestones {
46+
BIGINT id PK
47+
BIGINT service_request_id FK
48+
BIGINT status_id FK
49+
TIMESTAMP update_time "DEFAULT NOW()"
50+
TEXT notes
51+
}
52+
53+
Equipment }o--|| EquipmentTypes : "type_id"
54+
ServiceRequests }o--|| Customers : "customer_id"
55+
ServiceRequests }o--|| Equipment : "equipment_id"
56+
ServiceRequests }o--|| Mechanics : "mechanic_id"
57+
ServiceMilestones }o--|| ServiceRequests : "service_request_id"
58+
ServiceMilestones }o--|| ServiceStatuses : "status_id"
59+
60+
```
Lines changed: 205 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,205 @@
1+
import os
2+
import random
3+
from datetime import timedelta, datetime
4+
from faker import Faker
5+
6+
fake = Faker()
7+
8+
# Helper functions
9+
def clean_value(value):
    """Render one field for a PostgreSQL text-format COPY data line.

    Args:
        value: The Python value to serialize. ``None`` becomes the COPY
            NULL marker (backslash-N); strings are sanitized; anything
            else is converted with ``str()``.

    Returns:
        A string that is safe to place between tab delimiters on a single
        COPY data line.
    """
    if value is None:
        return r"\N"
    if isinstance(value, str):
        # Escape backslashes first: COPY's text format treats a backslash as
        # an escape introducer, so a literal "\N" or "\." inside a string
        # would otherwise be read as NULL or as the end-of-data marker.
        escaped = value.replace("\\", "\\\\")
        # Tabs delimit fields and newlines/carriage returns delimit rows, so
        # flatten them to spaces to keep each row on one physical line.
        return escaped.replace("\t", " ").replace("\n", " ").replace("\r", " ")
    return str(value)
16+
17+
def write_to_sql_file(output_path, search_path, tables):
    """Write the generated rows to an SQL file as COPY ... FROM stdin blocks.

    The file starts with a ``SET search_path`` statement, followed by one
    COPY block per table, each terminated by the end-of-data marker line.

    Args:
        output_path: Path of the SQL file to create or overwrite.
        search_path: Schema name to put in the ``SET search_path`` statement.
        tables: Mapping of table name to an iterable of rows. Tables are
            written in the mapping's iteration order, and each row is an
            iterable of values passed through ``clean_value``.
    """
    # Pin the encoding and line endings so the output does not depend on the
    # platform's locale defaults.
    with open(output_path, "w", encoding="utf-8", newline="\n") as f:
        f.write(f'SET search_path="{search_path}";\n\n')
        for table_name, generator in tables.items():
            f.write(f'COPY "{table_name}" FROM stdin;\n')
            for row in generator:
                cleaned_row = "\t".join(map(clean_value, row))
                f.write(f"{cleaned_row}\n")
            # A backslash-period line tells COPY that the data block is over.
            f.write("\\.\n\n")
    print(f"SQL file generated: {output_path}")
28+
29+
def get_output_file_path(filename):
    """Return ``filename`` joined onto the directory that holds this script."""
    script_dir, _script_name = os.path.split(os.path.abspath(__file__))
    return os.path.join(script_dir, filename)
33+
34+
# Constants
# Number of rows to generate for each table.
NUM_CUSTOMERS = 20
NUM_MECHANICS = 5
NUM_EQUIPMENT = 50
NUM_SERVICE_REQUESTS = 30
NUM_SERVICE_MILESTONES = 100

# One "Equipment Types" row is emitted per entry, in this order.
EQUIPMENT_TYPES = [
    "Mountain Bike",
    "Road Bike",
    "Hybrid Bike",
    "Electric Bike",
    "BMX Bike",
    "Cyclocross Bike",
    "Folding Bike",
    "Touring Bike",
]

# Derived from the list rather than hard-coded so that the pool of valid
# type IDs can never drift from the rows that are actually generated.
NUM_EQUIPMENT_TYPES = len(EQUIPMENT_TYPES)
52+
53+
# Canned service notes, keyed by the bike part they refer to.
# generate_service_milestones picks a random (part, notes) pair and then a
# random note from it, emitting the text as "<part>: <note>".
PARTS_AND_NOTES = {
    "Frame": [
        "Small dent on the top tube identified during inspection.",
        "Frame cleaned and polished; customer commented on how shiny it looked.",
        "Noticed a crack near the bottom bracket; recommended a replacement.",
    ],
    "Wheels": [
        "Bent rear rim; trued the wheel successfully.",
        "Replaced a broken spoke on the front wheel.",
        "Customer pleased with how smooth the wheels now spin.",
    ],
    "Tires": [
        "Replaced a worn-out rear tire; customer opted for puncture-resistant model.",
        "Front tire inflated; slow leak detected and patched.",
        "Customer appreciated advice on tire pressure for road biking.",
    ],
    "Brakes": [
        "Adjusted brake pads for better stopping power.",
        "Rear brake cable frayed; replaced with a new one.",
        "Customer remarked how responsive the brakes feel now.",
    ],
    "Gears": [
        "Shifted gears sticking; replaced derailleur hanger.",
        "Customer reported skipping gears; adjusted indexing.",
        "Lubricated drivetrain; customer noticed quieter pedaling.",
    ],
    "Handlebars": [
        "Re-wrapped handlebar tape; customer loved the color choice.",
        "Handlebar alignment corrected; was slightly off-center.",
        "Installed new ergonomic grips; customer was excited about the comfort.",
    ],
    "Pedals": [
        "Left pedal bearings replaced due to grinding noise.",
        "Upgraded pedals to a clipless system; customer very happy.",
        "Mechanic noticed loose threads on right pedal spindle; tightened securely.",
    ],
    "Seat": [
        "Seatpost adjusted for proper height; customer reported better comfort.",
        "Replaced torn saddle with a new gel-padded seat.",
        "Customer commented that the saddle now feels like new.",
    ],
}
95+
96+
# Pool of customer-reported problems; generate_service_requests picks one at
# random for each request's description field.
REQUEST_DESCRIPTIONS = [
    "Bike makes a clicking noise while pedaling.",
    "Brakes feel soft and don't stop effectively.",
    "Gears are not shifting smoothly.",
    "Rear wheel wobbles; possible rim issue.",
    "Flat tire; needs replacement or repair.",
    "Customer wants a full tune-up before a race.",
    "Looking to upgrade to tubeless tires.",
    "Front fork feels stiff; possible suspension issue.",
    "Customer complained about an uncomfortable saddle.",
    "Handlebars feel loose and need adjustment.",
    "Chain keeps falling off during rides.",
    "Rear derailleur seems bent after a crash.",
    "Customer wants clipless pedals installed.",
    "Headset creaks when turning the handlebars.",
    "Electric bike battery isn't holding charge.",
    "Customer wants help installing accessories (e.g., lights, rack).",
    "Bike feels heavy and sluggish; might need a drivetrain cleaning.",
    "Suspension setup needs adjusting for rider weight.",
    "Customer reported squeaky brakes after riding in wet conditions.",
    "Child seat needs to be installed securely on the frame.",
]
118+
119+
# Status names for the "Service Statuses" table; generate_service_statuses
# numbers them from 1 in this order, so list position determines each ID.
SERVICE_STATUSES = ["Received", "In Progress", "Awaiting Part", "Completed"]
120+
121+
def generate_customers():
    """Yield one ``[id, first_name, last_name, email, phone]`` row per customer.

    IDs run from 1 through ``NUM_CUSTOMERS``; every other field is produced
    by the module-level Faker instance.
    """
    for customer_id in range(1, NUM_CUSTOMERS + 1):
        first_name = fake.first_name()
        last_name = fake.last_name()
        email = fake.email()
        phone = fake.phone_number()
        yield [customer_id, first_name, last_name, email, phone]
130+
131+
def generate_equipment_types():
    """Yield an ``[id, name]`` row for each entry of ``EQUIPMENT_TYPES``.

    IDs are assigned sequentially starting at 1, following list order.
    """
    numbered_types = enumerate(EQUIPMENT_TYPES, start=1)
    yield from (list(pair) for pair in numbered_types)
134+
135+
def generate_mechanics():
    """Yield one ``[id, first_name, last_name]`` row per mechanic.

    IDs run from 1 through ``NUM_MECHANICS``; names come from Faker.
    """
    for mechanic_id in range(1, NUM_MECHANICS + 1):
        first_name = fake.first_name()
        last_name = fake.last_name()
        yield [mechanic_id, first_name, last_name]
142+
143+
def generate_service_statuses():
    """Yield an ``[id, name]`` row for each entry of ``SERVICE_STATUSES``.

    IDs are assigned sequentially starting at 1, following list order.
    """
    yield from map(list, enumerate(SERVICE_STATUSES, start=1))
146+
147+
def generate_equipment(equipment_type_ids):
    """Yield one ``[id, type_id, serial_number, notes]`` row per equipment item.

    Args:
        equipment_type_ids: Sequence of valid equipment-type IDs; one is
            picked at random for every row.

    IDs run from 1 through ``NUM_EQUIPMENT``. The serial number is a unique
    EAN-13 string from Faker, and notes are always the empty string.
    """
    for equipment_id in range(1, NUM_EQUIPMENT + 1):
        type_id = random.choice(equipment_type_ids)
        serial_number = fake.unique.ean13()
        notes = ""
        yield [equipment_id, type_id, serial_number, notes]
155+
156+
def generate_service_requests(customer_ids, equipment_ids, mechanic_ids):
    """Yield one service-request row per ID from 1 to ``NUM_SERVICE_REQUESTS``.

    Each row is ``[id, customer_id, equipment_id, mechanic_id, description,
    cost, time_in, time_out]``.

    Args:
        customer_ids: Sequence of valid customer IDs to choose from.
        equipment_ids: Sequence of valid equipment IDs to choose from.
        mechanic_ids: Sequence of valid mechanic IDs to choose from.

    About half of the requests are still open and have ``time_out`` set to
    ``None``. For the rest, ``time_out`` is derived from ``time_in`` so that
    a bike is never checked out before it was checked in.
    """
    for request_id in range(1, NUM_SERVICE_REQUESTS + 1):
        time_in = fake.date_time_this_year()
        time_out = None
        if random.random() < 0.5:
            # Turnaround between 30 minutes and two weeks.
            turnaround = timedelta(minutes=random.randint(30, 14 * 24 * 60))
            # Cap at the current time so that completed requests do not end in
            # the future, but never go earlier than time_in.
            # NOTE(review): assumes Faker returns naive datetimes here (its
            # default when no tzinfo is passed) - confirm if that changes.
            now = datetime.now().replace(microsecond=0)
            time_out = max(time_in, min(time_in + turnaround, now))
        yield [
            request_id,
            random.choice(customer_ids),  # Valid customer_id
            random.choice(equipment_ids),  # Valid equipment_id
            random.choice(mechanic_ids),  # Valid mechanic_id
            random.choice(REQUEST_DESCRIPTIONS),  # Realistic request description
            round(random.uniform(20, 500), 2),
            time_in,
            time_out,
        ]
168+
169+
def generate_service_milestones(service_request_ids, status_ids):
    """Yield one milestone row per ID from 1 to ``NUM_SERVICE_MILESTONES``.

    Each row is ``[id, service_request_id, status_id, update_time, notes]``.

    Args:
        service_request_ids: Sequence of valid service-request IDs to choose from.
        status_ids: Sequence of valid status IDs to choose from.

    The note is formatted as ``"<part>: <note>"`` using a random entry of
    ``PARTS_AND_NOTES``.
    """
    # Build the (part, notes) pairs once instead of on every iteration.
    part_note_pairs = list(PARTS_AND_NOTES.items())
    for milestone_id in range(1, NUM_SERVICE_MILESTONES + 1):
        part, notes = random.choice(part_note_pairs)
        yield [
            milestone_id,
            random.choice(service_request_ids),  # Valid service_request_id
            random.choice(status_ids),  # Valid status_id
            fake.date_time_this_year(),
            f"{part}: {random.choice(notes)}",  # Realistic service note
        ]
179+
180+
if __name__ == "__main__":
    # Pools of valid IDs that the foreign-key columns draw from. Each pool
    # mirrors the 1..N numbering used by the matching generator.
    customer_ids = list(range(1, NUM_CUSTOMERS + 1))
    # Sized from the list that actually produces the rows (as status_ids is),
    # so that a type_id can never reference a row that was not generated.
    equipment_type_ids = list(range(1, len(EQUIPMENT_TYPES) + 1))
    equipment_ids = list(range(1, NUM_EQUIPMENT + 1))
    mechanic_ids = list(range(1, NUM_MECHANICS + 1))
    service_request_ids = list(range(1, NUM_SERVICE_REQUESTS + 1))
    status_ids = list(range(1, len(SERVICE_STATUSES) + 1))

    # Tables are written in insertion order, so referenced tables are listed
    # before the tables that point at them. The generators are consumed
    # lazily by write_to_sql_file; no intermediate lists are needed.
    tables = {
        "Customers": generate_customers(),
        "Equipment Types": generate_equipment_types(),
        "Equipment": generate_equipment(equipment_type_ids),
        "Mechanics": generate_mechanics(),
        "Service Statuses": generate_service_statuses(),
        "Service Requests": generate_service_requests(
            customer_ids, equipment_ids, mechanic_ids
        ),
        "Service Milestones": generate_service_milestones(
            service_request_ids, status_ids
        ),
    }

    sql_file = get_output_file_path("generated_data.sql")
    write_to_sql_file(sql_file, "Bike Shop", tables)

0 commit comments

Comments
 (0)