Skip to content

Commit 21ed3dc

Browse files
committed
WIP
1 parent cdffb28 commit 21ed3dc

File tree

18 files changed

+943
-0
lines changed

18 files changed

+943
-0
lines changed
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
# Hardware Store sample data
2+
3+
This sample dataset represents a chain of hardware stores managing their inventory and rentals.
4+
5+
```mermaid
6+
%% https://mermaid.js.org/syntax/entityRelationshipDiagram.html
7+
8+
erDiagram
9+
"Store Locations" {
10+
BIGINT id PK
11+
string name
12+
string address
13+
}
14+
15+
"Customers" {
16+
BIGINT id PK
17+
string first_name
18+
string last_name
19+
string email
20+
string phone
21+
string address
22+
}
23+
24+
"Assets" {
25+
BIGINT id PK
26+
string name
27+
string serial_number
28+
NUMERIC rental_price
29+
NUMERIC sale_price
30+
string rental_period
31+
string location
32+
BIGINT store_id FK
33+
}
34+
35+
"Transactions" {
36+
BIGINT id PK
37+
BIGINT asset_id FK
38+
BIGINT customer_id FK
39+
string transaction_type
40+
TIMESTAMP transaction_date
41+
NUMERIC total_charge
42+
string note
43+
}
44+
45+
"Rentals" {
46+
BIGINT id PK
47+
BIGINT transaction_id FK
48+
TIMESTAMP rental_start
49+
TIMESTAMP rental_end
50+
TIMESTAMP time_out
51+
TIMESTAMP time_in
52+
INTERVAL rental_time
53+
}
54+
55+
%% Relationships
56+
%% See: https://mermaid.js.org/syntax/entityRelationshipDiagram.html#relationship-syntax
57+
"Assets" }o--|| "Store Locations" : "store_id"
58+
"Transactions" }o--|| "Assets" : "asset_id"
59+
"Transactions" }o--|| "Customers" : "customer_id"
60+
"Rentals" }o--|| "Transactions" : "transaction_id"
61+
```
62+
63+
64+
## Loading Data
65+
66+
The generated SQL file, `generated_data.sql`, contains all the COPY commands needed to import the data into your database. Both the data and this SQL file are produced by the `generate_data.py` script, which can be adjusted and re-run to alter the data if needed.
67+
68+
Load the data into a locally-running Mathesar instance like this:
69+
70+
```shell
71+
# First load the schema and tables
72+
docker exec -i mathesar_dev_db bash -c 'psql -U mathesar' < schema.sql
73+
# Then the sample data
74+
docker exec -i mathesar_dev_db bash -c 'psql -U mathesar' < generated_data.sql
75+
```
76+
77+
## Development
78+
79+
The only requirement is to install dependencies with `pip install -r requirements.txt`.
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
import os
2+
import random
3+
from faker import Faker
4+
import faker_commerce
5+
6+
# Shared Faker instance used by every row generator in this script.
# The faker_commerce provider is registered on it; generate_assets calls
# fake.ecommerce_name(), which is presumably supplied by that provider.
fake = Faker()
fake.add_provider(faker_commerce.Provider)

# Number of rows to generate
NUM_STORES = 5  # rows written to "Store Locations"
NUM_CUSTOMERS = 20  # rows written to "Customers"
NUM_ASSETS = 50  # rows written to "Assets"
NUM_TRANSACTIONS = 60  # rows written to "Transactions"
NUM_RENTALS = 30  # rows written to "Rentals"
15+
16+
# Helper function to clean values for COPY
17+
def clean_value(value):
    r"""Render a single field for PostgreSQL ``COPY ... FROM stdin`` text format.

    Args:
        value: The Python value for one column; ``None`` means SQL NULL.

    Returns:
        ``\N`` for ``None``; for strings, the text with backslashes escaped
        and tabs / newlines / carriage returns replaced by single spaces;
        otherwise ``str(value)``.
    """
    if value is None:
        return r"\N"
    if isinstance(value, str):
        # Backslash is COPY's escape character: double it first so literal
        # backslashes survive and a string that happens to read "\N" is not
        # loaded as NULL.
        escaped = value.replace("\\", "\\\\")
        # Tab is the column delimiter and newline / carriage return end a
        # row, so flatten each of them to a space.
        return escaped.replace("\t", " ").replace("\n", " ").replace("\r", " ")
    return str(value)
23+
24+
# Table Data Generation
25+
def generate_store_locations():
    """Lazily produce one ``[id, name, address]`` row per store.

    Ids run from 1 through ``NUM_STORES``; the name comes from
    ``fake.company()`` and the address from ``fake.address()``.
    """
    yield from (
        [store_id, fake.company(), fake.address()]
        for store_id in range(1, NUM_STORES + 1)
    )
28+
29+
def generate_customers():
    """Lazily produce one customer row per id from 1 through ``NUM_CUSTOMERS``.

    Each row is ``[id, first_name, last_name, email, phone, address]``, with
    every field after the id drawn from the shared ``fake`` instance.
    """
    for customer_id in range(1, NUM_CUSTOMERS + 1):
        # Draw the fields one at a time, in column order.
        given = fake.first_name()
        family = fake.last_name()
        email = fake.email()
        phone = fake.phone_number()
        street_address = fake.address()
        yield [customer_id, given, family, email, phone, street_address]
39+
40+
def generate_assets(store_ids):
    """Lazily produce one asset row per id from 1 through ``NUM_ASSETS``.

    Args:
        store_ids: Sequence of store ids to pick each asset's store from.

    Yields:
        ``[id, name, serial_number, rental_price, sale_price, rental_period,
        location, store_id]``. ``sale_price`` is ``None`` for roughly 80% of
        rows; otherwise it is 50-80% of the rental price.
    """
    periods = ["daily", "weekly", "monthly"]
    for asset_id in range(1, NUM_ASSETS + 1):
        period = random.choice(periods)
        rent = round(random.uniform(5, 100), 2)

        # Only about one asset in five is also offered for sale.
        if random.random() < 0.2:
            sale = round(rent * random.uniform(0.5, 0.8), 2)  # Discounted sale price
        else:
            sale = None

        name = fake.ecommerce_name()
        serial = fake.unique.ean13()
        aisle = random.randint(1, 20)
        shelf = random.randint(1, 10)
        store = random.choice(store_ids)

        yield [
            asset_id,
            name,
            serial,
            rent,
            sale,
            period,
            f"Aisle {aisle} - Shelf {shelf}",
            store,
        ]
58+
59+
def generate_transactions(asset_ids, customer_ids):
    """Lazily produce one transaction row per id from 1 through ``NUM_TRANSACTIONS``.

    Args:
        asset_ids: Sequence of asset ids to choose from.
        customer_ids: Sequence of customer ids to choose from.

    Yields:
        ``[id, asset_id, customer_id, transaction_type, transaction_date,
        total_charge, note]`` where the type is one of ``"Sale"``,
        ``"Rental"`` or ``"Return"`` and the charge is between 10 and 500.
    """
    kinds = ["Sale", "Rental", "Return"]
    for txn_id in range(1, NUM_TRANSACTIONS + 1):
        # List elements are evaluated left to right, which keeps the random
        # draws in column order.
        yield [
            txn_id,
            random.choice(asset_ids),
            random.choice(customer_ids),
            random.choice(kinds),
            fake.date_time_this_year(),
            round(random.uniform(10, 500), 2),
            fake.sentence(),
        ]
67+
68+
def generate_rentals(transaction_ids):
    """Lazily produce one rental row per id from 1 through ``NUM_RENTALS``.

    Args:
        transaction_ids: Sequence of transaction ids to choose from.

    Yields:
        ``[id, transaction_id, start, end, start, end, end - start]``: the
        start and end timestamps each appear twice, and the last field is
        their difference.
    """
    for rental_id in range(1, NUM_RENTALS + 1):
        txn = random.choice(transaction_ids)
        began = fake.date_time_this_year()
        # The end is drawn after the start so the duration is non-negative.
        finished = fake.date_time_between_dates(datetime_start=began)
        duration = finished - began
        yield [rental_id, txn, began, finished, began, finished, duration]
75+
76+
# Generate Data
77+
# Id pools handed to the generators so foreign keys point at existing rows.
store_ids = list(range(1, NUM_STORES + 1))
customer_ids = list(range(1, NUM_CUSTOMERS + 1))
asset_ids = list(range(1, NUM_ASSETS + 1))
transaction_ids = list(range(1, NUM_TRANSACTIONS + 1))

# Insertion order is the load order: referenced tables are written before
# the tables whose foreign keys point at them.
tables = {
    "Store Locations": generate_store_locations(),
    "Customers": generate_customers(),
    "Assets": generate_assets(store_ids),
    "Transactions": generate_transactions(asset_ids, customer_ids),
    "Rentals": generate_rentals(transaction_ids),
}

# Write to SQL file (in the current working directory)
sql_file = os.path.join(os.getcwd(), "generated_data.sql")

# Explicit UTF-8 so the output does not depend on the platform's default
# locale encoding.
with open(sql_file, "w", encoding="utf-8") as f:
    f.write('SET search_path="Hardware Store";\n\n')

    for table_name, generator in tables.items():
        # Add quotes around table name since it contains spaces
        f.write(f'COPY "{table_name}" FROM stdin;\n')
        for row in generator:
            # COPY text format: tab-separated fields, one row per line.
            cleaned_row = "\t".join(map(clean_value, row))
            f.write(f"{cleaned_row}\n")
        # "\." on its own line marks the end of the COPY data.
        f.write("\\.\n\n")

print(f"SQL file generated: {sql_file}")

0 commit comments

Comments
 (0)