Skip to content
This repository was archived by the owner on Mar 13, 2020. It is now read-only.

Commit 15ee63e

Browse files
authored
[OSC-1291] add schema revision tool alembic (#20)
* add alembic
* update travis build
* updated pipenv
* updated readme
* rename schema to dpo
* bump version number up for next release
* pip -> pipenv
1 parent 3a83e6a commit 15ee63e

File tree

13 files changed

+516
-24
lines changed

13 files changed

+516
-24
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
install_deps:
33
pip install pipenv --upgrade
44
pipenv install --dev
5+
alembic -c dpo/alembic.ini -x postgresql+psycopg2://postgres:travisci@localhost:5432/postgres upgrade head
56

67
# Run unit tests
78
test_unit:
@@ -10,4 +11,3 @@ test_unit:
1011
# Run integration tests
1112
test_integration:
1213
./tests/integration/test_integration.sh
13-

Pipfile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ autopep8 = "*"
1212
[packages]
1313
psycopg2-binary = "==2.7.7"
1414
SQLAlchemy = "==1.2.17"
15+
alembic = "==1.0.8"
16+
dpo = {path = "."}
1517

1618
[requires]
1719
python_version = "3.7"

Pipfile.lock

Lines changed: 85 additions & 14 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

alembic-dpo/env.py

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
from __future__ import with_statement
2+
3+
from logging.config import fileConfig
4+
5+
from sqlalchemy import engine_from_config, create_engine
6+
from sqlalchemy import pool
7+
8+
from alembic import context
9+
10+
from dpo.Shared import BaseEntity
11+
from dpo.entities import ModelChecksumEntity
12+
from dpo.entities import DataPipelineExecutionEntity
13+
from dpo.Shared import Constants
14+
15+
# this is the Alembic Config object, which provides
# access to the values within the .ini file in use.
config = context.config

# Interpret the config file for Python logging.
# This line sets up loggers basically.
fileConfig(config.config_file_name)

# add your model's MetaData object here
# for 'autogenerate' support
# from myapp import mymodel
# NOTE(review): importing the entity modules above registers their tables on
# BaseEntity.metadata, which autogenerate compares against the database.
target_metadata = BaseEntity.metadata

# other values from the config, defined by the needs of env.py,
# can be acquired:
# my_important_option = config.get_main_option("my_important_option")
# ... etc.

# The database URL is not stored in alembic.ini; it must be supplied on the
# alembic command line via -x, e.g. as shown in the error message below.
if not context.get_x_argument():
    raise AttributeError(
        "example usage `alembic -c dpo/alembic.ini -x postgresql+psycopg2://postgres:postgres@localhost/postgres downgrade -1`")

# The first -x argument is taken verbatim as the SQLAlchemy database URL.
url = context.get_x_argument()[0]
38+
39+
40+
def use_schema(obj, name, type_, reflected, compare_to):
    """Alembic ``include_object`` hook: decide whether *obj* participates in
    migration/autogenerate comparison.

    Excludes:
      * tables outside the DPO schema (``Constants.DATA_PIPELINE_EXECUTION_SCHEMA_NAME``),
      * non-reflected columns explicitly flagged with ``info['skip_autogenerate']``,
      * alembic's own ``alembic_version`` bookkeeping table.
    Everything else is included.
    """
    # Renamed the first parameter from `object` (shadowed the builtin);
    # alembic invokes this callback positionally, so callers are unaffected.
    if type_ == 'table' and obj.schema != Constants.DATA_PIPELINE_EXECUTION_SCHEMA_NAME:
        return False
    if (type_ == "column" and
            not reflected and
            obj.info.get("skip_autogenerate", False)):
        return False
    if type_ == 'table' and name == 'alembic_version':
        return False
    return True
50+
51+
52+
def run_migrations_offline():
    """Run migrations in 'offline' mode.

    Configures the context with just a URL rather than an Engine, so no
    DBAPI needs to be available; ``context.execute()`` calls emit SQL text
    to the script output instead of hitting a live connection.
    """
    configure_opts = dict(
        url=url,
        target_metadata=target_metadata,
        literal_binds=True,
        include_schemas=True,
        include_object=use_schema,
        version_table=f'alembic_version_{Constants.DATA_PIPELINE_EXECUTION_SCHEMA_NAME}',
    )
    context.configure(**configure_opts)

    with context.begin_transaction():
        context.run_migrations()
71+
72+
73+
def run_migrations_online():
    """Run migrations in 'online' mode.

    Creates an Engine from the ``-x``-supplied URL and associates a live
    connection with the migration context.
    """
    engine = create_engine(url, poolclass=pool.NullPool)

    with engine.connect() as db_connection:
        configure_opts = {
            'connection': db_connection,
            'target_metadata': target_metadata,
            'include_schemas': True,
            'include_object': use_schema,
            'version_table': f'alembic_version_{Constants.DATA_PIPELINE_EXECUTION_SCHEMA_NAME}',
        }
        context.configure(**configure_opts)

        with context.begin_transaction():
            context.run_migrations()
93+
94+
95+
# Entry point: alembic puts the context in offline mode for `--sql`
# invocations; pick the matching runner and execute it.
_runner = run_migrations_offline if context.is_offline_mode() else run_migrations_online
_runner()

dpo/DataRepository.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,6 @@ def __init__(self, db_engine, logger=None):
1414
self.db_engine = db_engine
1515
self.session_maker = sessionmaker(bind=self.db_engine)
1616

17-
def ensure_schema_exists(self):
18-
self.db_engine.execute(f'CREATE SCHEMA IF NOT EXISTS {Constants.DATA_PIPELINE_EXECUTION_SCHEMA_NAME}')
19-
Shared.BaseEntity.metadata.create_all(self.db_engine)
20-
2117
def get_current_db_datetime_with_timezone(self):
2218
return self.db_engine.execute(select([func.now()])).fetchone()[0]
2319

dpo/Shared.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
class Constants:
99
APP_NAME = 'data-pipeline-orchestrator'
10-
DATA_PIPELINE_EXECUTION_SCHEMA_NAME = 'data_pipeline'
10+
DATA_PIPELINE_EXECUTION_SCHEMA_NAME = 'dpo'
1111
NO_LAST_SUCCESSFUL_EXECUTION = 'NO_LAST_SUCCESSFUL_EXECUTION'
1212

1313
class DataPipelineExecutionStatus:

dpo/alembic.ini

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
# A generic, single database configuration.
2+
3+
[alembic]
4+
# path to migration scripts
5+
script_location = %(here)s/alembic
6+
7+
# template used to generate migration files
8+
# file_template = %%(rev)s_%%(slug)s
9+
10+
# timezone to use when rendering the date
11+
# within the migration file as well as the filename.
12+
# string value is passed to dateutil.tz.gettz()
13+
# leave blank for localtime
14+
# timezone =
15+
16+
# max length of characters to apply to the
17+
# "slug" field
18+
#truncate_slug_length = 40
19+
20+
# set to 'true' to run the environment during
21+
# the 'revision' command, regardless of autogenerate
22+
# revision_environment = false
23+
24+
# set to 'true' to allow .pyc and .pyo files without
25+
# a source .py file to be detected as revisions in the
26+
# versions/ directory
27+
# sourceless = false
28+
29+
# version location specification; this defaults
30+
# to alembic/versions. When using multiple version
31+
# directories, initial revisions must be specified with --version-path
32+
version_locations = %(here)s/alembic/versions
33+
34+
# the output encoding used when revision files
35+
# are written from script.py.mako
36+
# output_encoding = utf-8
37+
38+
39+
# Logging configuration
40+
[loggers]
41+
keys = root,sqlalchemy,alembic
42+
43+
[handlers]
44+
keys = console
45+
46+
[formatters]
47+
keys = generic
48+
49+
[logger_root]
50+
level = WARN
51+
handlers = console
52+
qualname =
53+
54+
[logger_sqlalchemy]
55+
level = WARN
56+
handlers =
57+
qualname = sqlalchemy.engine
58+
59+
[logger_alembic]
60+
level = INFO
61+
handlers =
62+
qualname = alembic
63+
64+
[handler_console]
65+
class = StreamHandler
66+
args = (sys.stderr,)
67+
level = NOTSET
68+
formatter = generic
69+
70+
[formatter_generic]
71+
format = %(levelname)-5.5s [%(name)s] %(message)s
72+
datefmt = %H:%M:%S

0 commit comments

Comments (0)