diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 00000000..1f884de3
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,76 @@
+name: CI
+
+on:
+  push:
+    branches: [ main, master ]
+  pull_request:
+    branches: [ main, master ]
+
+jobs:
+  test:
+    name: ${{ matrix.python-version }} / ${{ matrix.os }}
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [ubuntu-22.04, windows-latest, macos-latest]
+        python-version: [
+          "3.8",
+          "3.9",
+          "3.10",
+          "3.11",
+          "3.12",
+          "pypy-3.9",
+          "pypy-3.10"
+        ]
+
+    steps:
+      - uses: actions/checkout@v3
+
+      # Install gettext on macOS
+      - name: Install gettext on macOS
+        if: runner.os == 'macOS'
+        run: |
+          brew install gettext
+          brew link gettext --force
+
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+          architecture: x64
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          python -m pip install --upgrade setuptools wheel
+          python -m pip install tox tox-gh-actions
+
+      - name: Test with tox
+        run: tox --skip-missing-interpreters false
+
+  check:
+    runs-on: ubuntu-22.04
+    steps:
+      - uses: actions/checkout@v3
+      - uses: actions/setup-python@v4
+        with:
+          python-version: "3.11"
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          python -m pip install tox
+      - run: tox -e check
+
+  docs:
+    runs-on: ubuntu-22.04
+    steps:
+      - uses: actions/checkout@v3
+      - uses: actions/setup-python@v4
+        with:
+          python-version: "3.11"
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          python -m pip install tox
+      - run: tox -e docs
\ No newline at end of file
diff --git a/setup.cfg b/setup.cfg
new file mode 100644
index 00000000..533c9bd6
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,5 @@
+[flake8]
+max-line-length = 140
+exclude = .tox,*.egg,build,data
+select = E,W,F
+ignore = W503
\ No newline at end of file
diff --git a/setup.py b/setup.py
index be8a6f10..02dcde7f 100755
--- a/setup.py
+++ b/setup.py
@@ -2,8 +2,10 @@
 import re
 from pathlib import Path
 
-from setuptools import find_packages
-from setuptools import setup
+from setuptools import (
+    find_packages,
+    setup,
+)
 
 
 def read(*names, **kwargs):
diff --git a/src/datapilot/clients/altimate/utils.py b/src/datapilot/clients/altimate/utils.py
index cc4b4fa5..2edaea59 100644
--- a/src/datapilot/clients/altimate/utils.py
+++ b/src/datapilot/clients/altimate/utils.py
@@ -55,7 +55,15 @@ def validate_permissions(
     return api_client.validate_upload_to_integration()
 
 
-def onboard_file(api_token, tenant, dbt_core_integration_id, dbt_core_integration_environment, file_type, file_path, backend_url) -> Dict:
+def onboard_file(
+    api_token,
+    tenant,
+    dbt_core_integration_id,
+    dbt_core_integration_environment,
+    file_type,
+    file_path,
+    backend_url,
+) -> Dict:
     api_client = APIClient(api_token, base_url=backend_url, tenant=tenant)
 
     params = {
@@ -84,7 +92,7 @@ def onboard_file(api_token, tenant, dbt_core_integration_id, dbt_core_integratio
         api_client.log("Error getting signed URL.")
     return {
         "ok": False,
-        "message": "Error in uploading the manifest. ",
+        "message": "Error in uploading the manifest.",
     }
 
 
@@ -101,7 +109,7 @@ def start_dbt_ingestion(api_token, tenant, dbt_core_integration_id, dbt_core_int
         api_client.log("Error starting dbt ingestion worker")
     return {
         "ok": False,
-        "message": "Error starting dbt ingestion worker. ",
+        "message": "Error starting dbt ingestion worker.",
     }
 
 
diff --git a/src/datapilot/core/platforms/dbt/executor.py b/src/datapilot/core/platforms/dbt/executor.py
index 8d6d5cfd..f8286818 100644
--- a/src/datapilot/core/platforms/dbt/executor.py
+++ b/src/datapilot/core/platforms/dbt/executor.py
@@ -88,10 +88,11 @@ def __init__(
             )
             if not self.selected_models:
                 raise AltimateCLIArgumentError(
-                    f"Invalid values provided in the --select argument. Could not find models associated with pattern: --select {' '.join(selected_models)}"
+                    "Invalid values provided in the --select argument. "
+                    f"Could not find models associated with pattern: --select {' '.join(selected_models)}"
                 )
-            self.excluded_models = None
-            self.excluded_models_flag = False
+        self.excluded_models = None
+        self.excluded_models_flag = False
 
     def _check_if_skipped(self, insight):
         if self.config.get("disabled_insights", False):
diff --git a/src/datapilot/core/platforms/dbt/insights/checks/check_macro_args_have_desc.py b/src/datapilot/core/platforms/dbt/insights/checks/check_macro_args_have_desc.py
index ee2b3973..70abf1b1 100644
--- a/src/datapilot/core/platforms/dbt/insights/checks/check_macro_args_have_desc.py
+++ b/src/datapilot/core/platforms/dbt/insights/checks/check_macro_args_have_desc.py
@@ -11,7 +11,11 @@ class CheckMacroArgsHaveDesc(ChecksInsight):
     NAME = "Check macro arguments has description"
     ALIAS = "check_macro_args_have_desc"
     DESCRIPTION = "Macro arguments should have a description. "
-    REASON_TO_FLAG = "Clear descriptions for macro arguments are crucial as they prevent misunderstandings, enhance user comprehension, and simplify maintenance. This leads to more accurate data analysis and efficient workflows."
+    REASON_TO_FLAG = (
+        "Clear descriptions for macro arguments are crucial as they prevent misunderstandings, "
+        "enhance user comprehension, and simplify maintenance. "
+        "This leads to more accurate data analysis and efficient workflows."
+    )
 
     def _build_failure_result(
         self,
diff --git a/src/datapilot/core/platforms/dbt/insights/checks/check_macro_has_desc.py b/src/datapilot/core/platforms/dbt/insights/checks/check_macro_has_desc.py
index afc853d2..2103d15c 100644
--- a/src/datapilot/core/platforms/dbt/insights/checks/check_macro_has_desc.py
+++ b/src/datapilot/core/platforms/dbt/insights/checks/check_macro_has_desc.py
@@ -11,7 +11,11 @@ class CheckMacroHasDesc(ChecksInsight):
     NAME = "Macro has documentation"
     ALIAS = "check_macro_has_desc"
     DESCRIPTION = "Macros should be documented."
-    REASON_TO_FLAG = "Undocumented macros can cause misunderstandings and inefficiencies in data modeling and analysis, as they make it difficult to understand their purpose and usage. Clear descriptions are vital for accuracy and streamlined workflow."
+    REASON_TO_FLAG = (
+        "Undocumented macros can cause misunderstandings and inefficiencies in data modeling and analysis, "
+        "as they make it difficult to understand their purpose and usage. "
+        "Clear descriptions are vital for accuracy and streamlined workflow."
+    )
 
     def _build_failure_result(
         self,
diff --git a/src/datapilot/core/platforms/dbt/insights/checks/check_model_has_tests_by_group.py b/src/datapilot/core/platforms/dbt/insights/checks/check_model_has_tests_by_group.py
index 66dd1fdb..2780b08e 100644
--- a/src/datapilot/core/platforms/dbt/insights/checks/check_model_has_tests_by_group.py
+++ b/src/datapilot/core/platforms/dbt/insights/checks/check_model_has_tests_by_group.py
@@ -50,9 +50,16 @@ def generate(self, *args, **kwargs) -> List[DBTModelInsightResponse]:
     def _build_failure_result(self, model_unique_id: str, missing_test_groups: List[Dict]) -> DBTInsightResult:
         missing_test_group_str = ""
         for test in missing_test_groups:
-            missing_test_group_str += f"Test Group: {test.get(self.TEST_GROUP_STR)}, Min Count: {test.get(self.TEST_COUNT_STR)}, Actual Count: {test.get('actual_count')}\n"
+            missing_test_group_str += (
+                f"Test Group: {test.get(self.TEST_GROUP_STR)}, "
+                f"Min Count: {test.get(self.TEST_COUNT_STR)}, "
+                f"Actual Count: {test.get('actual_count')}\n"
+            )
 
-        failure_message = f"The model `{model_unique_id}` does not have enough tests for the following groups:\n{missing_test_group_str}. "
+        failure_message = (
+            f"The model `{model_unique_id}` does not have enough tests for the following groups:\n"
+            f"{missing_test_group_str}. "
+        )
         recommendation = (
             "Add tests with the specified groups for each model listed above. "
             "Having tests with specific groups ensures proper validation and data integrity."
diff --git a/src/datapilot/core/platforms/dbt/insights/checks/check_model_has_tests_by_name.py b/src/datapilot/core/platforms/dbt/insights/checks/check_model_has_tests_by_name.py
index 3e5eb04d..9b271e4c 100644
--- a/src/datapilot/core/platforms/dbt/insights/checks/check_model_has_tests_by_name.py
+++ b/src/datapilot/core/platforms/dbt/insights/checks/check_model_has_tests_by_name.py
@@ -48,9 +48,16 @@ def generate(self, *args, **kwargs) -> List[DBTModelInsightResponse]:
     def _build_failure_result(self, model_unique_id: str, missing_tests: List[Dict]) -> DBTInsightResult:
         tests_str = ""
         for test in missing_tests:
-            tests_str += f"Test Name: {test.get(self.TEST_NAME_STR)}, Min Count: {test.get(self.TEST_COUNT_STR)}, Actual Count: {test.get('actual_count')}\n"
+            tests_str += (
+                f"Test Name: {test.get(self.TEST_NAME_STR)}, "
+                f"Min Count: {test.get(self.TEST_COUNT_STR)}, "
+                f"Actual Count: {test.get('actual_count')}\n"
+            )
 
-        failure_message = f"The model `{model_unique_id}` does not have enough tests:\n{tests_str}. "
+        failure_message = (
+            f"The model `{model_unique_id}` does not have enough tests:\n"
+            f"{tests_str}. "
+        )
         recommendation = (
             "Add tests with the specified names for each model listed above. "
             "Having tests with specific names ensures proper validation and data integrity."
diff --git a/src/datapilot/core/platforms/dbt/insights/checks/check_model_has_tests_by_type.py b/src/datapilot/core/platforms/dbt/insights/checks/check_model_has_tests_by_type.py
index a5ce95c8..1bc204c2 100644
--- a/src/datapilot/core/platforms/dbt/insights/checks/check_model_has_tests_by_type.py
+++ b/src/datapilot/core/platforms/dbt/insights/checks/check_model_has_tests_by_type.py
@@ -49,7 +49,11 @@ def generate(self, *args, **kwargs) -> List[DBTModelInsightResponse]:
     def _build_failure_result(self, model_unique_id: str, missing_tests: List[Dict]) -> DBTInsightResult:
         missing_test_type_str = ""
         for test in missing_tests:
-            missing_test_type_str += f"Test type: {test.get(self.TEST_TYPE_STR)}, Min Count: {test.get(self.TEST_COUNT_STR)}, Actual Count: {test.get('actual_count')}\n"
+            missing_test_type_str += (
+                f"Test type: {test.get(self.TEST_TYPE_STR)}, "
+                f"Min Count: {test.get(self.TEST_COUNT_STR)}, "
+                f"Actual Count: {test.get('actual_count')}\n"
+            )
 
         failure_message = f"The model `{model_unique_id}` does not have enough tests for the following types:\n{missing_test_type_str}. "
         recommendation = (
diff --git a/src/datapilot/core/platforms/dbt/insights/checks/check_model_materialization_by_childs.py b/src/datapilot/core/platforms/dbt/insights/checks/check_model_materialization_by_childs.py
index 33d6a445..81831360 100644
--- a/src/datapilot/core/platforms/dbt/insights/checks/check_model_materialization_by_childs.py
+++ b/src/datapilot/core/platforms/dbt/insights/checks/check_model_materialization_by_childs.py
@@ -11,7 +11,11 @@ class CheckModelMaterializationByChilds(ChecksInsight):
     NAME = "Model materialization by children"
     ALIAS = "check_model_materialization_by_childs"
     DESCRIPTION = "Fewer children than threshold ideally should be view or ephemeral, more or equal should be table or incremental."
-    REASON_TO_FLAG = "The model is flagged due to inappropriate materialization: models with child counts above the threshold require robust and efficient data processing, hence they should be materialized as tables or incrementals for optimized query performance and data management."
+    REASON_TO_FLAG = (
+        "The model is flagged due to inappropriate materialization: models with child counts above the threshold "
+        "require robust and efficient data processing, hence they should be materialized as tables or incrementals "
+        "for optimized query performance and data management."
+    )
     THRESHOLD_CHILDS_STR = "threshold_childs"
 
     def _build_failure_result_view_materialization(
diff --git a/src/datapilot/core/platforms/dbt/insights/checks/check_model_name_contract.py b/src/datapilot/core/platforms/dbt/insights/checks/check_model_name_contract.py
index 988c5be9..27df82f7 100644
--- a/src/datapilot/core/platforms/dbt/insights/checks/check_model_name_contract.py
+++ b/src/datapilot/core/platforms/dbt/insights/checks/check_model_name_contract.py
@@ -122,7 +122,10 @@ def get_config_schema(cls):
                         },
                         "required": [cls.PATTERN_STR, cls.FOLDER_STR],
                     },
-                    "description": "A list of regex patterns to check the model name against. Each pattern is applied to the folder specified. If no pattern is found for the folder, the default pattern is used.",
+                    "description": (
+                        "A list of regex patterns to check the model name against. Each pattern is applied to the folder specified. "
+                        "If no pattern is found for the folder, the default pattern is used."
+                    ),
                     "default": [],
                 },
             },
diff --git a/src/datapilot/core/platforms/dbt/insights/checks/check_model_parents_and_childs.py b/src/datapilot/core/platforms/dbt/insights/checks/check_model_parents_and_childs.py
index 2ab27ed3..bc78cd4f 100644
--- a/src/datapilot/core/platforms/dbt/insights/checks/check_model_parents_and_childs.py
+++ b/src/datapilot/core/platforms/dbt/insights/checks/check_model_parents_and_childs.py
@@ -66,7 +66,8 @@ def generate(self, *args, **kwargs) -> List[DBTModelInsightResponse]:
 
         if not self.max_childs and not self.max_parents:
             self.logger.info(
-                "max_children and max_parents are required values in the configuration. Please provide the required values. Skipping the insight."
+                "max_children and max_parents are required values in the configuration. "
+                "Please provide the required values. Skipping the insight."
             )
             return insights
 
@@ -98,10 +99,18 @@ def _check_model_parents_and_childs(self, model_unique_id: str) -> Optional[str]
         parents = node.depends_on.nodes
         message = ""
         if len(parents) < self.min_parents or len(parents) > self.max_parents:
-            message += f"The model:{model_unique_id} doesn't have the required number of parents.\n Min parents: {self.min_parents}, Max parents: {self.max_parents}. It has f{len(parents)} parents\n"
+            message += (
+                f"The model:{model_unique_id} doesn't have the required number of parents.\n"
+                f"Min parents: {self.min_parents}, Max parents: {self.max_parents}. "
+                f"It has {len(parents)} parents\n"
+            )
 
         if len(children) < self.min_childs or len(children) > self.max_childs:
-            message += f"The model:{model_unique_id} doesn't have the required number of childs.\n Min childs: {self.min_childs}, Max childs: {self.max_childs}. It has f{len(children)} childs\n"
+            message += (
+                f"The model:{model_unique_id} doesn't have the required number of childs.\n"
+                f"Min childs: {self.min_childs}, Max childs: {self.max_childs}. "
+                f"It has {len(children)} childs\n"
+            )
 
         return message
 
diff --git a/src/datapilot/core/platforms/dbt/insights/checks/check_source_has_freshness.py b/src/datapilot/core/platforms/dbt/insights/checks/check_source_has_freshness.py
index b4f3eaee..b0c7cae9 100644
--- a/src/datapilot/core/platforms/dbt/insights/checks/check_source_has_freshness.py
+++ b/src/datapilot/core/platforms/dbt/insights/checks/check_source_has_freshness.py
@@ -82,7 +82,10 @@ def get_config_schema(cls):
             "properties": {
                 cls.FRESHNESS_STR: {
                     "type": "array",
-                    "description": "The freshness options that should be defined for the source. If not provided, all freshness options are allowed.",
+                    "description": (
+                        "The freshness options that should be defined for the source. "
+                        "If not provided, all freshness options are allowed."
+                    ),
                     "items": {
                         "type": "string",
                         "enum": ["error_after", "warn_after"],
diff --git a/src/datapilot/core/platforms/dbt/insights/checks/check_source_has_meta_keys.py b/src/datapilot/core/platforms/dbt/insights/checks/check_source_has_meta_keys.py
index f988e641..ef353104 100644
--- a/src/datapilot/core/platforms/dbt/insights/checks/check_source_has_meta_keys.py
+++ b/src/datapilot/core/platforms/dbt/insights/checks/check_source_has_meta_keys.py
@@ -14,7 +14,10 @@ class CheckSourceHasMetaKeys(ChecksInsight):
     NAME = "Source has required metadata keys"
     ALIAS = "check_source_has_meta_keys"
     DESCRIPTION = "Check if the source has required metadata keys"
-    REASON_TO_FLAG = "Missing meta keys in the source can lead to inconsistency in metadata management and understanding of the source. It's important to ensure that the source includes all the required meta keys as per the configuration."
+    REASON_TO_FLAG = (
+        "Missing meta keys in the source can lead to inconsistency in metadata management and understanding of the source. "
+        "It's important to ensure that the source includes all the required meta keys as per the configuration."
+    )
     META_KEYS_STR = "meta_keys"
     ALLOW_EXTRA_KEYS_STR = "allow_extra_keys"
 
diff --git a/src/datapilot/core/platforms/dbt/insights/checks/check_source_has_tests_by_group.py b/src/datapilot/core/platforms/dbt/insights/checks/check_source_has_tests_by_group.py
index c8a7710d..f25cc87c 100644
--- a/src/datapilot/core/platforms/dbt/insights/checks/check_source_has_tests_by_group.py
+++ b/src/datapilot/core/platforms/dbt/insights/checks/check_source_has_tests_by_group.py
@@ -48,10 +48,15 @@ def generate(self, *args, **kwargs) -> List[DBTModelInsightResponse]:
     def _build_failure_result(self, source_unique_id: str, missing_test_groups: List[Dict]) -> DBTInsightResult:
         missing_test_group_str = ""
         for test in missing_test_groups:
-            missing_test_group_str += f"Test Group: {test.get(self.TEST_GROUP_STR)}, Min Count: {test.get(self.TEST_COUNT_STR)}, Actual Count: {test.get('actual_count')}\n"
+            missing_test_group_str += (
+                f"Test Group: {test.get(self.TEST_GROUP_STR)}, "
+                f"Min Count: {test.get(self.TEST_COUNT_STR)}, "
+                f"Actual Count: {test.get('actual_count')}\n"
+            )
 
         failure_message = (
-            f"The source `{source_unique_id}` does not have enough tests for the following groups:\n{missing_test_group_str}. "
+            f"The source `{source_unique_id}` does not have enough tests for the following groups:\n"
+            f"{missing_test_group_str}. "
         )
         recommendation = (
             "Add tests with the specified groups for each source listed above. "
diff --git a/src/datapilot/core/platforms/dbt/insights/checks/check_source_has_tests_by_name.py b/src/datapilot/core/platforms/dbt/insights/checks/check_source_has_tests_by_name.py
index 0964336f..53ffec92 100644
--- a/src/datapilot/core/platforms/dbt/insights/checks/check_source_has_tests_by_name.py
+++ b/src/datapilot/core/platforms/dbt/insights/checks/check_source_has_tests_by_name.py
@@ -48,7 +48,11 @@ def generate(self, *args, **kwargs) -> List[DBTModelInsightResponse]:
     def _build_failure_result(self, source_unique_id: str, missing_tests: List[Dict]) -> DBTInsightResult:
         tests_str = ""
         for test in missing_tests:
-            tests_str += f"Test Name: {test.get(self.TEST_NAME_STR)}, Min Count: {test.get(self.TEST_COUNT_STR)}, Actual Count: {test.get('actual_count')}\n"
+            tests_str += (
+                f"Test Name: {test.get(self.TEST_NAME_STR)}, "
+                f"Min Count: {test.get(self.TEST_COUNT_STR)}, "
+                f"Actual Count: {test.get('actual_count')}\n"
+            )
 
         failure_message = f"The source `{source_unique_id}` does not have enough tests:\n{tests_str}. "
         recommendation = (
diff --git a/src/datapilot/core/platforms/dbt/insights/checks/check_source_has_tests_by_type.py b/src/datapilot/core/platforms/dbt/insights/checks/check_source_has_tests_by_type.py
index b78e1140..e7a93bf9 100644
--- a/src/datapilot/core/platforms/dbt/insights/checks/check_source_has_tests_by_type.py
+++ b/src/datapilot/core/platforms/dbt/insights/checks/check_source_has_tests_by_type.py
@@ -47,7 +47,11 @@ def generate(self, *args, **kwargs) -> List[DBTModelInsightResponse]:
     def _build_failure_result(self, source_unique_id: str, missing_tests) -> DBTInsightResult:
         missing_test_type_str = ""
         for test in missing_tests:
-            missing_test_type_str += f"Test type: {test.get(self.TEST_TYPE_STR)}, Min Count: {test.get(self.TEST_COUNT_STR)}, Actual Count: {test.get('actual_count')}\n"
+            missing_test_type_str += (
+                f"Test type: {test.get(self.TEST_TYPE_STR)}, "
+                f"Min Count: {test.get(self.TEST_COUNT_STR)}, "
+                f"Actual Count: {test.get('actual_count')}\n"
+            )
 
         failure_message = f"The source `{source_unique_id}` does not have enough tests for the following types:\n{missing_test_type_str}. "
         recommendation = (
diff --git a/src/datapilot/core/platforms/dbt/insights/modelling/downstream_models_dependent_on_source.py b/src/datapilot/core/platforms/dbt/insights/modelling/downstream_models_dependent_on_source.py
index 1025fe5a..5d1c8739 100644
--- a/src/datapilot/core/platforms/dbt/insights/modelling/downstream_models_dependent_on_source.py
+++ b/src/datapilot/core/platforms/dbt/insights/modelling/downstream_models_dependent_on_source.py
@@ -108,6 +108,7 @@ def generate(self, *args, **kwargs) -> List[DBTModelInsightResponse]:
                 )
             )
         self.logger.debug(
-            f"Finished generating insights for DBTDownstreamModelsDependentOnSource. Found {len(insights)} models with direct source dependencies"
+            "Finished generating insights for DBTDownstreamModelsDependentOnSource. "
+            f"Found {len(insights)} models with direct source dependencies"
         )
         return insights
diff --git a/src/datapilot/core/platforms/dbt/insights/modelling/root_model.py b/src/datapilot/core/platforms/dbt/insights/modelling/root_model.py
index b6c4d04d..9946dc62 100644
--- a/src/datapilot/core/platforms/dbt/insights/modelling/root_model.py
+++ b/src/datapilot/core/platforms/dbt/insights/modelling/root_model.py
@@ -15,7 +15,10 @@ class DBTRootModel(DBTModellingInsight):
 
     NAME = "Root model traceability"
     ALIAS = "root_model"
-    DESCRIPTION = "Identifies models in a dbt project with 0 direct parents, meaning these models cannot be traced back to a declared source or model."
+    DESCRIPTION = (
+        "Identifies models in a dbt project with 0 direct parents, "
+        "meaning these models cannot be traced back to a declared source or model."
+    )
     REASON_TO_FLAG = (
         "Best Practice is to ensure all models can be traced back to a source or another model in the project. "
" "Root models with no direct parents can lead to challenges in tracking data lineage and understanding" diff --git a/src/datapilot/core/platforms/dbt/utils.py b/src/datapilot/core/platforms/dbt/utils.py index 6f43dff6..d1875a7b 100644 --- a/src/datapilot/core/platforms/dbt/utils.py +++ b/src/datapilot/core/platforms/dbt/utils.py @@ -265,67 +265,47 @@ def get_hard_coded_references(sql_code): # Define regex patterns to match different types of hard-coded references from_hard_coded_references = { "from_var_1": r"""(?ix) - - # first matching group # from or join followed by at least 1 whitespace character - (from | join)\s + - - # second matching group - # opening {{, 0 or more whitespace character(s), var, 0 or more whitespace character(s), an opening parenthesis, 0 or more whitespace character(s), 1 or 0 quotation mark - ({{\s * var\s * \(\s *[\'\"]?) + (from | join)\s+ - # third matching group - # at least 1 of anything except a parenthesis or quotation mark - ([^)\'\"]+) + # second matching group + # opening {{, 0 or more whitespace character(s), var, 0 or more whitespace character(s) + # an opening parenthesis, 0 or more whitespace character(s), 1 or 0 quotation mark + ({{\s*var\s*\(\s*[\'\"]?) - # fourth matching group - # 1 or 0 quotation mark, 0 or more whitespace character(s) - ([\'\"]?\s*) + # third matching group + # at least 1 of anything except a parenthesis or quotation mark + ([^)\'\"]+) - # fifth matching group - # a closing parenthesis, 0 or more whitespace character(s), closing }} - (\)\s *}}) + # fourth matching group + # 1 or 0 quotation mark, 0 or more whitespace character(s) + ([\'\"]?\s*) + # fifth matching group + # a closing parenthesis, 0 or more whitespace character(s), closing }} + (\)\s*}}) """, "from_var_2": r"""(?ix) + # first matching group + # from or join followed by at least 1 whitespace character + (from|join)\s+ - # first matching group - # from or join followed by at least 1 whitespace character - ( - from | join)\s + - - # second matching group - # opening {{, 0 or more whitespace character(s), var, 0 or more whitespace character(s), an opening parenthesis, 0 or more whitespace character(s), 1 or 0 quotation mark - ({{\s * var\s * \(\s *[\'\"]?) - - # third matching group - # at least 1 of anything except a parenthesis or quotation mark - ([^)\'\"]+) - - # fourth matching group - # 1 or 0 quotation mark, 0 or more whitespace character(s) - ([\'\"]?\s*) - - # fifth matching group - # a comma - (,) - - # sixth matching group - # 0 or more whitespace character(s), 1 or 0 quotation mark - (\s *[\'\"]?) - - # seventh matching group - # at least 1 of anything except a parenthesis or quotation mark - ([^)\'\"]+) + # second matching group + # opening {{, 0 or more whitespace character(s), var, 0 or more whitespace character(s) + # an opening parenthesis, 0 or more whitespace character(s), 1 or 0 quotation mark + ({{\s*var\s*\(\s*[\'\"]?) 
- # eighth matching group - # 1 or 0 quotation mark, 0 or more whitespace character(s) - ([\'\"]?\s*) + # third matching group + # at least 1 of anything except a parenthesis or quotation mark + ([^)\'\"]+) - # ninth matching group - # a closing parenthesis, 0 or more whitespace character(s), closing }} - (\)\s *}}) + # fourth matching group + # 1 or 0 quotation mark, 0 or more whitespace character(s) + ([\'\"]?\s*) + # fifth matching group + # a closing parenthesis, 0 or more whitespace character(s), closing }} + (\)\s*}}) """, "from_table_1": r"""(?ix) diff --git a/src/datapilot/utils/utils.py b/src/datapilot/utils/utils.py index feb0cb7e..16ea3978 100644 --- a/src/datapilot/utils/utils.py +++ b/src/datapilot/utils/utils.py @@ -273,9 +273,21 @@ def generate_partial_manifest_catalog(changed_files, base_path: str = "./"): nodes_str = ",\n".join(json.dumps(data) for data in nodes_data + sources_data) query = ( - "{% set result = {} %}{% set nodes = [" - + nodes_str - + '] %}{% for n in nodes %}{% if n["resource_type"] == "source" %}{% set columns = adapter.get_columns_in_relation(source(n["name"], n["table"])) %}{% else %}{% set columns = adapter.get_columns_in_relation(ref(n["name"])) %}{% endif %}{% set new_columns = [] %}{% for column in columns %}{% do new_columns.append({"column": column.name, "dtype": column.dtype}) %}{% endfor %}{% do result.update({n["unique_id"]:new_columns}) %}{% endfor %}{{ tojson(result) }}' + "{% set result = {} %}" + "{% set nodes = [" + nodes_str + '] %}' + "{% for n in nodes %}" + "{% if n['resource_type'] == 'source' %}" + "{% set columns = adapter.get_columns_in_relation(source(n['name'], n['table'])) %}" + "{% else %}" + "{% set columns = adapter.get_columns_in_relation(ref(n['name'])) %}" + "{% endif %}" + "{% set new_columns = [] %}" + "{% for column in columns %}" + "{% do new_columns.append({'column': column.name, 'dtype': column.dtype}) %}" + "{% endfor %}" + "{% do result.update({n['unique_id']:new_columns}) %}" + "{% endfor %}" + "{{ tojson(result) }}" ) dbt_compile_output = run_macro(query, base_path) diff --git a/tests/core/platform/dbt/test_utils.py b/tests/core/platform/dbt/test_utils.py index 1625b82b..41e82a2b 100644 --- a/tests/core/platform/dbt/test_utils.py +++ b/tests/core/platform/dbt/test_utils.py @@ -2,16 +2,20 @@ import pytest -from datapilot.core.platforms.dbt.constants import BASE -from datapilot.core.platforms.dbt.constants import INTERMEDIATE -from datapilot.core.platforms.dbt.constants import MART -from datapilot.core.platforms.dbt.constants import OTHER -from datapilot.core.platforms.dbt.constants import STAGING -from datapilot.core.platforms.dbt.utils import MODEL_TYPE_PATTERNS -from datapilot.core.platforms.dbt.utils import _check_model_naming_convention -from datapilot.core.platforms.dbt.utils import classify_model_type_by_folder -from datapilot.core.platforms.dbt.utils import classify_model_type_by_name -from datapilot.core.platforms.dbt.utils import get_hard_coded_references +from datapilot.core.platforms.dbt.constants import ( + BASE, + INTERMEDIATE, + MART, + OTHER, + STAGING, +) +from datapilot.core.platforms.dbt.utils import ( + MODEL_TYPE_PATTERNS, + _check_model_naming_convention, + classify_model_type_by_folder, + classify_model_type_by_name, + get_hard_coded_references, +) @pytest.mark.parametrize( diff --git a/tests/utils/test_utils.py b/tests/utils/test_utils.py index 5eea78a3..13be3f46 100644 --- a/tests/utils/test_utils.py +++ b/tests/utils/test_utils.py @@ -2,10 +2,14 @@ import pytest -from 
-from datapilot.core.platforms.dbt.utils import get_models
-from datapilot.utils.utils import extract_folders_in_path
-from datapilot.utils.utils import is_superset_path
+from datapilot.core.platforms.dbt.utils import (
+    get_manifest_wrapper,
+    get_models,
+)
+from datapilot.utils.utils import (
+    extract_folders_in_path,
+    is_superset_path,
+)
 
 test_cases = [
     (Path("/home/user/documents/file.txt"), ["home", "user", "documents"]),
diff --git a/tox.ini b/tox.ini
index 8b1227db..c3705df5 100644
--- a/tox.ini
+++ b/tox.ini
@@ -14,68 +14,73 @@ envlist =
     clean,
     check,
     docs,
-    {py38,py39,py310,py311,py312,pypy38,pypy39,pypy310}-{cover,nocov},
+    py{38,39,310,311,312}-{cover,nocov},
+    pypy{39,310}-{cover,nocov},
     report
-ignore_basepython_conflict = true
+skip_missing_interpreters = true
 
 [testenv]
 basepython =
-    pypy38: {env:TOXPYTHON:pypy3.8}
-    pypy39: {env:TOXPYTHON:pypy3.9}
-    pypy310: {env:TOXPYTHON:pypy3.10}
-    py38: {env:TOXPYTHON:python3.8}
-    py39: {env:TOXPYTHON:python3.9}
-    py310: {env:TOXPYTHON:python3.10}
-    py311: {env:TOXPYTHON:python3.11}
-    py312: {env:TOXPYTHON:python3.12}
-    {bootstrap,clean,check,report,docs,codecov}: {env:TOXPYTHON:python3}
+    py38: python3.8
+    py39: python3.9
+    py310: python3.10
+    py311: python3.11
+    py312: python3.12
+    pypy39: pypy3.9
+    pypy310: pypy3.10
+    {clean,check,docs,report}: python3
 setenv =
     PYTHONPATH={toxinidir}/tests
     PYTHONUNBUFFERED=yes
+    cover: SETUPPY_CFLAGS=-coverage
 passenv =
     *
-usedevelop =
-    cover: true
-    nocov: false
 deps =
-    pytest
-    cover: pytest-cov
+    pytest>=6.0
+    pytest-cov
 commands =
-    nocov: {posargs:pytest -vv --ignore=src}
-    cover: {posargs:pytest --cov --cov-report=term-missing --cov-report=xml -vv}
+    {posargs:pytest --cov --cov-report=term-missing -vv tests}
 
 [testenv:check]
 deps =
     docutils
     check-manifest
-    pre-commit
+    flake8
     readme-renderer
     pygments
     isort
 skip_install = true
 commands =
     python setup.py check --strict --metadata --restructuredtext
-    check-manifest .
-    pre-commit run --all-files --show-diff-on-failure
+    check-manifest {toxinidir}
+    flake8 src tests setup.py
+    isort --verbose --check-only --diff --filter-files src tests setup.py
 
 [testenv:docs]
-usedevelop = true
 deps =
     -r{toxinidir}/docs/requirements.txt
commands =
     sphinx-build {posargs:-E} -b html docs dist/docs
     sphinx-build -b linkcheck docs dist/docs
 
+[testenv:clean]
+commands = coverage erase
+skip_install = true
+deps = coverage
+
 [testenv:report]
-deps =
-    coverage
+deps = coverage
 skip_install = true
 commands =
     coverage report
     coverage html
 
-[testenv:clean]
-commands = coverage erase
-skip_install = true
-deps =
-    coverage
+[gh-actions]
+python =
+    3.8: py38-cover,py38-nocov
+    3.9: py39-cover,py39-nocov
+    3.10: py310-cover,py310-nocov
+    3.11: py311-cover,py311-nocov
+    3.12: py312-cover,py312-nocov
+    pypy-3.9: pypy39-cover,pypy39-nocov
+    pypy-3.10: pypy310-cover,pypy310-nocov