diff --git a/sqlite_utils/db.py b/sqlite_utils/db.py
index 97c55b782..fb1ae4f91 100644
--- a/sqlite_utils/db.py
+++ b/sqlite_utils/db.py
@@ -1151,6 +1151,42 @@ def extract(self, columns, table=None, fk_column=None, rename=None):
         self.add_foreign_key(fk_column, table, "id")
         return self
 
+    def extract_expand(
+        self, column, expand, table=None, fk_column=None, fk_column_type=int, pk=None
+    ):
+        """
+        Use expand function to transform values in column and extract them
+        into a new table.
+
+        Each value in ``column`` is passed to ``expand``. When the result is a
+        dictionary it is inserted into ``table`` (with ``replace=True``) and the
+        primary key of that row is stored in ``fk_column``. Rows where ``expand``
+        returns anything else never have ``fk_column`` updated. The original
+        column is then dropped and ``fk_column`` becomes a foreign key to ``table``.
+
+        - ``column`` - name of the column holding the values to expand
+        - ``expand`` - function called with each column value
+        - ``table`` - table to extract into, defaults to the value of ``column``
+        - ``fk_column`` - new foreign key column, defaults to ``<table>_id``
+        - ``fk_column_type`` - type of the new foreign key column, defaults to ``int``
+        - ``pk`` - primary key column of the extracted table, defaults to ``"id"``
+        """
+        table = table or column
+        fk_column = fk_column or "{}_id".format(table)
+        # Honor the caller's pk; it used to be ignored in favor of a hard-coded "id"
+        pk = pk or "id"
+        self.add_column(fk_column, fk_column_type)
+        for row_pk, row in self.pks_and_rows_where():
+            expanded = expand(row[column])
+            if isinstance(expanded, dict):
+                new_pk = self.db[table].insert(expanded, pk=pk, replace=True).last_pk
+                self.update(row_pk, {fk_column: new_pk})
+        # Can drop the original column now
+        self.transform(drop=[column])
+        # And add that foreign key
+        self.add_foreign_key(fk_column, table, pk)
+        return self
+
     def create_index(self, columns, index_name=None, unique=False, if_not_exists=False):
         if index_name is None:
             index_name = "idx_{}_{}".format(
diff --git a/tests/test_extract.py b/tests/test_extract.py
index 25dd6e228..2507c758b 100644
--- a/tests/test_extract.py
+++ b/tests/test_extract.py
@@ -1,5 +1,6 @@
-from sqlite_utils.db import Index, InvalidColumns
+from sqlite_utils.db import Index, InvalidColumns, ForeignKey
 import itertools
+import json
 import pytest
 
 
@@ -171,3 +172,24 @@ def test_extract_error_on_incompatible_existing_lookup_table(fresh_db):
     fresh_db["species2"].insert({"id": 1, "common_name": 3.5})
     with pytest.raises(InvalidColumns):
         fresh_db["tree"].extract("common_name", table="species2")
+
+
+def test_extract_expand(fresh_db):
+    fresh_db["trees"].insert(
+        {"id": 1, "species": '{"id": 5, "name": "Tree 1", "common_name": "Palm"}'},
+        pk="id",
+    )
+    assert fresh_db.table_names() == ["trees"]
+    fresh_db["trees"].extract_expand(
+        "species", expand=json.loads, table="species", pk="id"
+    )
+    assert set(fresh_db.table_names()) == {"trees", "species"}
+    assert list(fresh_db["trees"].rows) == [{"id": 1, "species_id": 5}]
+    assert list(fresh_db["species"].rows) == [
+        {"id": 5, "name": "Tree 1", "common_name": "Palm"}
+    ]
+    assert fresh_db["trees"].foreign_keys == [
+        ForeignKey(
+            table="trees", column="species_id", other_table="species", other_column="id"
+        )
+    ]
diff --git a/tests/test_rows.py b/tests/test_rows.py
index 3d33ecb53..73bd94fae 100644
--- a/tests/test_rows.py
+++ b/tests/test_rows.py
@@ -104,4 +104,3 @@ def test_pks_and_rows_where_compound_pk(fresh_db):
         (("number", 1), {"type": "number", "number": 1, "plusone": 2}),
         (("number", 2), {"type": "number", "number": 2, "plusone": 3}),
     ]
-    assert False