Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion teryt/management/commands/teryt_auto_update.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
class Command(BaseCommand):
help = 'Import TERYT data from ZIP files prepared by GUS,\
auto download, unpack and update with them.'
option_list = BaseCommand.option_list

def handle(self, *args, **options):
# download zip files from GUS site
Expand Down
25 changes: 14 additions & 11 deletions teryt/management/commands/teryt_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
already existing in database
"""

from optparse import make_option
import zipfile

from django.core.management.base import BaseCommand, CommandError
Expand All @@ -18,29 +17,33 @@
class Command(BaseCommand):
args = '[xml/zip file list]'
help = 'Import TERYT data from XML/ZIP files prepared by GUS'
option_list = BaseCommand.option_list + (
make_option('--update',
action='store_true',
dest='update',
default=False,
help='Update exisitng data'),
)

def add_arguments(self, parser):
parser.add_argument('file', nargs='+', type=str)
parser.add_argument(
'--update',
action='store_true',
dest='update',
default=False,
help='Update exisitng data'
)

def handle(self, *args, **options):
files = options['file']
force_ins = not options['update']

if not args:
if not files:
raise CommandError('At least 1 file name required')

for data_file in args:
for data_file in files:
self.stdout.write('Working on {}'.format(data_file))
if zipfile.is_zipfile(data_file):
zfile = zipfile.ZipFile(data_file)
fname = zfile.namelist()[0]
with zfile.open(fname) as xml_file:
update_database(xml_file, fname, force_ins)
else:
with open(data_file) as xml_file:
with open(data_file, encoding="utf8") as xml_file:
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@rexopl on Python 2.7 this throws an error saying that the `encoding` argument is not known. From what I can see, the built-in `open()` only accepts `encoding` in Python 3 (where it is the same function as `io.open`), so this has to be removed for Python 2.7.

More info I've found on SO https://stackoverflow.com/questions/10971033/backporting-python-3-openencoding-utf-8-to-python-2

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah I was testing on python3 :). Right!

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@rexopl there are still some problems with this, and I'm confused about why it reports "Unknown filename".

(PanelDjango) [Greyer@Asgaard] 23:05:00
(devel) ~/Documents/Repos/panel-django > ./manage.py teryt_parse addresses/xml/WMRODZ_2018-12-14.xml addresses/xml/TERC_Urzedowy_2018-12-14.xml addresses/xml/SIMC_Urzedowy_2018-12-14.xml addresses/xml/ULIC_Urzedowy_2018-12-14.xml
Working on addresses/xml/WMRODZ_2018-12-14.xml
CommandError: Unknown filename: 'WMRODZ_2018-12-14.xml'
(PanelDjango) [Greyer@Asgaard] 23:05:05
(devel) ~/Documents/Repos/panel-django > ll addresses/xml/
total 220320
drwxr-xr-x   6 Greyer  staff   192B Dec 14 23:31 .
drwxr-xr-x  14 Greyer  staff   448B Dec 15 18:33 ..
-rw-r--r--   1 Greyer  staff    28M Dec 14 00:00 SIMC_Urzedowy_2018-12-14.xml
-rw-r--r--   1 Greyer  staff   896K Dec 14 00:00 TERC_Urzedowy_2018-12-14.xml
-rw-r--r--   1 Greyer  staff    79M Dec 14 00:00 ULIC_Urzedowy_2018-12-14.xml
-rw-r--r--   1 Greyer  staff   1.5K Dec 14 22:01 WMRODZ_2018-12-14.xml
(PanelDjango) [Greyer@Asgaard] 23:05:37
(devel) ~/Documents/Repos/panel-django >

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is some problem with the new files from GUS. There are even some warnings during zip extraction. I will check it in my free time.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But generally you need to rename the files to the hardcoded names the parser expects:
fn_dict = OrderedDict([
('WMRODZ.xml', RodzajMiejscowosci),
('TERC.xml', JednostkaAdministracyjna),
('SIMC.xml', Miejscowosc),
('ULIC.xml', Ulica),
])

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So I should invoke

./manage.py teryt_parse addresses/xml/

without filenames?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, pass the path including the filename: ./manage.py teryt_parse addresses/xml/WMRODZ.xml

update_database(xml_file, data_file, force_ins)
self.stdout.write('File {} uploaded'.format(data_file))

Expand Down
2 changes: 2 additions & 0 deletions teryt/utils_zip.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@ def update_database(xml_stream, fname, force_flag):
else x['SYM'])

for vals in row_list:
if len(vals) == 0:
continue
instance = teryt_class()
instance.set_val(vals)
instance.aktywny = True
Expand Down