Skip to content

Commit a41ab89

Browse files
committed
Take analysis settings out of models
The approach in which each model could define its own custom analyzers did not match Elasticsearch's structure well and created more complexity than it was worth.
1 parent 36ec42f commit a41ab89

File tree

6 files changed

+28
-30
lines changed

6 files changed

+28
-30
lines changed

annotator/annotation.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,13 +35,10 @@
3535
}
3636
}
3737

38-
ANALYSIS = {}
39-
4038
class Annotation(es.Model):
4139

4240
__type__ = TYPE
4341
__mapping__ = MAPPING
44-
__analysis__ = ANALYSIS
4542

4643
def save(self, *args, **kwargs):
4744
_add_default_permissions(self)

annotator/elasticsearch.py

Lines changed: 6 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -72,9 +72,8 @@ def drop_all(self):
7272
self.conn.indices.close(self.index)
7373
self.conn.indices.delete(self.index)
7474

75-
def create_models(self, models):
75+
def create_models(self, models, analysis_settings):
7676
mappings = _compile_mappings(models)
77-
analysis = _compile_analysis(models)
7877

7978
# Test for index existence while also checking if connection works
8079
try:
@@ -88,17 +87,17 @@ def create_models(self, models):
8887
# If index does not yet exist, simply create the index
8988
self.conn.indices.create(self.index, body={
9089
'mappings': mappings,
91-
'settings': {'analysis': analysis},
90+
'settings': {'analysis': analysis_settings},
9291
})
9392
else:
9493
# Otherwise, update its settings and mappings
95-
self._update_analysis(analysis)
94+
self._update_analysis(analysis_settings)
9695
self._update_mappings(mappings)
9796

9897
def _update_analysis(self, analysis):
9998
"""Update analyzers and filters"""
100-
settings = self.conn.indices.get_settings(index=self.index)
101-
existing = settings[self.index]['settings']['index']['analysis']
99+
settings = self.conn.indices.get_settings(index=self.index).values()[0]
100+
existing = settings['settings']['index'].get('analysis', {})
102101
# Only bother if new settings would differ from existing settings
103102
if not self._analysis_up_to_date(existing, analysis):
104103
try:
@@ -122,7 +121,7 @@ def _update_mappings(self, mappings):
122121

123122
@staticmethod
124123
def _analysis_up_to_date(existing, analysis):
125-
"""Tell whether existing settings are up to date"""
124+
"""Tell whether existing analysis settings are up to date"""
126125
new_analysis = existing.copy()
127126
for section, items in analysis.items():
128127
new_analysis.setdefault(section,{}).update(items)
@@ -264,21 +263,6 @@ def _compile_mappings(models):
264263
return mappings
265264

266265

267-
def _compile_analysis(models):
268-
"""Merge the custom analyzers and such from the models"""
269-
analysis = {}
270-
for model in models:
271-
for section, items in model.get_analysis().items():
272-
existing_items = analysis.setdefault(section, {})
273-
for name in items:
274-
if name in existing_items:
275-
fmt = "Duplicate definition of 'index.analysis.{}.{}'."
276-
msg = fmt.format(section, name)
277-
raise RuntimeError(msg)
278-
existing_items.update(items)
279-
return analysis
280-
281-
282266
def _csv_split(s, delimiter=','):
283267
return [r for r in csv.reader([s], delimiter=delimiter)][0]
284268

annotator/elasticsearch_analyzers.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
"""Custom Elasticsearch analyzers that can be used for indexing fields in
2+
models (Annotation, Document).
3+
"""
4+
5+
ANALYSIS = {
6+
'analyzer': {},
7+
'filter': {},
8+
'tokenizer': {},
9+
}

annotator/reindexer.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,13 @@
44

55
from .annotation import Annotation
66
from .document import Document
7+
from .elasticsearch_analyzers import ANALYSIS
78

89

910
class Reindexer(object):
1011

1112
es_models = Annotation, Document
13+
analysis_settings = ANALYSIS
1214

1315
def __init__(self, conn, interactive=False):
1416
self.conn = conn
@@ -60,7 +62,9 @@ def alias(self, index, alias):
6062

6163
def get_index_config(self):
6264
# Configure index mappings
63-
index_config = {'mappings': {}}
65+
index_config = {'mappings': {},
66+
'settings': {'analysis': self.analysis_settings},
67+
}
6468
for model in self.es_models:
6569
index_config['mappings'].update(model.get_mapping())
6670
return index_config

run.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,8 @@
2020

2121
from flask import Flask, g, current_app
2222
import elasticsearch
23-
from annotator import es, annotation, auth, authz, document, store
23+
from annotator import es, annotation, auth, authz, document, \
24+
elasticsearch_analyzers, store
2425
from tests.helpers import MockUser, MockConsumer, MockAuthenticator
2526
from tests.helpers import mock_authorizer
2627

@@ -61,7 +62,8 @@ def main():
6162
es.authorization_enabled = app.config['AUTHZ_ON']
6263

6364
try:
64-
es.create_models([annotation.Annotation, document.Document])
65+
es.create_models(models=[annotation.Annotation, document.Document],
66+
analysis_settings=elasticsearch_analyzers.ANALYSIS)
6567
except elasticsearch.exceptions.RequestError as e:
6668
if e.error.startswith('MergeMappingException'):
6769
date = time.strftime('%Y-%m-%d')

tests/__init__.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
import os
22
from flask import Flask, g, request
33

4-
from annotator import es, auth, authz, annotation, store, document
4+
from annotator import es, auth, authz, annotation, document, \
5+
elasticsearch_analyzers, store
56

67
from .helpers import MockUser, MockConsumer
78

@@ -33,7 +34,8 @@ def setup_class(cls):
3334
es.drop_all()
3435

3536
def setup(self):
36-
es.create_models([annotation.Annotation, document.Document])
37+
es.create_models(models=[annotation.Annotation, document.Document],
38+
analysis_settings=elasticsearch_analyzers.ANALYSIS)
3739
es.conn.cluster.health(wait_for_status='yellow')
3840
self.cli = self.app.test_client()
3941

0 commit comments

Comments
 (0)