Skip to content

Commit 2d2a2f5

Browse files
authored
Merge pull request #804 from basedosdados/feat/search-ajust
feat(search): add new filter in Elastic Search
2 parents bc0bcab + e0e4f45 commit 2d2a2f5

File tree

6 files changed

+218
-17
lines changed

6 files changed

+218
-17
lines changed

backend/apps/api/v1/admin.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -615,6 +615,10 @@ class DatasetAdmin(OrderedInlineModelAdminMixin, TabbedTranslationAdmin):
615615
"contains_raw_data_sources",
616616
"contains_information_requests",
617617
"contains_closed_data",
618+
"contains_direct_download_free",
619+
"contains_direct_download_paid",
620+
"contains_temporalcoverage_free",
621+
"contains_temporalcoverage_paid",
618622
"page_views",
619623
"created_at",
620624
"updated_at",
@@ -697,6 +701,10 @@ class TableAdmin(OrderedInlineModelAdminMixin, TabbedTranslationAdmin):
697701
"uncompressed_file_size",
698702
"compressed_file_size",
699703
"contains_open_data",
704+
"contains_direct_download_free",
705+
"contains_direct_download_paid",
706+
"contains_temporalcoverage_free",
707+
"contains_temporalcoverage_paid",
700708
"contains_closed_data",
701709
"page_views",
702710
]

backend/apps/api/v1/models.py

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -680,6 +680,54 @@ def contains_closed_data(self):
680680
return True
681681
return False
682682

683+
@property
def contains_direct_download_free(self):
    """Tell whether at least one listed table offers free direct download.

    Tables whose status slug is ``under_review`` or ``excluded`` are
    skipped, and so are the tables whose slug is ``dicionario`` or
    ``dictionary``.

    Returns:
        bool: ``True`` if any remaining table has a truthy
        ``contains_direct_download_free``; ``False`` otherwise, including
        when no table is left after the exclusions.
    """
    candidate_tables = self.tables.exclude(
        status__slug__in=["under_review", "excluded"]
    ).exclude(slug__in=["dicionario", "dictionary"])
    # any() returns a real boolean (the previous len([...]) returned a count)
    # and stops at the first matching table instead of building a list.
    return any(table.contains_direct_download_free for table in candidate_tables)
694+
695+
@property
def contains_direct_download_paid(self):
    """Tell whether at least one listed table offers paid direct download.

    Tables whose status slug is ``under_review`` or ``excluded`` are
    skipped, and so are the tables whose slug is ``dicionario`` or
    ``dictionary``.

    Returns:
        bool: ``True`` if any remaining table has a truthy
        ``contains_direct_download_paid``; ``False`` otherwise, including
        when no table is left after the exclusions.
    """
    candidate_tables = self.tables.exclude(
        status__slug__in=["under_review", "excluded"]
    ).exclude(slug__in=["dicionario", "dictionary"])
    # any() returns a real boolean (the previous len([...]) returned a count)
    # and stops at the first matching table instead of building a list.
    return any(table.contains_direct_download_paid for table in candidate_tables)
706+
707+
@property
def contains_temporalcoverage_free(self):
    """Tell whether at least one listed table reports free temporal coverage.

    Tables whose status slug is ``under_review`` or ``excluded`` are
    skipped, and so are the tables whose slug is ``dicionario`` or
    ``dictionary``.

    Returns:
        bool: ``True`` if any remaining table has a truthy
        ``contains_temporalcoverage_free``; ``False`` otherwise, including
        when no table is left after the exclusions.
    """
    candidate_tables = self.tables.exclude(
        status__slug__in=["under_review", "excluded"]
    ).exclude(slug__in=["dicionario", "dictionary"])
    # any() returns a real boolean (the previous len([...]) returned a count)
    # and stops at the first matching table instead of building a list.
    return any(table.contains_temporalcoverage_free for table in candidate_tables)
718+
719+
@property
def contains_temporalcoverage_paid(self):
    """Tell whether at least one listed table reports paid temporal coverage.

    Tables whose status slug is ``under_review`` or ``excluded`` are
    skipped, and so are the tables whose slug is ``dicionario`` or
    ``dictionary``.

    Returns:
        bool: ``True`` if any remaining table has a truthy
        ``contains_temporalcoverage_paid``; ``False`` otherwise, including
        when no table is left after the exclusions.
    """
    candidate_tables = self.tables.exclude(
        status__slug__in=["under_review", "excluded"]
    ).exclude(slug__in=["dicionario", "dictionary"])
    # any() returns a real boolean (the previous len([...]) returned a count)
    # and stops at the first matching table instead of building a list.
    return any(table.contains_temporalcoverage_paid for table in candidate_tables)
730+
683731
@property
684732
def contains_tables(self):
685733
"""Returns true if there are tables in the dataset"""
@@ -1098,6 +1146,28 @@ def contains_closed_data(self):
10981146
return True
10991147
return False
11001148

1149+
@property
def contains_direct_download_free(self):
    """Tell whether the table is small enough for free direct download.

    Returns:
        bool: ``False`` when ``uncompressed_file_size`` is ``None``;
        otherwise whether it is strictly below ``100 * 1024 * 1024``
        (100 MiB, assuming the size is stored in bytes -- TODO confirm).
    """
    size = self.uncompressed_file_size
    free_limit = 100 * 1024 * 1024
    return size is not None and size < free_limit
1154+
1155+
@property
def contains_direct_download_paid(self):
    """Tell whether the table is too large for free direct download.

    Returns:
        bool: ``False`` when ``uncompressed_file_size`` is ``None``;
        otherwise whether it is at least ``100 * 1024 * 1024``
        (100 MiB, assuming the size is stored in bytes -- TODO confirm).
    """
    size = self.uncompressed_file_size
    if size is None:
        return False
    # ">=" rather than ">": the free tier is defined as strictly below the
    # limit, so a table of exactly the limit size previously matched neither
    # flag. NOTE(review): confirm the boundary belongs to the paid tier.
    return size >= 100 * 1024 * 1024
1160+
1161+
@property
def contains_temporalcoverage_free(self) -> bool:
    """Tell whether the table's temporal coverage is entirely open.

    Returns:
        bool: ``True`` only when ``get_full_temporal_coverage([self])``
        yields at least one entry and every entry has ``type == "open"``.
    """
    coverage = get_full_temporal_coverage([self]) or []
    # all() is vacuously True on an empty sequence, which would flag a table
    # with no coverage at all as "free"; require at least one entry.
    return bool(coverage) and all(entry["type"] == "open" for entry in coverage)
1165+
1166+
@property
def contains_temporalcoverage_paid(self) -> bool:
    """Tell whether any part of the table's temporal coverage is closed.

    Returns:
        bool: ``True`` when at least one entry returned by
        ``get_full_temporal_coverage([self])`` has ``type == "closed"``;
        ``False`` otherwise, including when nothing is returned.
    """
    entries = get_full_temporal_coverage([self])
    if not entries:
        return False
    for entry in entries:
        if entry["type"] == "closed":
            return True
    return False
1170+
11011171
@property
11021172
def temporal_coverage(self) -> dict:
11031173
"""Temporal coverage"""

backend/apps/api/v1/schemas.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,11 @@ class Dataset(BaseModel):
8181
contains_open_data: bool
8282
contains_closed_data: bool
8383
#
84+
contains_direct_download_free: bool
85+
contains_direct_download_paid: bool
86+
contains_temporalcoverage_free: bool
87+
contains_temporalcoverage_paid: bool
88+
#
8489
themes: List[Theme]
8590
organization: List[Organization]
8691
temporal_coverage: List[str]

backend/apps/api/v1/search_indexes.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -242,6 +242,26 @@ class DatasetIndex(indexes.SearchIndex, indexes.Indexable):
242242
indexed=False,
243243
)
244244

245+
contains_direct_download_free = indexes.BooleanField(
246+
model_attr="contains_direct_download_free",
247+
indexed=False,
248+
)
249+
250+
contains_direct_download_paid = indexes.BooleanField(
251+
model_attr="contains_direct_download_paid",
252+
indexed=False,
253+
)
254+
255+
contains_temporalcoverage_free = indexes.BooleanField(
256+
model_attr="contains_temporalcoverage_free",
257+
indexed=False,
258+
)
259+
260+
contains_temporalcoverage_paid = indexes.BooleanField(
261+
model_attr="contains_temporalcoverage_paid",
262+
indexed=False,
263+
)
264+
245265
contains_tables = indexes.BooleanField(
246266
model_attr="contains_tables",
247267
indexed=False,

backend/apps/api/v1/search_views.py

Lines changed: 113 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
from django.core.files.storage import default_storage as storage
44
from django.http import JsonResponse
5+
from django.views import View
56
from haystack.forms import FacetedSearchForm
67
from haystack.generic_views import FacetedSearchView
78
from haystack.models import SearchResult
@@ -30,17 +31,6 @@ def search(self):
3031
# Start with all results
3132
sqs = self.searchqueryset.all()
3233

33-
# Debug print to see all form data
34-
print(
35-
"DEBUG: Form data:",
36-
{
37-
"spatial_coverage": self.spatial_coverage,
38-
"theme": self.theme,
39-
"organization": self.organization,
40-
"tag": self.tag,
41-
},
42-
)
43-
4434
# Text search if provided
4535
if q := self.cleaned_data.get("q"):
4636
sqs = (
@@ -109,6 +99,10 @@ class DatasetSearchView(FacetedSearchView):
10999
"contains_tables",
110100
"contains_raw_data_sources",
111101
"contains_information_requests",
102+
"contains_direct_download_free",
103+
"contains_direct_download_paid",
104+
"contains_temporalcoverage_free",
105+
"contains_temporalcoverage_paid",
112106
]
113107

114108
@property
@@ -153,10 +147,10 @@ def get(self, request, *args, **kwargs):
153147
}
154148
)
155149

156-
def get_facets(self, sqs: SearchQuerySet, facet_size=200):
150+
def get_facets(self, sqs: SearchQuerySet, facet_size=6):
157151
sqs = sqs.facet("theme_slug", size=facet_size)
158152
sqs = sqs.facet("organization_slug", size=facet_size)
159-
sqs = sqs.facet("spatial_coverage", size=facet_size)
153+
# sqs = sqs.facet("spatial_coverage", size=facet_size)
160154
sqs = sqs.facet("tag_slug", size=facet_size)
161155
sqs = sqs.facet("entity_slug", size=facet_size)
162156

@@ -296,9 +290,7 @@ def as_search_result(result: SearchResult, locale="pt"):
296290
)
297291

298292
entities = []
299-
for slug, name in zip(
300-
result.entity_slug or [], getattr(result, f"entity_name_{locale}") or []
301-
):
293+
for slug, name in zip(result.entity_slug or [], getattr(result, f"entity_name_{locale}") or []):
302294
entities.append(
303295
{
304296
"slug": slug,
@@ -338,6 +330,10 @@ def as_search_result(result: SearchResult, locale="pt"):
338330
"contains_open_data": result.contains_open_data,
339331
"contains_closed_data": result.contains_closed_data,
340332
"contains_tables": result.contains_tables,
333+
"contains_direct_download_free": result.contains_direct_download_free,
334+
"contains_direct_download_paid": result.contains_direct_download_paid,
335+
"contains_temporalcoverage_free": result.contains_temporalcoverage_free,
336+
"contains_temporalcoverage_paid": result.contains_temporalcoverage_paid,
341337
"contains_raw_data_sources": result.contains_raw_data_sources,
342338
"contains_information_requests": result.contains_information_requests,
343339
"n_tables": result.n_tables,
@@ -349,3 +345,104 @@ def as_search_result(result: SearchResult, locale="pt"):
349345
"first_raw_data_source_id": result.first_raw_data_source_id,
350346
"first_information_request_id": result.first_information_request_id,
351347
}
348+
349+
350+
class DatasetFacetValuesView(View):
    """Return the values of one search facet under the current filters.

    Query parameters read from ``request.GET``:
        facet: facet to expand; lower-cased and checked against
            ``facet_fields``.
        locale: suffix used to pick the ``name_<locale>`` column
            (defaults to ``"pt"``).
        q, contains, theme, organization, spatial_coverage, tag,
        observation_level: forwarded to ``DatasetSearchForm``.

    The JSON response carries ``facet``, ``values`` (each with ``key``,
    ``count`` and, when a model backs the facet, ``name`` and ``fallback``),
    ``count`` and ``locale``. An unknown facet yields a 400 response with an
    ``error`` message.
    """

    facet_fields = [
        "tag_slug",
        "theme_slug",
        "entity_slug",
        "organization_slug",
        "spatial_coverage",
    ]

    # Large bucket count so that all values of the facet are returned.
    facet_size = 1000

    @property
    def locale(self):
        """Locale requested through the ``locale`` query parameter."""
        return self.request.GET.get("locale", "pt")

    @property
    def facet_name(self):
        """Lower-cased ``facet`` query parameter (empty string if absent)."""
        return self.request.GET.get("facet", "").lower()

    def get_form_kwargs(self):
        """Build the keyword arguments for ``DatasetSearchForm`` from the query string."""
        params = self.request.GET
        kwargs = {
            "contains": params.getlist("contains"),
            "theme": params.getlist("theme"),
            "organization": params.getlist("organization"),
            "spatial_coverage": params.getlist("spatial_coverage"),
            "tag": params.getlist("tag"),
            "observation_level": params.getlist("observation_level"),
            "locale": self.locale,
        }

        # The form only receives bound data when a text query is present.
        if "q" in params:
            kwargs["data"] = {"q": params["q"]}

        return kwargs

    def get(self, request, *args, **kwargs):
        """Answer with the facet values, or with a 400 error for an unknown facet."""
        if self.facet_name not in self.facet_fields:
            return JsonResponse(
                {
                    "error": f"Facet '{self.facet_name}' not available. Choose from: {self.facet_fields}"
                },
                status=400,
            )

        values = self._collect_facet_values()
        self._attach_names(values)

        return JsonResponse(
            {
                "facet": self.facet_name,
                "values": values,
                "count": len(values),
                "locale": self.locale,
            }
        )

    def _collect_facet_values(self):
        """Run the filtered search and list the non-empty facet buckets."""
        form = DatasetSearchForm(**self.get_form_kwargs())
        sqs = form.search().facet(self.facet_name, size=self.facet_size)
        buckets = sqs.facet_counts().get("fields", {}).get(self.facet_name, [])
        return [{"key": key, "count": count} for key, count in buckets if key]

    def _facet_model(self):
        """Return the model used to name the requested facet's values, if any."""
        facet = self.facet_name
        if facet == "theme_slug":
            return Theme
        if facet == "organization_slug":
            return Organization
        if facet == "tag_slug":
            return Tag
        if facet == "entity_slug":
            return Entity
        if facet == "spatial_coverage":
            return Area
        return None

    def _attach_names(self, values):
        """Add ``name`` and ``fallback`` to each value, in place.

        ``fallback`` is ``True`` whenever the localized name could not be
        used, i.e. the shown name is the default name or the key itself.
        """
        model = self._facet_model()
        if model is None or not values:
            return

        name_field = f"name_{self.locale}"
        columns = ["slug", "name"]
        # `locale` comes straight from the query string; only request the
        # localized column when the model really has it, so an unsupported
        # locale falls back to the default name instead of raising.
        if name_field in {field.name for field in model._meta.get_fields()}:
            columns.append(name_field)

        rows = model.objects.filter(slug__in=[v["key"] for v in values]).values(*columns)
        names_by_slug = {}
        for row in rows:
            localized = row.get(name_field)
            names_by_slug[row["slug"]] = {
                "name": localized or row.get("name") or row["slug"],
                # Empty and missing translations both count as a fallback,
                # matching the `or` chain used for the name above.
                "fallback": not localized,
            }

        for value in values:
            value.update(
                names_by_slug.get(value["key"], {"name": value["key"], "fallback": True})
            )

backend/apps/api/v1/urls.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from django.views.decorators.csrf import csrf_exempt
55
from graphene_file_upload.django import FileUploadGraphQLView
66

7-
from backend.apps.api.v1.search_views import DatasetSearchView
7+
from backend.apps.api.v1.search_views import DatasetFacetValuesView, DatasetSearchView
88
from backend.apps.api.v1.views import DatasetRedirectView
99

1010

@@ -22,6 +22,7 @@ def graphql_view():
2222
path("api/v1/graphql", graphql_view()),
2323
path("graphql", graphql_view()),
2424
path("search/", DatasetSearchView.as_view()),
25+
path("facet_values/", DatasetFacetValuesView.as_view()),
2526
path("dataset/", DatasetRedirectView.as_view()),
2627
path("dataset_redirect/", DatasetRedirectView.as_view()),
2728
]

0 commit comments

Comments
 (0)