From 10e04749b3a3ba28ba79a670b4069da5b43a03f7 Mon Sep 17 00:00:00 2001 From: Jerome Berthier Date: Thu, 26 Oct 2023 19:48:55 +0200 Subject: [PATCH 1/2] Add Query terms --- src/query.rs | 78 ++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 76 insertions(+), 2 deletions(-) diff --git a/src/query.rs b/src/query.rs index ef841a0a..31e2e5b6 100644 --- a/src/query.rs +++ b/src/query.rs @@ -1,5 +1,7 @@ use pyo3::prelude::*; use tantivy as tv; +use crate::to_pyerr; +use tv::schema::{Field, Term}; /// Tantivy's Query #[pyclass(frozen)] @@ -15,7 +17,79 @@ impl Query { #[pymethods] impl Query { + #[staticmethod] + fn term(field_id: u32, text: &str) -> Query { + let term = Term::from_field_text(Field::from_field_id(field_id), text); + Query { + inner: Box::new(tv::query::TermQuery::new( + term, + tv::schema::IndexRecordOption::Basic, + )), + } + } + + #[staticmethod] + fn fuzzy_term(field_id: u32, distance: u8, text: &str) -> Query { + let ftq = tv::query::FuzzyTermQuery::new( + Term::from_field_text(Field::from_field_id(field_id), text), + distance, + true, + ); + Query { + inner: (Box::new(ftq)), + } + } + + #[staticmethod] + fn regex(field_id: u32, pattern: &str) -> PyResult { + let rq = tv::query::RegexQuery::from_pattern( + pattern, + Field::from_field_id(field_id), + ) + .map_err(to_pyerr)?; + Ok(Query { + inner: Box::new(rq), + }) + } + + #[staticmethod] + fn phrase(field_id: u32, words: Vec<&str>) -> Query { + let terms = words + .iter() + .map(|&w| Term::from_field_text(Field::from_field_id(field_id), w)) + .collect::>(); + Query { + inner: Box::new(tv::query::PhraseQuery::new(terms)), + } + } + + #[staticmethod] + fn boost(q: &Query, boost: f32) -> Query { + let bq = tv::query::BoostQuery::new(q.get().box_clone(), boost); + Query { + inner: Box::new(bq), + } + } + + #[staticmethod] + fn and_q(qs : Vec>) -> Query { + Query { + inner: Box::new(tv::query::BooleanQuery::intersection( + qs.iter().map(|q| q.get().box_clone()).collect::>() + )) + } + } + 
+ #[staticmethod] + fn or_q(qs : Vec>) -> Query { + Query { + inner: Box::new(tv::query::BooleanQuery::union( + qs.iter().map(|q| q.get().box_clone()).collect::>() + )) + } + } + fn __repr__(&self) -> PyResult { - Ok(format!("Query({:?})", self.get())) + Ok(format!("{:#?}", self.get())) } -} +} \ No newline at end of file From 15c399ce51f61fc9cfbba9ce3c96fb0e054b590b Mon Sep 17 00:00:00 2001 From: Jerome Berthier Date: Tue, 31 Oct 2023 09:27:06 +0100 Subject: [PATCH 2/2] wip query & geocoding --- Cargo.lock | 2 +- Cargo.toml | 2 +- src/geocoding.rs | 46 ++++++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 8 ++++++++ src/query.rs | 14 +++++++------- 5 files changed, 63 insertions(+), 9 deletions(-) create mode 100644 src/geocoding.rs diff --git a/Cargo.lock b/Cargo.lock index a92d6684..42bdce70 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1110,7 +1110,7 @@ dependencies = [ [[package]] name = "tantivy" -version = "0.20.1" +version = "0.20.2" dependencies = [ "base64", "chrono", diff --git a/Cargo.toml b/Cargo.toml index 951b6b36..a9a34ec8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "tantivy" -version = "0.20.1" +version = "0.20.2" readme = "README.md" authors = ["Damir Jelić "] edition = "2018" diff --git a/src/geocoding.rs b/src/geocoding.rs new file mode 100644 index 00000000..b3a4b1e4 --- /dev/null +++ b/src/geocoding.rs @@ -0,0 +1,46 @@ +#![allow(clippy::new_ret_no_self)] + +use pyo3::{exceptions, prelude::*, types::PyAny}; + +use crate::{ + document::{extract_value, Document}, + get_field, + parser_error::QueryParserErrorIntoPy, + query::Query, + schema::Schema, + searcher::Searcher, + to_pyerr, +}; +use tantivy as tv; +use tantivy::{ + directory::MmapDirectory, + schema::{NamedFieldDocument, Term, Value, Field}, + tokenizer::{ + Language, LowerCaser, RemoveLongFilter, SimpleTokenizer, Stemmer, + TextAnalyzer, + }, +}; + + +/// Geocoding is a work-in-progress placeholder exposed to Python as a class. 
+/// +/// It currently holds a single `u64` field and only exposes stub methods +/// that return fixed constants. +#[pyclass] +pub(crate) struct Geocoding { + attr1: u64, +} + +impl Geocoding { + pub(crate) fn private_method(&self) -> u64 { + 32 + } +} + +#[pymethods] +impl Geocoding { + #[staticmethod] + pub fn static_meth() -> u64 { + 42 + } +} \ No newline at end of file diff --git a/src/lib.rs b/src/lib.rs index 302a3218..20a7bf52 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,6 +4,7 @@ use pyo3::{exceptions, prelude::*, wrap_pymodule}; mod document; mod facet; mod index; +mod geocoding; mod parser_error; mod query; mod schema; @@ -86,6 +87,7 @@ fn tantivy(_py: Python, m: &PyModule) -> PyResult<()> { m.add_class::()?; m.add_wrapped(wrap_pymodule!(query_parser_error))?; + m.add_wrapped(wrap_pymodule!(geocoding_tools))?; Ok(()) } @@ -153,3 +155,9 @@ pub(crate) fn get_field( Ok(field) } + +#[pymodule] +fn geocoding_tools(_py: Python, m: &PyModule) -> PyResult<()> { + m.add_class::()?; + Ok(()) +} \ No newline at end of file diff --git a/src/query.rs b/src/query.rs index 31e2e5b6..b18ecbc9 100644 --- a/src/query.rs +++ b/src/query.rs @@ -18,7 +18,7 @@ impl Query { #[pymethods] impl Query { #[staticmethod] - fn term(field_id: u32, text: &str) -> Query { + pub fn term(field_id: u32, text: &str) -> Query { let term = Term::from_field_text(Field::from_field_id(field_id), text); Query { inner: Box::new(tv::query::TermQuery::new( @@ -29,7 +29,7 @@ impl Query { } #[staticmethod] - fn fuzzy_term(field_id: u32, distance: u8, text: &str) -> Query { + pub fn fuzzy_term(field_id: u32, distance: u8, text: &str) -> Query { let ftq = tv::query::FuzzyTermQuery::new( Term::from_field_text(Field::from_field_id(field_id), text), distance, @@ -41,7 +41,7 @@ impl Query { } #[staticmethod] - fn regex(field_id: u32, pattern: &str) -> PyResult { + pub fn regex(field_id: u32, pattern: &str) -> PyResult { let rq = tv::query::RegexQuery::from_pattern( pattern, Field::from_field_id(field_id), 
@@ -53,7 +53,7 @@ impl Query { } #[staticmethod] - fn phrase(field_id: u32, words: Vec<&str>) -> Query { + pub fn phrase(field_id: u32, words: Vec<&str>) -> Query { let terms = words .iter() .map(|&w| Term::from_field_text(Field::from_field_id(field_id), w)) @@ -64,7 +64,7 @@ impl Query { } #[staticmethod] - fn boost(q: &Query, boost: f32) -> Query { + pub fn boost(q: &Query, boost: f32) -> Query { let bq = tv::query::BoostQuery::new(q.get().box_clone(), boost); Query { inner: Box::new(bq), @@ -72,7 +72,7 @@ impl Query { } #[staticmethod] - fn and_q(qs : Vec>) -> Query { + pub fn and_q(qs : Vec>) -> Query { Query { inner: Box::new(tv::query::BooleanQuery::intersection( qs.iter().map(|q| q.get().box_clone()).collect::>() @@ -81,7 +81,7 @@ impl Query { } #[staticmethod] - fn or_q(qs : Vec>) -> Query { + pub fn or_q(qs : Vec>) -> Query { Query { inner: Box::new(tv::query::BooleanQuery::union( qs.iter().map(|q| q.get().box_clone()).collect::>()