22
33import java .io .File ;
44import java .io .IOException ;
5+ import java .io .StringReader ;
56import java .net .MalformedURLException ;
67import java .net .URL ;
78import java .nio .file .FileSystems ;
2223import javax .annotation .PreDestroy ;
2324
2425import org .apache .lucene .analysis .Analyzer ;
26+ import org .apache .lucene .analysis .TokenStream ;
27+ import org .apache .lucene .analysis .Tokenizer ;
2528import org .apache .lucene .analysis .core .KeywordAnalyzer ;
29+ import org .apache .lucene .analysis .core .LowerCaseFilter ;
2630import org .apache .lucene .analysis .core .WhitespaceAnalyzer ;
31+ import org .apache .lucene .analysis .custom .CustomAnalyzer ;
32+ import org .apache .lucene .analysis .miscellaneous .ASCIIFoldingFilter ;
2733import org .apache .lucene .analysis .miscellaneous .PerFieldAnalyzerWrapper ;
2834import org .apache .lucene .analysis .standard .StandardAnalyzer ;
35+ import org .apache .lucene .analysis .standard .StandardFilter ;
36+ import org .apache .lucene .analysis .standard .StandardTokenizer ;
37+ import org .apache .lucene .analysis .standard .StandardTokenizerFactory ;
38+ import org .apache .lucene .analysis .tokenattributes .CharTermAttribute ;
2939import org .apache .lucene .document .Document ;
3040import org .apache .lucene .document .Field ;
3141import org .apache .lucene .document .SortedDocValuesField ;
3646import org .apache .lucene .index .IndexWriterConfig ;
3747import org .apache .lucene .index .IndexableField ;
3848import org .apache .lucene .index .Term ;
49+ import org .apache .lucene .queryparser .classic .QueryParser ;
3950import org .apache .lucene .search .BooleanClause ;
4051import org .apache .lucene .search .BooleanClause .Occur ;
4152import org .apache .lucene .search .BooleanQuery ;
5364import org .apache .lucene .store .Directory ;
5465import org .apache .lucene .store .FSDirectory ;
5566import org .apache .lucene .util .BytesRef ;
67+ import org .apache .lucene .util .PagedBytes .Reader ;
5668import org .apache .lucene .util .QueryBuilder ;
69+ import org .apache .lucene .util .Version ;
5770import org .jsoup .Jsoup ;
5871import org .slf4j .Logger ;
5972import org .slf4j .LoggerFactory ;
6275import org .springframework .context .annotation .PropertySource ;
6376import org .springframework .core .env .Environment ;
6477import org .springframework .stereotype .Component ;
65-
6678import edu .asu .conceptpower .app .constants .LuceneFieldNames ;
6779import edu .asu .conceptpower .app .constants .SearchFieldNames ;
6880import edu .asu .conceptpower .app .db4o .IConceptDBManager ;
@@ -124,6 +136,7 @@ public class LuceneUtility implements ILuceneUtility {
124136
125137 private String lucenePath ;
126138
139+
127140 private int numberOfResults ;
128141
129142 private IndexWriter writer = null ;
@@ -132,19 +145,23 @@ public class LuceneUtility implements ILuceneUtility {
132145 private Directory index ;
133146 private Path relativePath = null ;
134147 private IndexSearcher searcher = null ;
135-
148+ private Analyzer customAnalyzer = null ;
149+
136150 /**
137151 *
138152 * @throws LuceneException
139153 */
140154 @ PostConstruct
141- public void init () throws LuceneException {
155+ public void init () throws LuceneException , IOException {
156+ customAnalyzer = CustomAnalyzer .builder ().withTokenizer ("keyword" ).addTokenFilter ("asciifolding" ).addTokenFilter ("worddelimiter" ).
157+ addTokenFilter ("lowercase" ).build ();
142158 lucenePath = env .getProperty ("lucenePath" );
143159 numberOfResults = Integer .parseInt (env .getProperty ("numberOfLuceneResults" ));
144160 try {
145161 relativePath = FileSystems .getDefault ().getPath (lucenePath , "index" );
162+
146163 index = FSDirectory .open (relativePath );
147- configWhiteSpace = new IndexWriterConfig (standardAnalyzer );
164+ configWhiteSpace = new IndexWriterConfig (customAnalyzer );
148165 writer = new IndexWriter (index , configWhiteSpace );
149166 reader = DirectoryReader .open (writer , true );
150167 searcher = new IndexSearcher (reader );
@@ -256,7 +273,7 @@ private ConceptEntry getConceptFromDocument(Document d) throws IllegalAccessExce
256273 LuceneField luceneFieldAnnotation = field .getAnnotation (LuceneField .class );
257274 field .setAccessible (true );
258275 if (luceneFieldAnnotation != null && d .get (luceneFieldAnnotation .lucenefieldName ()) != null )
259- if (! luceneFieldAnnotation .isMultiple ()) {
276+ if (luceneFieldAnnotation .isMultiple ()) {
260277 IndexableField [] indexableFields = d .getFields (luceneFieldAnnotation .lucenefieldName () + LuceneFieldNames .NOT_LOWERCASED );
261278 if (indexableFields == null || indexableFields .length == 0 ) {
262279 indexableFields = d .getFields (luceneFieldAnnotation .lucenefieldName ());
@@ -497,7 +514,7 @@ public ConceptEntry[] queryIndex(Map<String, String> fieldMap, String operator,
497514 if (operator == null || operator .equalsIgnoreCase (SearchParamters .OP_AND )) {
498515 occur = BooleanClause .Occur .MUST ;
499516 }
500-
517+
501518 java .lang .reflect .Field [] fields = ConceptEntry .class .getDeclaredFields ();
502519
503520 for (java .lang .reflect .Field field : fields ) {
@@ -517,9 +534,8 @@ public ConceptEntry[] queryIndex(Map<String, String> fieldMap, String operator,
517534
518535 }
519536
520- PerFieldAnalyzerWrapper perFieldAnalyzerWrapper = new PerFieldAnalyzerWrapper (standardAnalyzer ,
521- analyzerPerField );
522-
537+
538+ PerFieldAnalyzerWrapper perFieldAnalyzerWrapper = new PerFieldAnalyzerWrapper (customAnalyzer , analyzerPerField );
523539 QueryBuilder qBuild = new QueryBuilder (perFieldAnalyzerWrapper );
524540 BooleanQuery .Builder builder = new BooleanQuery .Builder ();
525541
@@ -579,32 +595,25 @@ public ConceptEntry[] queryIndex(Map<String, String> fieldMap, String operator,
579595 ConceptEntry entry = getConceptFromDocument (d );
580596 concepts .add (entry );
581597 }
598+ return concepts .toArray (new ConceptEntry [concepts .size ()]);
582599 }
583600
584601 catch (IOException ex ) {
585602 throw new LuceneException ("Issues in querying lucene index. Please retry" , ex );
586603 }
587- logger .debug ("Number of concepts retrieved from lucene = " + concepts .size ());
588- return concepts .toArray (new ConceptEntry [concepts .size ()]);
589-
590604 }
591605
592- private void buildQuery (BooleanClause .Occur occur , PerFieldAnalyzerWrapper perFieldAnalyzerWrapper ,
593- QueryBuilder qBuild , BooleanQuery .Builder builder , LuceneField luceneFieldAnnotation , String searchString ) {
606+ private void buildQuery (BooleanClause .Occur occur , PerFieldAnalyzerWrapper perFieldAnalyzerWrapper ,QueryBuilder qBuild , BooleanQuery .Builder builder , LuceneField luceneFieldAnnotation , String searchString ) {
594607 if (luceneFieldAnnotation .isTokenized ()) {
595608 BooleanQuery .Builder tokenizedQueryBuilder = new BooleanQuery .Builder ();
596- buildTokenizedOrWildCardQuery (luceneFieldAnnotation , searchString , tokenizedQueryBuilder );
609+ buildTokenizedOrWildCardQuery (luceneFieldAnnotation , searchString , qBuild , tokenizedQueryBuilder );
597610
598611 if (luceneFieldAnnotation .isShortPhraseSearchable ()) {
599612 BooleanQuery .Builder rootQueryBuilder = new BooleanQuery .Builder ();
600613 rootQueryBuilder .add (tokenizedQueryBuilder .build (), Occur .SHOULD );
601614 // Short word searching
602615 BooleanQuery .Builder shortWordSearchQueryBuilder = new BooleanQuery .Builder ();
603- shortWordSearchQueryBuilder .add (
604- new PhraseQuery (luceneFieldAnnotation .lucenefieldName () + LuceneFieldNames .UNTOKENIZED_SUFFIX ,
605- searchString ),
606- Occur .SHOULD );
607-
616+ shortWordSearchQueryBuilder .add (new PhraseQuery (luceneFieldAnnotation .lucenefieldName () + LuceneFieldNames .UNTOKENIZED_SUFFIX , searchString ), Occur .SHOULD );
608617 rootQueryBuilder .add (shortWordSearchQueryBuilder .build (), Occur .SHOULD );
609618 tokenizedQueryBuilder = rootQueryBuilder ;
610619 }
@@ -613,24 +622,25 @@ private void buildQuery(BooleanClause.Occur occur, PerFieldAnalyzerWrapper perFi
613622 } else {
614623 if (luceneFieldAnnotation .isWildCardSearchEnabled ()) {
615624 createWildCardSearchQuery (luceneFieldAnnotation , searchString , builder , occur );
616- } else {
617- builder .add (new BooleanClause (
618- new TermQuery (new Term (luceneFieldAnnotation .lucenefieldName (), searchString )), occur ));
619625 }
620- }
626+ builder .add (new BooleanClause ((qBuild .createPhraseQuery (luceneFieldAnnotation .lucenefieldName (), searchString )), occur ));
627+ }
621628 }
622629
623- private void buildTokenizedOrWildCardQuery (LuceneField luceneFieldAnnotation , String searchString ,
624- BooleanQuery .Builder tokenizedQueryBuilder ) {
625- for (String searchValue : searchString .split (" " )) {
630+ private void buildTokenizedOrWildCardQuery (LuceneField luceneFieldAnnotation , String searchString , QueryBuilder qBuild ,
631+ BooleanQuery .Builder tokenizedQueryBuilder ) {
626632 if (luceneFieldAnnotation .isWildCardSearchEnabled ()) {
627- createWildCardSearchQuery (luceneFieldAnnotation , searchValue , tokenizedQueryBuilder , Occur .MUST );
633+ BooleanQuery .Builder analyzedBuilder = new BooleanQuery .Builder ();
634+ createWildCardSearchQuery (luceneFieldAnnotation , searchString , analyzedBuilder , Occur .SHOULD );
635+ analyzedBuilder .add (new BooleanClause (
636+ (qBuild .createPhraseQuery (luceneFieldAnnotation .lucenefieldName (), searchString )), Occur .SHOULD ));
637+ tokenizedQueryBuilder .add (analyzedBuilder .build (), Occur .MUST );
628638 } else {
629- tokenizedQueryBuilder .add (new PhraseQuery (luceneFieldAnnotation .lucenefieldName (), searchValue ),
639+ tokenizedQueryBuilder .add (qBuild . createPhraseQuery (luceneFieldAnnotation .lucenefieldName (), searchString ),
630640 Occur .MUST );
631641 }
632642 }
633- }
643+
634644
635645 /**
636646 * This method adds the wild card query to the query builder when the
0 commit comments