2222#include < zim/archive.h>
2323#include < zim/suggestion.h>
2424#include < zim/item.h>
25+ #include < zim/error.h>
2526
2627#include < xapian.h>
2728
2829#include " tools.h"
2930#include " ../src/tools.h"
31+ #include " ../src/constants.h"
3032
3133#include " gtest/gtest.h"
3234
@@ -695,13 +697,72 @@ TEST(Suggestion, CJK) {
695697 );
696698}
697699
// Builds a single word (no whitespace, no punctuation) that is exactly `n`
// characters long. The word starts with "awordthatis<n>characterslong" and is
// padded up to the requested size by repeating its last character.
//
// Throws std::runtime_error if `n` is too small to hold the unpadded stem.
std::string makeLongWord(size_t n) {
  const std::string s = "awordthatis" + std::to_string(n) + "characterslong";
  if ( s.size() > n )
    throw std::runtime_error("That is not a request for a long enough word!");

  // n >= s.size() is guaranteed here, so the subtraction cannot wrap around.
  return s + std::string(n - s.size(), s.back());
}
709+
710+ void createASingleEntryZimArchive (const std::string& title)
711+ {
712+ TempZimArchiveMadeOfEmptyHtmlArticles tza (" en" , {{ " path" , title}});
713+ }
714+
715+ const size_t MAX_WORD_LENGTH = MAX_INDEXABLE_TITLE_WORD_SIZE;
716+
717+ TEST (Suggestion, handlingOfTooLongWords) {
718+ const std::string shortOfBeingTooLong = makeLongWord (MAX_WORD_LENGTH);
719+ const std::string tooLong = makeLongWord (MAX_WORD_LENGTH+1 );
720+
721+ std::vector<std::string> titlesWithTooMuchDiscardableStuff{
722+ tooLong,
723+ " Is " + tooLong + " too long?" ,
724+ " ;-) " + tooLong,
725+ " too much whitespace" + std::string (MAX_WORD_LENGTH, ' ' ),
726+ " too much punctuation" + std::string (MAX_WORD_LENGTH, ' !' ),
727+ };
728+
729+ for ( const std::string& title : titlesWithTooMuchDiscardableStuff ) {
730+ EXPECT_THROW (createASingleEntryZimArchive (title), zim::TitleIndexingError)
731+ << " title: " << title;
732+ }
733+
734+ TempZimArchiveMadeOfEmptyHtmlArticles tza (" en" , {
735+ // { path , title }
736+ { " path1" , shortOfBeingTooLong },
737+ { " path2" , " Is " + shortOfBeingTooLong + " too long?" },
738+ { " path3" , shortOfBeingTooLong + " " + shortOfBeingTooLong },
739+ });
740+
741+ zim::Archive archive (tza.getPath ());
742+ EXPECT_SUGGESTED_TITLES (archive, " long" ,
743+ " Is " + shortOfBeingTooLong + " too long?"
744+ );
745+
746+ EXPECT_SUGGESTED_TITLES (archive, " awordthatis" ,
747+ shortOfBeingTooLong + " " + shortOfBeingTooLong,
748+ shortOfBeingTooLong,
749+ " Is " + shortOfBeingTooLong + " too long?"
750+ );
751+ }
752+
698753TEST (Suggestion, titleEdgeCases) {
699754 TempZimArchiveMadeOfEmptyHtmlArticles tza (" en" , {
700755 // { path , title }
701756
702757 { " About" , " About" }, // Title identical to path
703758 { " Trout" , " trout" }, // Title differing from path in case only
704759 { " Without" , " " }, // No title
760+ //
761+ // Handling of pseudo-words consisting exclusively of punctuation
762+ { " winknsmilewithouttext" , " ;-)" }, // A punctuation-only title
763+ { " winknsmilebothways" , " ;-) wink'n'smile" },
764+ { " winknsmiletheotherwayaround" , " wink'n'smile ;-)" },
765+ { " winknsmilewithothernonwords" , " ~~ ;-) ~~" },
705766
706767 // Non edge cases
707768 { " Stout" , " About Rex Stout" },
@@ -727,6 +788,24 @@ TEST(Suggestion, titleEdgeCases) {
727788 EXPECT_SUGGESTED_TITLES (archive, " hang"
728789 /* nothing */
729790 );
791+
792+ EXPECT_SUGGESTED_TITLES (archive, " ;-" ,
793+ " ;-)" ,
794+ // The following results aren't included because ";-)" isn't treated as a
795+ // term in the presence of anything else:
796+ // - ";-) wink'n'smile"
797+ // - "wink'n'smile ;-)"
798+ // - "~~ ;-) ~~"
799+ );
800+
801+ EXPECT_SUGGESTED_TITLES (archive, " win" ,
802+ " ;-) wink'n'smile" ,
803+ " wink'n'smile ;-)"
804+ );
805+
806+ EXPECT_SUGGESTED_TITLES (archive, " smile" ,
807+ /* nothing */ // smile in "wink'n'smile" isn't a separate term
808+ );
730809}
731810
732811zim::Entry getTitleIndexEntry (const zim::Archive& a)
0 commit comments