|
27 | 27 |
|
28 | 28 | #include "tools.h" |
29 | 29 | #include "../src/tools.h" |
| 30 | +#include "../src/constants.h" |
30 | 31 |
|
31 | 32 | #include "gtest/gtest.h" |
32 | 33 |
|
@@ -705,28 +706,61 @@ std::string makeLongWord(size_t n) { |
705 | 706 | return s + std::string(n - s.size(), s.back()); |
706 | 707 | } |
707 | 708 |
|
708 | | -TEST(Suggestion, titleEdgeCases) { |
709 | | - const std::string shortOfBeingTooLong = makeLongWord(240); |
710 | | - const std::string tooLong = makeLongWord(241); |
| 709 | +void createASingleEntryZimArchive(const std::string& title) |
| 710 | +{ |
| 711 | + TempZimArchiveMadeOfEmptyHtmlArticles tza("en", {{ "path", title}}); |
| 712 | +} |
| 713 | + |
| 714 | +const size_t MAX_WORD_LENGTH = MAX_INDEXABLE_TITLE_WORD_SIZE; |
| 715 | + |
| 716 | +TEST(Suggestion, handlingOfTooLongWords) { |
| 717 | + const std::string shortOfBeingTooLong = makeLongWord(MAX_WORD_LENGTH); |
| 718 | + const std::string tooLong = makeLongWord(MAX_WORD_LENGTH+1); |
| 719 | + |
| 720 | + std::vector<std::string> titlesWithTooMuchDiscardableStuff{ |
| 721 | + tooLong, |
| 722 | + "Is " + tooLong + " too long?", |
| 723 | + ";-) " + tooLong, |
| 724 | + "too much whitespace" + std::string(MAX_WORD_LENGTH, ' '), |
| 725 | + "too much punctuation" + std::string(MAX_WORD_LENGTH, '!'), |
| 726 | + }; |
| 727 | + |
| 728 | + for ( const std::string& title : titlesWithTooMuchDiscardableStuff ) { |
| 729 | + EXPECT_THROW(createASingleEntryZimArchive(title), std::runtime_error) |
| 730 | + << "title: " << title; |
| 731 | + } |
711 | 732 |
|
| 733 | + TempZimArchiveMadeOfEmptyHtmlArticles tza("en", { |
| 734 | + // { path , title } |
| 735 | + { "path1", shortOfBeingTooLong }, |
| 736 | + { "path2", "Is " + shortOfBeingTooLong + " too long?" }, |
| 737 | + { "path3", shortOfBeingTooLong + " " + shortOfBeingTooLong }, |
| 738 | + }); |
| 739 | + |
| 740 | + zim::Archive archive(tza.getPath()); |
| 741 | + EXPECT_SUGGESTED_TITLES(archive, "long", |
| 742 | + "Is " + shortOfBeingTooLong + " too long?" |
| 743 | + ); |
| 744 | + |
| 745 | + EXPECT_SUGGESTED_TITLES(archive, "awordthatis", |
| 746 | + shortOfBeingTooLong + " " + shortOfBeingTooLong, |
| 747 | + shortOfBeingTooLong, |
| 748 | + "Is " + shortOfBeingTooLong + " too long?" |
| 749 | + ); |
| 750 | +} |
| 751 | + |
| 752 | +TEST(Suggestion, titleEdgeCases) { |
712 | 753 | TempZimArchiveMadeOfEmptyHtmlArticles tza("en", { |
713 | 754 | // { path , title } |
714 | 755 |
|
715 | 756 | { "About" , "About" }, // Title identical to path |
716 | 757 | { "Trout" , "trout" }, // Title differing from path in case only |
717 | 758 | { "Without", "" }, // No title |
718 | 759 | // |
719 | | - // Titles containing long words |
720 | | - { "toolongword1", "Is " + shortOfBeingTooLong + " too long?" }, |
721 | | - { "toolongword2", "Is " + tooLong + " too long?" }, |
722 | | - { "toolongsingleword1", shortOfBeingTooLong }, |
723 | | - { "toolongsingleword2", tooLong }, |
724 | | - |
725 | 760 | // Handling of pseudo-words consisting exclusively of punctuation |
726 | 761 | { "winknsmilewithouttext", ";-)" }, // A punctuation-only title |
727 | 762 | { "winknsmilebothways", ";-) wink'n'smile" }, |
728 | 763 | { "winknsmiletheotherwayaround", "wink'n'smile ;-)" }, |
729 | | - { "smilinglongword", ";-) " + tooLong }, |
730 | 764 | { "winknsmilewithothernonwords", "~~ ;-) ~~" }, |
731 | 765 |
|
732 | 766 | // Non edge cases |
@@ -754,28 +788,12 @@ TEST(Suggestion, titleEdgeCases) { |
754 | 788 | /* nothing */ |
755 | 789 | ); |
756 | 790 |
|
757 | | - EXPECT_SUGGESTED_TITLES(archive, "long", |
758 | | - "Is " + tooLong + " too long?", |
759 | | - "Is " + shortOfBeingTooLong + " too long?" |
760 | | - ); |
761 | | - |
762 | | - EXPECT_SUGGESTED_TITLES(archive, "awordthatis", |
763 | | - shortOfBeingTooLong, |
764 | | - "Is " + shortOfBeingTooLong + " too long?" |
765 | | - // The following results aren't included because tooLong has been ignored |
766 | | - // during indexing: |
767 | | - // - tooLong |
768 | | - // - "Is " + tooLong + " too long?" |
769 | | - // - ";-) " + tooLong |
770 | | - ); |
771 | | - |
772 | 791 | EXPECT_SUGGESTED_TITLES(archive, ";-", |
773 | 792 | ";-)", |
774 | 793 | // The following results aren't included because ";-)" isn't treated as a |
775 | 794 | // term in the presence of anything else: |
776 | 795 | // - ";-) wink'n'smile" |
777 | 796 | // - "wink'n'smile ;-)" |
778 | | - // - ";-) " + tooLong |
779 | 797 | // - "~~ ;-) ~~" |
780 | 798 | ); |
781 | 799 |
|
|
0 commit comments