update for 8.2.0
@@ -70,6 +70,15 @@ | ||
70 | 70 | <types> fieldType section |
71 | 71 | indexed: true if this field should be indexed (searchable or sortable) |
72 | 72 | stored: true if this field should be retrievable |
73 | + docValues: true if this field should have doc values. Doc values are | |
74 | + useful for faceting, grouping, sorting and function queries. Although not | |
75 | + required, doc values will make the index faster to load, more | |
76 | + NRT-friendly and more memory-efficient. They however come with some | |
77 | + limitations: they are currently only supported by StrField, UUIDField | |
78 | + and all Trie*Fields, and depending on the field type, they might | |
79 | + require the field to be single-valued, be required or have a default | |
80 | + value (check the documentation of the field type you're interested in | |
81 | + for more information) | |
73 | 82 | multiValued: true if this field may contain multiple values per document |
74 | 83 | omitNorms: (expert) set to true to omit the norms associated with |
75 | 84 | this field (this disables length normalization and index-time |
@@ -90,18 +99,18 @@ | ||
90 | 99 | when adding a document. |
91 | 100 | --> |
92 | 101 | |
93 | - <field name="id" type="string" stored="true" indexed="true" required="true" multiValued="false"/> | |
102 | + <field name="id" type="string" stored="true" indexed="true" required="true" multiValued="false" docValues="true" /> | |
94 | 103 | <!-- core fields --> |
95 | 104 | <field name="parentId" type="string" stored="true" indexed="true"/> |
96 | 105 | <field name="segment" type="string" stored="true" indexed="true"/> |
97 | 106 | <field name="digest" type="string" stored="true" indexed="false"/> |
98 | 107 | <field name="boost" type="float" stored="true" indexed="false"/> |
99 | - <field name="host" type="url" stored="true" indexed="true"/> | |
108 | + <field name="host" type="domain_name" stored="true" indexed="true"/> | |
100 | 109 | <field name="site" type="string" stored="true" indexed="false"/> |
101 | - <field name="url" type="url" stored="true" indexed="true" required="true"/> | |
110 | + <field name="url" type="string" stored="true" indexed="true" required="true"/> | |
102 | 111 | <field name="content" type="text" stored="true" indexed="true" termVectors="true"/> |
103 | 112 | <field name="title" type="text" stored="true" indexed="true" termVectors="true"/> |
104 | - <field name="cache" type="string" stored="true" indexed="false" compressed="true"/> | |
113 | + <field name="cache" type="string" stored="true" indexed="false"/> | |
105 | 114 | <field name="tstamp" type="tdate" stored="true" indexed="true"/> |
106 | 115 | <field name="anchor" type="string" stored="true" indexed="true" multiValued="true"/> |
107 | 116 | <field name="contentLength" type="tlong" stored="true" indexed="true"/> |
@@ -397,6 +406,15 @@ | ||
397 | 406 | <dynamicField name="*_pf" type="pfloat" indexed="true" stored="true"/> |
398 | 407 | <dynamicField name="*_pd" type="pdouble" indexed="true" stored="true"/> |
399 | 408 | <dynamicField name="*_pdt" type="pdate" indexed="true" stored="true"/> |
409 | + | |
410 | + <!-- Suggest --> | |
411 | + <dynamicField name="*_ts" type="text_suggest" indexed="true" stored="true"/> | |
412 | + <dynamicField name="*_ss" type="string_suggest" indexed="true" stored="true"/> | |
413 | +<!-- | |
414 | + <copyField source="content" dest="content_ts"/> | |
415 | + <copyField source="title" dest="title_ss"/> | |
416 | +--> | |
417 | + | |
400 | 418 | </fields> |
401 | 419 | |
402 | 420 | <!-- Field to use to determine and enforce document uniqueness. |
@@ -413,7 +431,10 @@ | ||
413 | 431 | standard package such as org.apache.solr.analysis |
414 | 432 | --> |
415 | 433 | |
416 | - <!-- The StrField type is not analyzed, but indexed/stored verbatim. --> | |
434 | + <!-- The StrField type is not analyzed, but indexed/stored verbatim. | |
435 | + It supports doc values but in that case the field needs to be | |
436 | + single-valued and either required or have a default value. | |
437 | + --> | |
417 | 438 | <fieldType name="string" class="solr.StrField" sortMissingLast="true" /> |
418 | 439 | |
419 | 440 | <!-- boolean type: "true" or "false" --> |
@@ -437,6 +458,9 @@ | ||
437 | 458 | |
438 | 459 | <!-- |
439 | 460 | Default numeric field types. For faster range queries, consider the tint/tfloat/tlong/tdouble types. |
461 | + | |
462 | + These fields support doc values, but they require the field to be | |
463 | + single-valued and either be required or have a default value. | |
440 | 464 | --> |
441 | 465 | <fieldType name="int" class="solr.TrieIntField" precisionStep="0" positionIncrementGap="0"/> |
442 | 466 | <fieldType name="float" class="solr.TrieFloatField" precisionStep="0" positionIncrementGap="0"/> |
@@ -552,7 +576,7 @@ | ||
552 | 576 | <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100"> |
553 | 577 | <analyzer type="index"> |
554 | 578 | <tokenizer class="solr.StandardTokenizerFactory"/> |
555 | - <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" /> | |
579 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" /> | |
556 | 580 | <!-- in this example, we will only use synonyms at query time |
557 | 581 | <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/> |
558 | 582 | --> |
@@ -560,7 +584,7 @@ | ||
560 | 584 | </analyzer> |
561 | 585 | <analyzer type="query"> |
562 | 586 | <tokenizer class="solr.StandardTokenizerFactory"/> |
563 | - <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" /> | |
587 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" /> | |
564 | 588 | <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> |
565 | 589 | <filter class="solr.LowerCaseFilterFactory"/> |
566 | 590 | </analyzer> |
@@ -578,13 +602,10 @@ | ||
578 | 602 | <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/> |
579 | 603 | --> |
580 | 604 | <!-- Case insensitive stop word removal. |
581 | - add enablePositionIncrements=true in both the index and query | |
582 | - analyzers to leave a 'gap' for more accurate phrase queries. | |
583 | 605 | --> |
584 | 606 | <filter class="solr.StopFilterFactory" |
585 | 607 | ignoreCase="true" |
586 | 608 | words="lang/stopwords_en.txt" |
587 | - enablePositionIncrements="true" | |
588 | 609 | /> |
589 | 610 | <filter class="solr.LowerCaseFilterFactory"/> |
590 | 611 | <filter class="solr.EnglishPossessiveFilterFactory"/> |
@@ -600,7 +621,6 @@ | ||
600 | 621 | <filter class="solr.StopFilterFactory" |
601 | 622 | ignoreCase="true" |
602 | 623 | words="lang/stopwords_en.txt" |
603 | - enablePositionIncrements="true" | |
604 | 624 | /> |
605 | 625 | <filter class="solr.LowerCaseFilterFactory"/> |
606 | 626 | <filter class="solr.EnglishPossessiveFilterFactory"/> |
@@ -628,13 +648,10 @@ | ||
628 | 648 | <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/> |
629 | 649 | --> |
630 | 650 | <!-- Case insensitive stop word removal. |
631 | - add enablePositionIncrements=true in both the index and query | |
632 | - analyzers to leave a 'gap' for more accurate phrase queries. | |
633 | 651 | --> |
634 | 652 | <filter class="solr.StopFilterFactory" |
635 | 653 | ignoreCase="true" |
636 | 654 | words="lang/stopwords_en.txt" |
637 | - enablePositionIncrements="true" | |
638 | 655 | /> |
639 | 656 | <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/> |
640 | 657 | <filter class="solr.LowerCaseFilterFactory"/> |
@@ -647,7 +664,6 @@ | ||
647 | 664 | <filter class="solr.StopFilterFactory" |
648 | 665 | ignoreCase="true" |
649 | 666 | words="lang/stopwords_en.txt" |
650 | - enablePositionIncrements="true" | |
651 | 667 | /> |
652 | 668 | <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/> |
653 | 669 | <filter class="solr.LowerCaseFilterFactory"/> |
@@ -678,7 +694,7 @@ | ||
678 | 694 | <fieldType name="text_general_rev" class="solr.TextField" positionIncrementGap="100"> |
679 | 695 | <analyzer type="index"> |
680 | 696 | <tokenizer class="solr.StandardTokenizerFactory"/> |
681 | - <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" /> | |
697 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" /> | |
682 | 698 | <filter class="solr.LowerCaseFilterFactory"/> |
683 | 699 | <filter class="solr.ReversedWildcardFilterFactory" withOriginal="true" |
684 | 700 | maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/> |
@@ -686,7 +702,7 @@ | ||
686 | 702 | <analyzer type="query"> |
687 | 703 | <tokenizer class="solr.StandardTokenizerFactory"/> |
688 | 704 | <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> |
689 | - <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" /> | |
705 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" /> | |
690 | 706 | <filter class="solr.LowerCaseFilterFactory"/> |
691 | 707 | </analyzer> |
692 | 708 | </fieldType> |
@@ -840,7 +856,7 @@ | ||
840 | 856 | <tokenizer class="solr.StandardTokenizerFactory"/> |
841 | 857 | <!-- for any non-arabic --> |
842 | 858 | <filter class="solr.LowerCaseFilterFactory"/> |
843 | - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ar.txt" enablePositionIncrements="true"/> | |
859 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ar.txt" /> | |
844 | 860 | <!-- normalizes ﻯ to ﻱ, etc --> |
845 | 861 | <filter class="solr.ArabicNormalizationFilterFactory"/> |
846 | 862 | <filter class="solr.ArabicStemFilterFactory"/> |
@@ -852,7 +868,7 @@ | ||
852 | 868 | <analyzer> |
853 | 869 | <tokenizer class="solr.StandardTokenizerFactory"/> |
854 | 870 | <filter class="solr.LowerCaseFilterFactory"/> |
855 | - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_bg.txt" enablePositionIncrements="true"/> | |
871 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_bg.txt" /> | |
856 | 872 | <filter class="solr.BulgarianStemFilterFactory"/> |
857 | 873 | </analyzer> |
858 | 874 | </fieldType> |
@@ -864,7 +880,7 @@ | ||
864 | 880 | <!-- removes l', etc --> |
865 | 881 | <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_ca.txt"/> |
866 | 882 | <filter class="solr.LowerCaseFilterFactory"/> |
867 | - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ca.txt" enablePositionIncrements="true"/> | |
883 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ca.txt" /> | |
868 | 884 | <filter class="solr.SnowballPorterFilterFactory" language="Catalan"/> |
869 | 885 | </analyzer> |
870 | 886 | </fieldType> |
@@ -886,7 +902,7 @@ | ||
886 | 902 | <analyzer> |
887 | 903 | <tokenizer class="solr.StandardTokenizerFactory"/> |
888 | 904 | <filter class="solr.LowerCaseFilterFactory"/> |
889 | - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_cz.txt" enablePositionIncrements="true"/> | |
905 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_cz.txt" /> | |
890 | 906 | <filter class="solr.CzechStemFilterFactory"/> |
891 | 907 | </analyzer> |
892 | 908 | </fieldType> |
@@ -896,7 +912,7 @@ | ||
896 | 912 | <analyzer> |
897 | 913 | <tokenizer class="solr.StandardTokenizerFactory"/> |
898 | 914 | <filter class="solr.LowerCaseFilterFactory"/> |
899 | - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_da.txt" format="snowball" enablePositionIncrements="true"/> | |
915 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_da.txt" format="snowball" /> | |
900 | 916 | <filter class="solr.SnowballPorterFilterFactory" language="Danish"/> |
901 | 917 | </analyzer> |
902 | 918 | </fieldType> |
@@ -906,7 +922,7 @@ | ||
906 | 922 | <analyzer> |
907 | 923 | <tokenizer class="solr.StandardTokenizerFactory"/> |
908 | 924 | <filter class="solr.LowerCaseFilterFactory"/> |
909 | - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_de.txt" format="snowball" enablePositionIncrements="true"/> | |
925 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_de.txt" format="snowball" /> | |
910 | 926 | <filter class="solr.GermanNormalizationFilterFactory"/> |
911 | 927 | <filter class="solr.GermanLightStemFilterFactory"/> |
912 | 928 | <!-- less aggressive: <filter class="solr.GermanMinimalStemFilterFactory"/> --> |
@@ -920,7 +936,7 @@ | ||
920 | 936 | <tokenizer class="solr.StandardTokenizerFactory"/> |
921 | 937 | <!-- greek specific lowercase for sigma --> |
922 | 938 | <filter class="solr.GreekLowerCaseFilterFactory"/> |
923 | - <filter class="solr.StopFilterFactory" ignoreCase="false" words="lang/stopwords_el.txt" enablePositionIncrements="true"/> | |
939 | + <filter class="solr.StopFilterFactory" ignoreCase="false" words="lang/stopwords_el.txt" /> | |
924 | 940 | <filter class="solr.GreekStemFilterFactory"/> |
925 | 941 | </analyzer> |
926 | 942 | </fieldType> |
@@ -930,7 +946,7 @@ | ||
930 | 946 | <analyzer> |
931 | 947 | <tokenizer class="solr.StandardTokenizerFactory"/> |
932 | 948 | <filter class="solr.LowerCaseFilterFactory"/> |
933 | - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_es.txt" format="snowball" enablePositionIncrements="true"/> | |
949 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_es.txt" format="snowball" /> | |
934 | 950 | <filter class="solr.SpanishLightStemFilterFactory"/> |
935 | 951 | <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Spanish"/> --> |
936 | 952 | </analyzer> |
@@ -941,7 +957,7 @@ | ||
941 | 957 | <analyzer> |
942 | 958 | <tokenizer class="solr.StandardTokenizerFactory"/> |
943 | 959 | <filter class="solr.LowerCaseFilterFactory"/> |
944 | - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_eu.txt" enablePositionIncrements="true"/> | |
960 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_eu.txt" /> | |
945 | 961 | <filter class="solr.SnowballPorterFilterFactory" language="Basque"/> |
946 | 962 | </analyzer> |
947 | 963 | </fieldType> |
@@ -955,7 +971,7 @@ | ||
955 | 971 | <filter class="solr.LowerCaseFilterFactory"/> |
956 | 972 | <filter class="solr.ArabicNormalizationFilterFactory"/> |
957 | 973 | <filter class="solr.PersianNormalizationFilterFactory"/> |
958 | - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fa.txt" enablePositionIncrements="true"/> | |
974 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fa.txt" /> | |
959 | 975 | </analyzer> |
960 | 976 | </fieldType> |
961 | 977 |
@@ -964,7 +980,7 @@ | ||
964 | 980 | <analyzer> |
965 | 981 | <tokenizer class="solr.StandardTokenizerFactory"/> |
966 | 982 | <filter class="solr.LowerCaseFilterFactory"/> |
967 | - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fi.txt" format="snowball" enablePositionIncrements="true"/> | |
983 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fi.txt" format="snowball" /> | |
968 | 984 | <filter class="solr.SnowballPorterFilterFactory" language="Finnish"/> |
969 | 985 | <!-- less aggressive: <filter class="solr.FinnishLightStemFilterFactory"/> --> |
970 | 986 | </analyzer> |
@@ -977,7 +993,7 @@ | ||
977 | 993 | <!-- removes l', etc --> |
978 | 994 | <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_fr.txt"/> |
979 | 995 | <filter class="solr.LowerCaseFilterFactory"/> |
980 | - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fr.txt" format="snowball" enablePositionIncrements="true"/> | |
996 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fr.txt" format="snowball" /> | |
981 | 997 | <filter class="solr.FrenchLightStemFilterFactory"/> |
982 | 998 | <!-- less aggressive: <filter class="solr.FrenchMinimalStemFilterFactory"/> --> |
983 | 999 | <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="French"/> --> |
@@ -991,9 +1007,9 @@ | ||
991 | 1007 | <!-- removes d', etc --> |
992 | 1008 | <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_ga.txt"/> |
993 | 1009 | <!-- removes n-, etc. (this filter used to set enablePositionIncrements="false" intentionally; that attribute is no longer specified on the filter below) --> |
994 | - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/hyphenations_ga.txt" enablePositionIncrements="false"/> | |
1010 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/hyphenations_ga.txt"/> | |
995 | 1011 | <filter class="solr.IrishLowerCaseFilterFactory"/> |
996 | - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ga.txt" enablePositionIncrements="true"/> | |
1012 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ga.txt"/> | |
997 | 1013 | <filter class="solr.SnowballPorterFilterFactory" language="Irish"/> |
998 | 1014 | </analyzer> |
999 | 1015 | </fieldType> |
@@ -1003,7 +1019,7 @@ | ||
1003 | 1019 | <analyzer> |
1004 | 1020 | <tokenizer class="solr.StandardTokenizerFactory"/> |
1005 | 1021 | <filter class="solr.LowerCaseFilterFactory"/> |
1006 | - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_gl.txt" enablePositionIncrements="true"/> | |
1022 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_gl.txt" /> | |
1007 | 1023 | <filter class="solr.GalicianStemFilterFactory"/> |
1008 | 1024 | <!-- less aggressive: <filter class="solr.GalicianMinimalStemFilterFactory"/> --> |
1009 | 1025 | </analyzer> |
@@ -1018,7 +1034,7 @@ | ||
1018 | 1034 | <filter class="solr.IndicNormalizationFilterFactory"/> |
1019 | 1035 | <!-- normalizes variation in spelling --> |
1020 | 1036 | <filter class="solr.HindiNormalizationFilterFactory"/> |
1021 | - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hi.txt" enablePositionIncrements="true"/> | |
1037 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hi.txt" /> | |
1022 | 1038 | <filter class="solr.HindiStemFilterFactory"/> |
1023 | 1039 | </analyzer> |
1024 | 1040 | </fieldType> |
@@ -1028,7 +1044,7 @@ | ||
1028 | 1044 | <analyzer> |
1029 | 1045 | <tokenizer class="solr.StandardTokenizerFactory"/> |
1030 | 1046 | <filter class="solr.LowerCaseFilterFactory"/> |
1031 | - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hu.txt" format="snowball" enablePositionIncrements="true"/> | |
1047 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hu.txt" format="snowball" /> | |
1032 | 1048 | <filter class="solr.SnowballPorterFilterFactory" language="Hungarian"/> |
1033 | 1049 | <!-- less aggressive: <filter class="solr.HungarianLightStemFilterFactory"/> --> |
1034 | 1050 | </analyzer> |
@@ -1039,7 +1055,7 @@ | ||
1039 | 1055 | <analyzer> |
1040 | 1056 | <tokenizer class="solr.StandardTokenizerFactory"/> |
1041 | 1057 | <filter class="solr.LowerCaseFilterFactory"/> |
1042 | - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hy.txt" enablePositionIncrements="true"/> | |
1058 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hy.txt" /> | |
1043 | 1059 | <filter class="solr.SnowballPorterFilterFactory" language="Armenian"/> |
1044 | 1060 | </analyzer> |
1045 | 1061 | </fieldType> |
@@ -1049,7 +1065,7 @@ | ||
1049 | 1065 | <analyzer> |
1050 | 1066 | <tokenizer class="solr.StandardTokenizerFactory"/> |
1051 | 1067 | <filter class="solr.LowerCaseFilterFactory"/> |
1052 | - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_id.txt" enablePositionIncrements="true"/> | |
1068 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_id.txt" /> | |
1053 | 1069 | <!-- for a less aggressive approach (only inflectional suffixes), set stemDerivational to false --> |
1054 | 1070 | <filter class="solr.IndonesianStemFilterFactory" stemDerivational="true"/> |
1055 | 1071 | </analyzer> |
@@ -1062,7 +1078,7 @@ | ||
1062 | 1078 | <!-- removes l', etc --> |
1063 | 1079 | <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_it.txt"/> |
1064 | 1080 | <filter class="solr.LowerCaseFilterFactory"/> |
1065 | - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_it.txt" format="snowball" enablePositionIncrements="true"/> | |
1081 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_it.txt" format="snowball" /> | |
1066 | 1082 | <filter class="solr.ItalianLightStemFilterFactory"/> |
1067 | 1083 | <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Italian"/> --> |
1068 | 1084 | </analyzer> |
@@ -1109,11 +1125,11 @@ | ||
1109 | 1125 | <!-- Reduces inflected verbs and adjectives to their base/dictionary forms (辞書形) --> |
1110 | 1126 | <filter class="solr.JapaneseBaseFormFilterFactory"/> |
1111 | 1127 | <!-- Removes tokens with certain part-of-speech tags --> |
1112 | - <filter class="solr.JapanesePartOfSpeechStopFilterFactory" tags="lang/stoptags_ja.txt" enablePositionIncrements="false"/> | |
1128 | + <filter class="solr.JapanesePartOfSpeechStopFilterFactory" tags="lang/stoptags_ja.txt" /> | |
1113 | 1129 | <!-- Normalizes full-width romaji to half-width and half-width kana to full-width (Unicode NFKC subset) --> |
1114 | 1130 | <filter class="solr.CJKWidthFilterFactory"/> |
1115 | 1131 | <!-- Removes common tokens typically not useful for search, but have a negative effect on ranking --> |
1116 | - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ja.txt" enablePositionIncrements="false" /> | |
1132 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ja.txt" /> | |
1117 | 1133 | <!-- Normalizes common katakana spelling variations by removing any last long sound character (U+30FC) --> |
1118 | 1134 | <filter class="solr.JapaneseKatakanaStemFilterFactory" minimumLength="4"/> |
1119 | 1135 | <!-- Lower-cases romaji characters --> |
@@ -1126,7 +1142,7 @@ | ||
1126 | 1142 | <analyzer> |
1127 | 1143 | <tokenizer class="solr.StandardTokenizerFactory"/> |
1128 | 1144 | <filter class="solr.LowerCaseFilterFactory"/> |
1129 | - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_lv.txt" enablePositionIncrements="true"/> | |
1145 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_lv.txt" /> | |
1130 | 1146 | <filter class="solr.LatvianStemFilterFactory"/> |
1131 | 1147 | </analyzer> |
1132 | 1148 | </fieldType> |
@@ -1136,7 +1152,7 @@ | ||
1136 | 1152 | <analyzer> |
1137 | 1153 | <tokenizer class="solr.StandardTokenizerFactory"/> |
1138 | 1154 | <filter class="solr.LowerCaseFilterFactory"/> |
1139 | - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_nl.txt" format="snowball" enablePositionIncrements="true"/> | |
1155 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_nl.txt" format="snowball" /> | |
1140 | 1156 | <filter class="solr.StemmerOverrideFilterFactory" dictionary="lang/stemdict_nl.txt" ignoreCase="false"/> |
1141 | 1157 | <filter class="solr.SnowballPorterFilterFactory" language="Dutch"/> |
1142 | 1158 | </analyzer> |
@@ -1147,10 +1163,11 @@ | ||
1147 | 1163 | <analyzer> |
1148 | 1164 | <tokenizer class="solr.StandardTokenizerFactory"/> |
1149 | 1165 | <filter class="solr.LowerCaseFilterFactory"/> |
1150 | - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_no.txt" format="snowball" enablePositionIncrements="true"/> | |
1166 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_no.txt" format="snowball" /> | |
1151 | 1167 | <filter class="solr.SnowballPorterFilterFactory" language="Norwegian"/> |
1152 | - <!-- less aggressive: <filter class="solr.NorwegianLightStemFilterFactory"/> --> | |
1153 | - <!-- singular/plural: <filter class="solr.NorwegianMinimalStemFilterFactory"/> --> | |
1168 | + <!-- less aggressive: <filter class="solr.NorwegianLightStemFilterFactory" variant="nb"/> --> | |
1169 | + <!-- singular/plural: <filter class="solr.NorwegianMinimalStemFilterFactory" variant="nb"/> --> | |
1170 | + <!-- The "light" and "minimal" stemmers support variants: nb=Bokmål, nn=Nynorsk, no=Both --> | |
1154 | 1171 | </analyzer> |
1155 | 1172 | </fieldType> |
1156 | 1173 |
@@ -1159,7 +1176,7 @@ | ||
1159 | 1176 | <analyzer> |
1160 | 1177 | <tokenizer class="solr.StandardTokenizerFactory"/> |
1161 | 1178 | <filter class="solr.LowerCaseFilterFactory"/> |
1162 | - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_pt.txt" format="snowball" enablePositionIncrements="true"/> | |
1179 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_pt.txt" format="snowball" /> | |
1163 | 1180 | <filter class="solr.PortugueseLightStemFilterFactory"/> |
1164 | 1181 | <!-- less aggressive: <filter class="solr.PortugueseMinimalStemFilterFactory"/> --> |
1165 | 1182 | <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Portuguese"/> --> |
@@ -1172,7 +1189,7 @@ | ||
1172 | 1189 | <analyzer> |
1173 | 1190 | <tokenizer class="solr.StandardTokenizerFactory"/> |
1174 | 1191 | <filter class="solr.LowerCaseFilterFactory"/> |
1175 | - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ro.txt" enablePositionIncrements="true"/> | |
1192 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ro.txt" /> | |
1176 | 1193 | <filter class="solr.SnowballPorterFilterFactory" language="Romanian"/> |
1177 | 1194 | </analyzer> |
1178 | 1195 | </fieldType> |
@@ -1182,7 +1199,7 @@ | ||
1182 | 1199 | <analyzer> |
1183 | 1200 | <tokenizer class="solr.StandardTokenizerFactory"/> |
1184 | 1201 | <filter class="solr.LowerCaseFilterFactory"/> |
1185 | - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ru.txt" format="snowball" enablePositionIncrements="true"/> | |
1202 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ru.txt" format="snowball" /> | |
1186 | 1203 | <filter class="solr.SnowballPorterFilterFactory" language="Russian"/> |
1187 | 1204 | <!-- less aggressive: <filter class="solr.RussianLightStemFilterFactory"/> --> |
1188 | 1205 | </analyzer> |
@@ -1193,7 +1210,7 @@ | ||
1193 | 1210 | <analyzer> |
1194 | 1211 | <tokenizer class="solr.StandardTokenizerFactory"/> |
1195 | 1212 | <filter class="solr.LowerCaseFilterFactory"/> |
1196 | - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_sv.txt" format="snowball" enablePositionIncrements="true"/> | |
1213 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_sv.txt" format="snowball" /> | |
1197 | 1214 | <filter class="solr.SnowballPorterFilterFactory" language="Swedish"/> |
1198 | 1215 | <!-- less aggressive: <filter class="solr.SwedishLightStemFilterFactory"/> --> |
1199 | 1216 | </analyzer> |
@@ -1205,7 +1222,7 @@ | ||
1205 | 1222 | <tokenizer class="solr.StandardTokenizerFactory"/> |
1206 | 1223 | <filter class="solr.LowerCaseFilterFactory"/> |
1207 | 1224 | <filter class="solr.ThaiWordFilterFactory"/> |
1208 | - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_th.txt" enablePositionIncrements="true"/> | |
1225 | + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_th.txt" /> | |
1209 | 1226 | </analyzer> |
1210 | 1227 | </fieldType> |
1211 | 1228 |
@@ -1214,7 +1231,7 @@ | ||
1214 | 1231 | <analyzer> |
1215 | 1232 | <tokenizer class="solr.StandardTokenizerFactory"/> |
1216 | 1233 | <filter class="solr.TurkishLowerCaseFilterFactory"/> |
1217 | - <filter class="solr.StopFilterFactory" ignoreCase="false" words="lang/stopwords_tr.txt" enablePositionIncrements="true"/> | |
1234 | + <filter class="solr.StopFilterFactory" ignoreCase="false" words="lang/stopwords_tr.txt" /> | |
1218 | 1235 | <filter class="solr.SnowballPorterFilterFactory" language="Turkish"/> |
1219 | 1236 | </analyzer> |
1220 | 1237 | </fieldType> |
@@ -1239,6 +1256,27 @@ | ||
1239 | 1256 | </analyzer> |
1240 | 1257 | </fieldType> |
1241 | 1258 | |
1259 | + <fieldType name="text_suggest" class="solr.TextField" positionIncrementGap="100"> | |
1260 | + <analyzer> | |
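1261 | + <tokenizer class="jp.sf.fess.solr.plugin.analysis.SuggestTextTokenizerFactory" maxLength="10000" userDictionary="lang/userdict_suggest_ja.txt" userDictionaryEncoding="UTF-8" includePartOfSpeech="start:名詞,start:接頭詞,start:形容詞,middle:名詞,middle:接頭詞,middle:形容詞" excludePartOfSpeech="start:副詞可能" includeCharTerm="middle:な" preConverters="[{&quot;class&quot;:&quot;jp.sf.fess.suggest.converter.ICUConverter&quot;, &quot;args&quot;:[&quot;Fullwidth-Halfwidth&quot;]}, {&quot;class&quot;:&quot;jp.sf.fess.suggest.converter.ICUConverter&quot;, &quot;args&quot;:[&quot;Any-Lower&quot;]}, {&quot;class&quot;:&quot;jp.sf.fess.suggest.converter.NormalizeConverter&quot;}]" converters="[{&quot;class&quot;:&quot;jp.sf.fess.suggest.converter.SymbolConverter&quot;, &quot;method&quot;:[{&quot;name&quot;:&quot;addSymbol&quot;, &quot;args&quot;:[[&quot;0&quot;, &quot;1&quot;, &quot;2&quot;, &quot;3&quot;, &quot;4&quot;, &quot;5&quot;, &quot;6&quot;, &quot;7&quot;, &quot;8&quot;, &quot;9&quot;, &quot;0&quot;, &quot;1&quot;, &quot;2&quot;, &quot;3&quot;, &quot;4&quot;, &quot;5&quot;, &quot;6&quot;, &quot;7&quot;, &quot;8&quot;, &quot;9&quot;, &quot; &quot;, &quot; &quot;, &quot;【&quot;, &quot;】&quot;, &quot;・&quot;, &quot;(&quot;, &quot;)&quot;, &quot;(&quot;, &quot;)&quot;, &quot;%&quot;, &quot;%&quot;, &quot;~&quot;, &quot;+&quot;, &quot;#&quot;, &quot;#&quot;, &quot;\&quot;&quot;, &quot;”&quot;, &quot;'&quot;, &quot;\\&quot;, &quot;¥&quot;, &quot;[&quot;, &quot;]&quot;, &quot;「&quot;, &quot;」&quot;, &quot;『&quot;, &quot;』&quot;, &quot;&lt;&quot;, &quot;&gt;&quot;, &quot;&lt;&quot;, &quot;&gt;&quot;, &quot;/&quot;, &quot;/&quot;, &quot;{&quot;, &quot;}&quot;, &quot;&amp;&quot;, &quot;&amp;&quot;, &quot;Ⅰ&quot;, &quot;Ⅱ&quot;, &quot;Ⅲ&quot;, &quot;Ⅳ&quot;, &quot;Ⅴ&quot;, &quot;Ⅵ&quot;, &quot;Ⅶ&quot;, &quot;Ⅷ&quot;, &quot;Ⅸ&quot;, &quot;Ⅹ&quot;, &quot;ⅰ&quot;, &quot;ⅱ&quot;, &quot;ⅲ&quot;, &quot;ⅳ&quot;, &quot;ⅴ&quot;, &quot;ⅵ&quot;, &quot;ⅶ&quot;, &quot;ⅷ&quot;, &quot;ⅸ&quot;, &quot;ⅹ&quot;, &quot;-&quot;, &quot;-&quot;, &quot;.&quot;]]}]}]"/> | |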
1262 | + </analyzer> | |
1263 | + </fieldType> | |
1264 | + | |
1265 | + <fieldType name="string_suggest" class="solr.TextField" positionIncrementGap="100"> | |
1266 | + <analyzer> | |
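1267 | + <tokenizer class="jp.sf.fess.solr.plugin.analysis.SuggestStringTokenizerFactory" userDictionary="lang/userdict_suggest_ja.txt" userDictionaryEncoding="UTF-8" preConverters="[{&quot;class&quot;:&quot;jp.sf.fess.suggest.converter.ICUConverter&quot;, &quot;args&quot;:[&quot;Fullwidth-Halfwidth&quot;]}, {&quot;class&quot;:&quot;jp.sf.fess.suggest.converter.ICUConverter&quot;, &quot;args&quot;:[&quot;Any-Lower&quot;]}, {&quot;class&quot;:&quot;jp.sf.fess.suggest.converter.NormalizeConverter&quot;}]" converters="[{&quot;class&quot;:&quot;jp.sf.fess.suggest.converter.SymbolConverter&quot;, &quot;method&quot;:[{&quot;name&quot;:&quot;addSymbol&quot;, &quot;args&quot;:[[&quot;0&quot;, &quot;1&quot;, &quot;2&quot;, &quot;3&quot;, &quot;4&quot;, &quot;5&quot;, &quot;6&quot;, &quot;7&quot;, &quot;8&quot;, &quot;9&quot;, &quot;0&quot;, &quot;1&quot;, &quot;2&quot;, &quot;3&quot;, &quot;4&quot;, &quot;5&quot;, &quot;6&quot;, &quot;7&quot;, &quot;8&quot;, &quot;9&quot;, &quot; &quot;, &quot; &quot;, &quot;【&quot;, &quot;】&quot;, &quot;・&quot;, &quot;(&quot;, &quot;)&quot;, &quot;(&quot;, &quot;)&quot;, &quot;%&quot;, &quot;%&quot;, &quot;~&quot;, &quot;+&quot;, &quot;#&quot;, &quot;#&quot;, &quot;\&quot;&quot;, &quot;”&quot;, &quot;'&quot;, &quot;\\&quot;, &quot;¥&quot;, &quot;[&quot;, &quot;]&quot;, &quot;「&quot;, &quot;」&quot;, &quot;『&quot;, &quot;』&quot;, &quot;&lt;&quot;, &quot;&gt;&quot;, &quot;&lt;&quot;, &quot;&gt;&quot;, &quot;/&quot;, &quot;/&quot;, &quot;{&quot;, &quot;}&quot;, &quot;&amp;&quot;, &quot;&amp;&quot;, &quot;Ⅰ&quot;, &quot;Ⅱ&quot;, &quot;Ⅲ&quot;, &quot;Ⅳ&quot;, &quot;Ⅴ&quot;, &quot;Ⅵ&quot;, &quot;Ⅶ&quot;, &quot;Ⅷ&quot;, &quot;Ⅸ&quot;, &quot;Ⅹ&quot;, &quot;ⅰ&quot;, &quot;ⅱ&quot;, &quot;ⅲ&quot;, &quot;ⅳ&quot;, &quot;ⅴ&quot;, &quot;ⅵ&quot;, &quot;ⅶ&quot;, &quot;ⅷ&quot;, &quot;ⅸ&quot;, &quot;ⅹ&quot;, &quot;-&quot;, &quot;-&quot;, &quot;.&quot; ]]}]}]"/> | |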
1268 | + </analyzer> | |
1269 | + </fieldType> | |
1270 | + | |
1271 | + <fieldType name="domain_name" class="solr.TextField"> | |
1272 | + <analyzer type="index"> | |
1273 | + <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="." reverse="true"/> | |
1274 | + </analyzer> | |
1275 | + <analyzer type="query"> | |
1276 | + <tokenizer class="solr.KeywordTokenizerFactory" /> | |
1277 | + </analyzer> | |
1278 | + </fieldType> | |
1279 | + | |
1242 | 1280 | </types> |
1243 | 1281 | |
1244 | 1282 | <!-- Similarity is the scoring routine for each document vs. a query. |
@@ -35,7 +35,7 @@ | ||
35 | 35 | that you fully re-index after changing this setting as it can |
36 | 36 | affect both how text is indexed and queried. |
37 | 37 | --> |
38 | - <luceneMatchVersion>LUCENE_41</luceneMatchVersion> | |
38 | + <luceneMatchVersion>4.4</luceneMatchVersion> | |
39 | 39 | |
40 | 40 | <!-- <lib/> directives can be used to instruct Solr to load any Jars |
41 | 41 | identified and use them to resolve any "plugins" specified in |
@@ -66,6 +66,9 @@ | ||
66 | 66 | files in that directory which completely match the regex |
67 | 67 | (anchored on both ends) will be included. |
68 | 68 | |
69 | + If a 'dir' option (with or without a regex) is used and nothing | |
70 | + is found that matches, a warning will be logged. | |
71 | + | |
69 | 72 | The examples below can be used to load some solr-contribs along |
70 | 73 | with their external dependencies. |
71 | 74 | --> |
@@ -81,13 +84,6 @@ | ||
81 | 84 | <lib dir="../contrib/velocity/lib" regex=".*\.jar" /> |
82 | 85 | <lib dir="../dist/" regex="solr-velocity-\d.*\.jar" /> |
83 | 86 | |
84 | -<lib dir="../lib" /> | |
85 | - | |
86 | - <!-- If a 'dir' option (with or without a regex) is used and nothing | |
87 | - is found that matches, it will be ignored | |
88 | - --> | |
89 | - <lib dir="/total/crap/dir/ignored" /> | |
90 | - | |
91 | 87 | <!-- an exact 'path' can be used instead of a 'dir' to specify a |
92 | 88 | specific jar file. This will cause a serious error to be logged |
93 | 89 | if it can't be loaded. |
@@ -123,6 +119,39 @@ | ||
123 | 119 | <directoryFactory name="DirectoryFactory" |
124 | 120 | class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}"/> |
125 | 121 | |
122 | + <!-- The CodecFactory for defining the format of the inverted index. | |
123 | + The default implementation is SchemaCodecFactory, which is the official Lucene | |
124 | + index format, but hooks into the schema to provide per-field customization of | |
125 | + the postings lists and per-document values in the fieldType element | |
126 | + (postingsFormat/docValuesFormat). Note that most of the alternative implementations | |
127 | + are experimental, so if you choose to customize the index format, it's a good | |
128 | + idea to convert back to the official format e.g. via IndexWriter.addIndexes(IndexReader) | |
129 | + before upgrading to a newer version to avoid unnecessary reindexing. | |
130 | + --> | |
131 | + <codecFactory class="solr.SchemaCodecFactory"/> | |
132 | + | |
133 | + <!-- To enable dynamic schema REST APIs, use the following for <schemaFactory>: | |
134 | + | |
135 | + <schemaFactory class="ManagedIndexSchemaFactory"> | |
136 | + <bool name="mutable">true</bool> | |
137 | + <str name="managedSchemaResourceName">managed-schema</str> | |
138 | + </schemaFactory> | |
139 | + | |
140 | + When ManagedIndexSchemaFactory is specified, Solr will load the schema from | |
141 | + the resource named in 'managedSchemaResourceName', rather than from schema.xml. | |
142 | + Note that the managed schema resource CANNOT be named schema.xml. If the managed | |
143 | + schema does not exist, Solr will create it after reading schema.xml, then rename | |
144 | + 'schema.xml' to 'schema.xml.bak'. | |
145 | + | |
146 | + Do NOT hand edit the managed schema - external modifications will be ignored and | |
147 | + overwritten as a result of schema modification REST API calls. | |
148 | + | |
149 | + When ManagedIndexSchemaFactory is specified with mutable = true, schema | |
150 | + modification REST API calls will be allowed; otherwise, error responses will be | |
151 | + sent back for these requests. | |
152 | + --> | |
153 | + <schemaFactory class="ClassicIndexSchemaFactory"/> | |
154 | + | |
126 | 155 | <!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
127 | 156 | Index Config - These settings control low-level behavior of indexing |
128 | 157 | Most example settings here show the default value, but are commented |
@@ -154,7 +183,8 @@ | ||
154 | 183 | maxBufferedDocs sets a limit on the number of documents buffered |
155 | 184 | before flushing. |
156 | 185 | If both ramBufferSizeMB and maxBufferedDocs are set, then |
157 | - Lucene will flush based on whichever limit is hit first. --> | |
186 | + Lucene will flush based on whichever limit is hit first. | |
187 | + The default is 100 MB. --> | |
158 | 188 | <!-- <ramBufferSizeMB>100</ramBufferSizeMB> --> |
159 | 189 | <!-- <maxBufferedDocs>1000</maxBufferedDocs> --> |
160 | 190 |
@@ -212,7 +242,7 @@ | ||
212 | 242 | More details on the nuances of each LockFactory... |
213 | 243 | http://wiki.apache.org/lucene-java/AvailableLockFactories |
214 | 244 | --> |
215 | - <!-- <lockType>native</lockType> --> | |
245 | + <lockType>${solr.lock.type:native}</lockType> | |
216 | 246 | |
217 | 247 | <!-- Unlock On Startup |
218 | 248 |
@@ -221,7 +251,7 @@ | ||
221 | 251 | processes to safely access a lucene index, and should be used |
222 | 252 | with care. Default is "false". |
223 | 253 | |
224 | - This is not needed if lock type is 'none' or 'single' | |
254 | + This is not needed if lock type is 'single' | |
225 | 255 | --> |
226 | 256 | <!-- |
227 | 257 | <unlockOnStartup>false</unlockOnStartup> |
@@ -240,12 +270,9 @@ | ||
240 | 270 | --> |
241 | 271 | |
242 | 272 | <!-- Commit Deletion Policy |
243 | - | |
244 | 273 | Custom deletion policies can be specified here. The class must |
245 | 274 | implement org.apache.lucene.index.IndexDeletionPolicy. |
246 | 275 | |
247 | - http://lucene.apache.org/java/3_5_0/api/core/org/apache/lucene/index/IndexDeletionPolicy.html | |
248 | - | |
249 | 276 | The default Solr IndexDeletionPolicy implementation supports |
250 | 277 | deleting index commit points on number of commits, age of |
251 | 278 | commit point and optimized status. |
@@ -277,10 +304,11 @@ | ||
277 | 304 | To aid in advanced debugging, Lucene provides an "InfoStream" |
278 | 305 | of detailed information when indexing. |
279 | 306 | |
280 | - Setting The value to true will instruct the underlying Lucene | |
281 | - IndexWriter to write its debugging info the specified file | |
307 | + Setting the value to true will instruct the underlying Lucene | |
308 | + IndexWriter to write its info stream to solr's log. By default, | |
309 | + this is enabled here, and controlled through log4j.properties. | |
282 | 310 | --> |
283 | - <!-- <infoStream file="INFOSTREAM.txt">false</infoStream> --> | |
311 | + <infoStream>true</infoStream> | |
284 | 312 | </indexConfig> |
285 | 313 | |
286 | 314 |
@@ -327,7 +355,7 @@ | ||
327 | 355 | commit before automatically triggering a new commit. |
328 | 356 | |
329 | 357 | maxTime - Maximum amount of time in ms that is allowed to pass |
330 | - since a document was added before automaticly | |
358 | + since a document was added before automatically | |
331 | 359 | triggering a new commit. |
332 | 360 | openSearcher - if false, the commit causes recent index changes |
333 | 361 | to be flushed to stable storage, but does not cause a new |
@@ -338,7 +366,7 @@ | ||
338 | 366 | --> |
339 | 367 | <!-- |
340 | 368 | <autoCommit> |
341 | - <maxTime>15000</maxTime> | |
369 | + <maxTime>${solr.autoCommit.maxTime:15000}</maxTime> | |
342 | 370 | <openSearcher>false</openSearcher> |
343 | 371 | </autoCommit> |
344 | 372 | --> |
@@ -349,9 +377,9 @@ | ||
349 | 377 | faster and more near-realtime friendly than a hard commit. |
350 | 378 | --> |
351 | 379 | <!-- |
352 | - <autoSoftCommit> | |
353 | - <maxTime>1000</maxTime> | |
354 | - </autoSoftCommit> | |
380 | + <autoSoftCommit> | |
381 | + <maxTime>${solr.autoSoftCommit.maxTime:-1}</maxTime> | |
382 | + </autoSoftCommit> | |
355 | 383 | --> |
356 | 384 | |
357 | 385 | <!-- Update Related Event Listeners |
@@ -673,6 +701,13 @@ | ||
673 | 701 | POST. You can use POST to pass request parameters not |
674 | 702 | fitting into the URL. |
675 | 703 | |
704 | + addHttpRequestToContext - if set to true, it will instruct | |
705 | + the requestParsers to include the original HttpServletRequest | |
706 | + object in the context map of the SolrQueryRequest under the | |
707 | + key "httpRequest". It will not be used by any of the existing | |
708 | + Solr components, but may be useful when developing custom | |
709 | + plugins. | |
710 | + | |
676 | 711 | *** WARNING *** |
677 | 712 | The settings below authorize Solr to fetch remote files. You |
678 | 713 | should make sure your system has some authentication before |
@@ -681,7 +716,8 @@ | ||
681 | 716 | --> |
682 | 717 | <requestParsers enableRemoteStreaming="true" |
683 | 718 | multipartUploadLimitInKB="2048000" |
684 | - formdataUploadLimitInKB="2048"/> | |
719 | + formdataUploadLimitInKB="2048" | |
720 | + addHttpRequestToContext="false"/> | |
685 | 721 | |
686 | 722 | <!-- HTTP Caching |
687 | 723 |
@@ -971,7 +1007,12 @@ | ||
971 | 1007 | updateRequestProcessorChains that can be used by name |
972 | 1008 | on each Update Request |
973 | 1009 | --> |
1010 | + <!-- | |
974 | 1011 | <lst name="defaults"> |
1012 | + <str name="update.chain">dedupe</str> | |
1013 | + </lst> | |
1014 | + --> | |
1015 | + <lst name="defaults"> | |
975 | 1016 | <str name="update.chain">langid</str> |
976 | 1017 | </lst> |
977 | 1018 | </requestHandler> |
@@ -1125,7 +1166,7 @@ | ||
1125 | 1166 | |
1126 | 1167 | http://wiki.apache.org/solr/SolrReplication |
1127 | 1168 | |
1128 | - It is also neccessary for SolrCloud to function (in Cloud mode, the | |
1169 | + It is also necessary for SolrCloud to function (in Cloud mode, the | |
1129 | 1170 | replication handler is used to bulk transfer segments when nodes |
1130 | 1171 | are added or need to recover). |
1131 | 1172 |
@@ -1134,7 +1175,7 @@ | ||
1134 | 1175 | <requestHandler name="/replication" class="solr.ReplicationHandler" > |
1135 | 1176 | <!-- |
1136 | 1177 | To enable simple master/slave replication, uncomment one of the |
1137 | - sections below, depending on wether this solr instance should be | |
1178 | + sections below, depending on whether this solr instance should be | |
1138 | 1179 | the "master" or a "slave". If this instance is a "slave" you will |
1139 | 1180 | also need to fill in the masterUrl to point to a real machine. |
1140 | 1181 | --> |
@@ -1205,7 +1246,7 @@ | ||
1205 | 1246 | --> |
1206 | 1247 | <searchComponent name="spellcheck" class="solr.SpellCheckComponent"> |
1207 | 1248 | |
1208 | - <str name="queryAnalyzerFieldType">textSpell</str> | |
1249 | + <str name="queryAnalyzerFieldType">content</str> | |
1209 | 1250 | |
1210 | 1251 | <!-- Multiple "Spell Checkers" can be declared and used by this |
1211 | 1252 | component |
@@ -1214,7 +1255,7 @@ | ||
1214 | 1255 | <!-- a spellchecker built from a field of the main index --> |
1215 | 1256 | <lst name="spellchecker"> |
1216 | 1257 | <str name="name">default</str> |
1217 | - <str name="field">name</str> | |
1258 | + <str name="field">content</str> | |
1218 | 1259 | <str name="classname">solr.DirectSolrSpellChecker</str> |
1219 | 1260 | <!-- the spellcheck distance measure used, the default is the internal levenshtein --> |
1220 | 1261 | <str name="distanceMeasure">internal</str> |
@@ -1368,6 +1409,52 @@ | ||
1368 | 1409 | <str>suggestContentJa</str> |
1369 | 1410 | </arr> |
1370 | 1411 | </requestHandler> |
1412 | + <!-- content_ts field --> | |
1413 | + <searchComponent class="solr.SpellCheckComponent" name="suggestContentTs"> | |
1414 | + <lst name="spellchecker"> | |
1415 | + <str name="name">suggestContentTs</str> | |
1416 | + <str name="classname">org.apache.solr.spelling.suggest.Suggester</str> | |
1417 | + <str name="lookupImpl">org.apache.solr.spelling.suggest.fst.WFSTLookupFactory</str> | |
1418 | + <str name="field">content_ts</str> | |
1419 | + <float name="threshold">0.005</float> | |
1420 | + <str name="buildOnCommit">true</str> | |
1421 | + </lst> | |
1422 | + </searchComponent> | |
1423 | + <requestHandler class="org.apache.solr.handler.component.SearchHandler" name="/suggest/content_ts"> | |
1424 | + <lst name="defaults"> | |
1425 | + <str name="spellcheck">true</str> | |
1426 | + <str name="spellcheck.dictionary">suggestContentTs</str> | |
1427 | + <str name="spellcheck.onlyMorePopular">true</str> | |
1428 | + <str name="spellcheck.count">5</str> | |
1429 | + <str name="spellcheck.collate">true</str> | |
1430 | + </lst> | |
1431 | + <arr name="components"> | |
1432 | + <str>suggestContentTs</str> | |
1433 | + </arr> | |
1434 | + </requestHandler> | |
1435 | + <!-- title_ss field: suggester dictionary built from title_ss (used by /suggest/title_ss) --> | |
1436 | + <searchComponent class="solr.SpellCheckComponent" name="suggestTitleSs"> | |
1437 | + <lst name="spellchecker"> | |
1438 | + <str name="name">suggestTitleSs</str> | |
1439 | + <str name="classname">org.apache.solr.spelling.suggest.Suggester</str> | |
1440 | + <str name="lookupImpl">org.apache.solr.spelling.suggest.fst.WFSTLookupFactory</str> | |
1441 | + <str name="field">title_ss</str> | |
1442 | + <float name="threshold">0.005</float> | |
1443 | + <str name="buildOnCommit">true</str> | |
1444 | + </lst> | |
1445 | + </searchComponent> | |
1446 | + <requestHandler class="org.apache.solr.handler.component.SearchHandler" name="/suggest/title_ss"> | |
1447 | + <lst name="defaults"> | |
1448 | + <str name="spellcheck">true</str> | |
1449 | + <str name="spellcheck.dictionary">suggestTitleSs</str> | |
1450 | + <str name="spellcheck.onlyMorePopular">true</str> | |
1451 | + <str name="spellcheck.count">5</str> | |
1452 | + <str name="spellcheck.collate">true</str> | |
1453 | + </lst> | |
1454 | + <arr name="components"> | |
1455 | + <str>suggestTitleSs</str> | |
1456 | + </arr> | |
1457 | + </requestHandler> | |
1371 | 1458 | |
1372 | 1459 | <!-- Term Vector Component |
1373 | 1460 |
@@ -1694,7 +1781,18 @@ | ||
1694 | 1781 | rich documents injected via ExtractingRequestHandler. |
1695 | 1782 | See more about langId at http://wiki.apache.org/solr/LanguageDetection |
1696 | 1783 | --> |
1784 | + <!-- | |
1697 | 1785 | <updateRequestProcessorChain name="langid"> |
1786 | + <processor class="org.apache.solr.update.processor.TikaLanguageIdentifierUpdateProcessorFactory"> | |
1787 | + <str name="langid.fl">text,title,subject,description</str> | |
1788 | + <str name="langid.langField">language_s</str> | |
1789 | + <str name="langid.fallback">en</str> | |
1790 | + </processor> | |
1791 | + <processor class="solr.LogUpdateProcessorFactory" /> | |
1792 | + <processor class="solr.RunUpdateProcessorFactory" /> | |
1793 | + </updateRequestProcessorChain> | |
1794 | + --> | |
1795 | + <updateRequestProcessorChain name="langid"> | |
1698 | 1796 | <processor class="org.apache.solr.update.processor.LangDetectLanguageIdentifierUpdateProcessorFactory"> |
1699 | 1797 | <str name="langid.fl">content</str> |
1700 | 1798 | <str name="langid.langField">solrlang_s</str> |
@@ -1751,6 +1849,7 @@ | ||
1751 | 1849 | <queryResponseWriter name="php" class="solr.PHPResponseWriter"/> |
1752 | 1850 | <queryResponseWriter name="phps" class="solr.PHPSerializedResponseWriter"/> |
1753 | 1851 | <queryResponseWriter name="csv" class="solr.CSVResponseWriter"/> |
1852 | + <queryResponseWriter name="schema.xml" class="solr.SchemaXmlResponseWriter"/> | |
1754 | 1853 | --> |
1755 | 1854 | |
1756 | 1855 | <queryResponseWriter name="json" class="solr.JSONResponseWriter"> |
@@ -1800,6 +1899,8 @@ | ||
1800 | 1899 | <valueSourceParser name="myfunc" |
1801 | 1900 | class="com.mycompany.MyValueSourceParser" /> |
1802 | 1901 | --> |
1902 | + <valueSourceParser name="wordfreq" | |
1903 | + class="jp.sf.fess.solr.plugin.search.WordFreqValueSourceParser" /> | |
1803 | 1904 | |
1804 | 1905 | |
1805 | 1906 | <!-- Document Transformers |
@@ -7,6 +7,7 @@ | ||
7 | 7 | SUBCMD=$1 |
8 | 8 | SOLR_PORT=$2 |
9 | 9 | NUM_SHARDS=$3 |
10 | +MAX_SHARDS=$4 | |
10 | 11 | CONFIG_NAME=collection1 |
11 | 12 | LOG_DIR=$BASE_DIR/logs/ |
12 | 13 | PID_FILE=$BASE_DIR/bin/pid |
@@ -41,6 +42,10 @@ | ||
41 | 42 | exit 1; |
42 | 43 | fi |
43 | 44 | |
45 | +if [ x$MAX_SHARDS = "x" ] ; then | |
46 | + MAX_SHARDS=1 | |
47 | +fi | |
48 | + | |
44 | 49 | mkdir -p $LOG_DIR |
45 | 50 | |
46 | 51 | ZK_HOSTS="__FESS_ZK_HOSTS__" |
@@ -55,7 +60,7 @@ | ||
55 | 60 | fi |
56 | 61 | ZK_PORT=`expr $SOLR_PORT + 1000` |
57 | 62 | echo "ZooKeeper Port: $ZK_PORT" |
58 | - SOLR_OPTS="-Djetty.port=$SOLR_PORT -Dbootstrap_confdir=$BASE_DIR/solr/$CONFIG_NAME/conf -Dcollection.configName=$CONFIG_NAME -DzkRun -DzkHost=$ZK_HOSTS -DnumShards=$NUM_SHARDS -Dsolr.solr.home=$BASE_DIR/solr -Dsolr.core.name=$CONFIG_NAME" | |
63 | + SOLR_OPTS="-Djetty.port=$SOLR_PORT -Dbootstrap_confdir=$BASE_DIR/solr/$CONFIG_NAME/conf -Dcollection.configName=$CONFIG_NAME -DzkRun -DzkHost=$ZK_HOSTS -DnumShards=$NUM_SHARDS -DmaxShardsPerNode=$MAX_SHARDS -Dsolr.solr.home=$BASE_DIR/solr -Dsolr.core.name=$CONFIG_NAME" | |
59 | 64 | echo "Start ZooKeeper and Solr server." |
60 | 65 | elif [ x$SUBCMD = "xreplica" ] ; then |
61 | 66 | ZK_PORT=`expr $SOLR_PORT + 1000` |
@@ -8,11 +8,11 @@ | ||
8 | 8 | FESS_SRC_DIR=$BASE_DIR/src/fess |
9 | 9 | SOLR_SRC_DIR=$BASE_DIR/src/solr |
10 | 10 | |
11 | -FESS_DOWNLOAD_URL="http://sourceforge.jp/frs/redir.php?m=jaist&f=%2Ffess%2F58184%2Ffess-server-8.0.0.zip" | |
12 | -FESS_NAME=fess-server-8.0.0 | |
11 | +FESS_DOWNLOAD_URL="http://sourceforge.jp/frs/redir.php?m=iij&f=%2Ffess%2F59462%2Ffess-server-8.2.0.zip" | |
12 | +FESS_NAME=fess-server-8.2.0 | |
13 | 13 | FESS_SERVER_DIR=$BUILD_DIR/$FESS_NAME |
14 | -SOLR_DOWNLOAD_URL=http://archive.apache.org/dist/lucene/solr/4.1.0/solr-4.1.0.zip | |
15 | -SOLR_NAME=solr-4.1.0 | |
14 | +SOLR_DOWNLOAD_URL=http://archive.apache.org/dist/lucene/solr/4.4.0/solr-4.4.0.zip | |
15 | +SOLR_NAME=solr-4.4.0 | |
16 | 16 | SOLR_DIR=$BUILD_DIR/$SOLR_NAME |
17 | 17 | SOLR_CLOUD_DIR=$BUILD_DIR/fess-cloud-jetty |
18 | 18 |
@@ -0,0 +1,33 @@ | ||
1 | +#!/bin/bash | |
2 | + | |
3 | +bash build.sh localhost:9180,localhost:9280,localhost:9380 | |
4 | + | |
5 | +cd target | |
6 | +cp -r fess-cloud-jetty/ fess-cloud-1 | |
7 | +cp -r fess-cloud-jetty/ fess-cloud-2 | |
8 | +cp -r fess-cloud-jetty/ fess-cloud-3 | |
9 | +cp -r fess-cloud-jetty/ fess-cloud-4 | |
10 | +chmod +x fess-cloud-*/bin/*.sh | |
11 | +cd .. | |
12 | + | |
13 | +cd target/fess-cloud-1/ | |
14 | +bash bin/server.sh leader 8180 3 3 | |
15 | +tail -f logs/solrcloud.log & | |
16 | +cd ../.. | |
17 | +sleep 10 | |
18 | + | |
19 | +cd target/fess-cloud-2/ | |
20 | +bash bin/server.sh replica 8280 | |
21 | +tail -f logs/solrcloud.log & | |
22 | +cd ../.. | |
23 | +sleep 10 | |
24 | + | |
25 | +cd target/fess-cloud-3/ | |
26 | +bash bin/server.sh replica 8380 | |
27 | +tail -f logs/solrcloud.log & | |
28 | +cd ../.. | |
29 | +sleep 10 | |
30 | + | |
31 | +cd target/fess-cloud-4/ | |
32 | +bash bin/server.sh solr 8480 | |
33 | +tail -f logs/solrcloud.log & |