From b5fae2d58069e5072a70ac71b3f008cd4e18b1c5 Mon Sep 17 00:00:00 2001 From: Matthias Steffens Date: Fri, 17 Nov 2023 21:09:09 +0100 Subject: [PATCH 1/7] #1161 Adds configuration options for a weighted search --- application/configs/application.ini | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/application/configs/application.ini b/application/configs/application.ini index 95df138bf..e979ac667 100644 --- a/application/configs/application.ini +++ b/application/configs/application.ini @@ -227,6 +227,28 @@ search.index.field.year.order = 'PublishedDate,PublishedYear' ; Enrichment fields to be excluded from indexing (comma separated) search.index.enrichment.blacklist = 'opus_doi_json' +; WEIGHTED SEARCH +; if set to `1` a weighted search will be used where query matches in particular fields can be +; assigned different importance +search.weightedSearch = 0 + +; boost factors for fields which increase (>1.0) or decrease (<1.0) the importance of query matches +; in that field +search.simple.abstract = 1 +search.simple.title = 1 +search.simple.author = 1 +search.simple.subject = 1 +search.simple.title_parent = 1 +search.simple.title_additional = 1 +search.simple.title_sub = 1 +search.simple.creating_corporation = 1 +search.simple.contributing_corporation = 1 +search.simple.publisher_name = 1 +search.simple.publisher_place = 1 +search.simple.identifier = 1 +search.simple.persons = 1 +search.simple.fulltext = 1 + ;DOCTYPE VALIDATION SCHEMA FILE ; TODO determine path dynamically (does this belong into the framework) documentTypes.xmlSchema = APPLICATION_PATH "/vendor/opus4-repo/framework/library/Opus/Document/documenttype.xsd" From c09e28c0f4f56d4bdd5d0b9f4c30832b2ee93af7 Mon Sep 17 00:00:00 2001 From: Matthias Steffens Date: Tue, 21 Nov 2023 17:47:19 +0100 Subject: [PATCH 2/7] #1161 Adds examples for weighted search configuration options to config.ini.template --- application/configs/application.ini | 18 +++++++++--------- application/configs/config.ini.template | 11 +++++++++++ 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/application/configs/application.ini b/application/configs/application.ini index e979ac667..023670844 100644 --- a/application/configs/application.ini +++ b/application/configs/application.ini @@ -235,19 +235,19 @@ search.weightedSearch = 0 ; boost factors for fields which increase (>1.0) or decrease (<1.0) the importance of query matches ; in that field search.simple.abstract = 1 -search.simple.title = 1 search.simple.author = 1 -search.simple.subject = 1 -search.simple.title_parent = 1 -search.simple.title_additional = 1 -search.simple.title_sub = 1 -search.simple.creating_corporation = 1 search.simple.contributing_corporation = 1 -search.simple.publisher_name = 1 -search.simple.publisher_place = 1 +search.simple.creating_corporation = 1 +search.simple.fulltext = 1 search.simple.identifier = 1 search.simple.persons = 1 -search.simple.fulltext = 1 +search.simple.publisher_name = 1 +search.simple.publisher_place = 1 +search.simple.subject = 1 +search.simple.title = 1 +search.simple.title_additional = 1 +search.simple.title_parent = 1 +search.simple.title_sub = 1 ;DOCTYPE VALIDATION SCHEMA FILE ; TODO determine path dynamically (does this belong into the framework) diff --git a/application/configs/config.ini.template b/application/configs/config.ini.template index e7752c206..8cd6d1252 100644 --- a/application/configs/config.ini.template +++ b/application/configs/config.ini.template @@ -68,6 +68,17 @@ searchengine.solr.default.service.extract.endpoint.localhost.timeout = 10 ; Turn on to display document titles of search results in user interface language if possible search.result.display.preferUserInterfaceLanguage = 0 +;WEIGHTED SEARCH +; if set to `1` a weighted search will be used where query matches in particular fields can be +; assigned different importance +;search.weightedSearch = 1 + +; boost factors for fields which increase (>1.0) or decrease (<1.0) the importance of query matches +; in that field +;search.simple.title = 10 +; boost factors for your instance's enrichment fields can be added in a similar fashion, e.g.: +;search.simple.Relation = 1.5 + ;MAIL SETTINGS ; mail.opus.smtp = localhost; SMTP server for sending email ; mail.opus.port = 25 ; SMTP server port for sending email From 536a6c953adbee76d6ad10ccbb35e9c5ad88f972 Mon Sep 17 00:00:00 2001 From: Matthias Steffens Date: Tue, 5 Dec 2023 21:23:16 +0100 Subject: [PATCH 3/7] #1161 The weighted search is now activated by default in application.ini --- application/configs/application.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/application/configs/application.ini b/application/configs/application.ini index 023670844..2795e29bf 100644 --- a/application/configs/application.ini +++ b/application/configs/application.ini @@ -230,7 +230,7 @@ search.index.enrichment.blacklist = 'opus_doi_json' ; WEIGHTED SEARCH ; if set to `1` a weighted search will be used where query matches in particular fields can be ; assigned different importance -search.weightedSearch = 0 +search.weightedSearch = 1 ; boost factors for fields which increase (>1.0) or decrease (<1.0) the importance of query matches ; in that field From dd9fe84bab6ea8893974d2eb9ec5653c07fa43c8 Mon Sep 17 00:00:00 2001 From: Matthias Steffens Date: Tue, 5 Dec 2023 21:24:25 +0100 Subject: [PATCH 4/7] #1161 Mention in the configuration that setting a field's boost factor to 0 will cause matches in that field to get ignored --- application/configs/application.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/application/configs/application.ini b/application/configs/application.ini index 2795e29bf..edcfa9389 100644 --- a/application/configs/application.ini +++ b/application/configs/application.ini @@ -233,7 +233,7 @@ search.index.enrichment.blacklist = 'opus_doi_json' search.weightedSearch = 1 ; boost factors for fields which increase (>1.0) or decrease (<1.0) the importance of query matches -; in that field +; in that field; set a field's boost factor to `0` to ignore matches in that field search.simple.abstract = 1 search.simple.author = 1 search.simple.contributing_corporation = 1 From fc12f0ca9b77f88ef70f5197b3d6b30b284de565 Mon Sep 17 00:00:00 2001 From: Matthias Steffens Date: Tue, 5 Dec 2023 21:36:22 +0100 Subject: [PATCH 5/7] #1161 Adds "search.weightMultiplier" configuration option which allows to increase the importance of phrase matches (i.e., cases where all query terms appear in close proximity) --- application/configs/application.ini | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/application/configs/application.ini b/application/configs/application.ini index edcfa9389..40e48c963 100644 --- a/application/configs/application.ini +++ b/application/configs/application.ini @@ -249,6 +249,10 @@ search.simple.title_additional = 1 search.simple.title_parent = 1 search.simple.title_sub = 1 +; multiplier (integer >0) to further increase field-specific boost factors (and thus the importance +; of matches) when matching phrases (i.e., in cases where all query terms appear in close proximity) +search.weightMultiplier = 5 + ;DOCTYPE VALIDATION SCHEMA FILE ; TODO determine path dynamically (does this belong into the framework) documentTypes.xmlSchema = APPLICATION_PATH "/vendor/opus4-repo/framework/library/Opus/Document/documenttype.xsd" From 650419b71d444485916d95b822d7a391de25c082 Mon Sep 17 00:00:00 2001 From: Matthias Steffens Date: Tue, 5 Dec 2023 21:43:15 +0100 Subject: [PATCH 6/7] #1161 Uses opus4-search dev branch which implements a weighted search --- composer.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/composer.json b/composer.json index 4dc83ae0b..8ca7cba96 100644 --- a/composer.json +++ b/composer.json @@ -24,7 +24,7 @@ "jpgraph/jpgraph": "dev-master", "opus4-repo/opus4-common": "dev-master as 4.8.1", "opus4-repo/framework": "dev-master as 4.8.1", - "opus4-repo/search": "4.7.3.x-dev || 4.8.1.x-dev", + "opus4-repo/search": "dev-weightedSearch37 as 4.8.1", "opus4-repo/opus4-bibtex": "^4.8", "opus4-repo/opus4-import": "dev-main as 4.8.1", "opus4-repo/opus4-pdf": "^4.8", From 885aa5e59b7f917fd691aa0b87ee49add52fa580 Mon Sep 17 00:00:00 2001 From: j3nsch Date: Fri, 17 May 2024 13:43:31 +0200 Subject: [PATCH 7/7] #1108 Change Solr version everywhere --- build.xml | 2 +- tests/bin/install_solr_docker.sh | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/build.xml b/build.xml index 69743c91e..dac93df74 100644 --- a/build.xml +++ b/build.xml @@ -8,7 +8,7 @@ - + diff --git a/tests/bin/install_solr_docker.sh b/tests/bin/install_solr_docker.sh index 333f03a73..daf8c93aa 100755 --- a/tests/bin/install_solr_docker.sh +++ b/tests/bin/install_solr_docker.sh @@ -1,11 +1,11 @@ #!/usr/bin/env bash # -# Script to install Solr. By default, version 9.4.0 will be installed. +# Script to install Solr. By default, version 9.5.0 will be installed. # Another Solr version can be specified using the `--version` option. # Define variables and their default values -version="9.4.0" +version="9.5.0" # Parse command line options while [ $# -gt 0 ]; do @@ -27,7 +27,7 @@ done # Check --version input if ! [[ "$version" =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]]; then echo "Unrecognized version number: $version" - echo "The --version option requires a 3-digit version number, e.g.: 9.4.0" + echo "The --version option requires a 3-digit version number, e.g.: 9.5.0" exit 1 fi