diff --git a/README.md b/README.md
index 1f4e085..05f8673 100644
--- a/README.md
+++ b/README.md
@@ -62,7 +62,7 @@ localization quality assurance testers only.
You can see all these concepts in action
in [our HTTP server example implementation](./examples/locales-http-examples).
-#### Calculate the affinity between two locales
+#### Calculate the affinity between locales
This feature enables you to easily and programmatically reason around affinity between locales,
without having to know anything about how they relate to each other.
@@ -77,18 +77,31 @@ We define the affinity between two locales using a `LocaleAffinity` enum value:
should understand both if they understand one of them.
- `SAME`: Locales identify the same language
-We offer two separate logics, each dedicated to separate use-cases:
+We offer separate affinity logics, each dedicated to separate use-cases:
-- **Locale affinity calculation**: To be used when we need visibility on the affinity of a given
- locale against a set of locales.
-- **Reference locales calculation:** To be used when we need to join two datasets based on language
- identifiers. It is indeed impossible to perform such a join operation out of the box, as language
- identifiers can immensely differ even when they are syntactically valid and identify the very same
- language. For Example: `zh-Hant`, `zh-HK`, `zh-MO`, `zh-Hant-TW`, `zh-Hant-FR`, `zh-US` all
- identify Traditional Chinese, but `zh` and `zh-CN` identify Simplified Chinese.
+##### Calculate the affinity of a given locale against a set of locales
-You can see all these concepts in action
-in [our locales affinity example implementations](./examples/locales-affinity-examples).
+This should be used when we need visibility on the affinity of a given locale, against a set of
+pre-configured locales. This can, for instance, be used to verify whether some content language is a
+good match for a given user, based on the Accept-Language header value received in an incoming
+request.
+
+You can see this concept in action
+in [our example implementation](./examples/locales-affinity-examples/src/main/java/com/spotify/i18n/locales/affinity/examples/AffinityCalculationExampleMain.java).
+
+##### Calculate the affinity between two given locales
+
+This should be used when we need visibility on the affinity between two given locales. This can, for
+instance, be used to join two datasets based on language identifiers and how they relate to each
+other in terms of affinity.
+
+It is indeed impossible to perform such a join operation out of the box, as language identifiers
+can immensely differ even when they are syntactically valid and identify the very same language. For
+example: `zh-Hant`, `zh-HK`, `zh-MO`, `zh-Hant-TW`, `zh-Hant-FR`, `zh-US` all
+identify Traditional Chinese, but `zh` and `zh-CN` identify Simplified Chinese.
+
+You can see this concept in action
+in [our example implementation](./examples/locales-affinity-examples/src/main/java/com/spotify/i18n/locales/affinity/examples/AffinityBasedJoinExampleMain.java).
### Utility helpers
diff --git a/examples/locales-affinity-examples/src/main/java/com/spotify/i18n/locales/affinity/examples/AffinityBasedJoinExampleMain.java b/examples/locales-affinity-examples/src/main/java/com/spotify/i18n/locales/affinity/examples/AffinityBasedJoinExampleMain.java
new file mode 100644
index 0000000..2f6933d
--- /dev/null
+++ b/examples/locales-affinity-examples/src/main/java/com/spotify/i18n/locales/affinity/examples/AffinityBasedJoinExampleMain.java
@@ -0,0 +1,122 @@
+/*-
+ * -\-\-
+ * locales-affinity-examples
+ * --
+ * Copyright (C) 2016 - 2025 Spotify AB
+ * --
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * -/-/-
+ */
+
+package com.spotify.i18n.locales.affinity.examples;
+
+import com.spotify.i18n.locales.common.LocaleAffinityBiCalculator;
+import com.spotify.i18n.locales.common.LocaleAffinityHelpersFactory;
+import com.spotify.i18n.locales.common.model.LocaleAffinityResult;
+import java.util.List;
+
+/**
+ * Showcase implementation of a Java-locales affinity based join between two sets of language tags
+ *
+ * @author Eric Fjøsne
+ */
+public class AffinityBasedJoinExampleMain {
+
+ /** Create a {@link LocaleAffinityBiCalculator} instance out of the factory */
+ private static final LocaleAffinityBiCalculator LOCALE_AFFINITY_BI_CALCULATOR =
+ LocaleAffinityHelpersFactory.getDefaultInstance().buildAffinityBiCalculator();
+
+ /**
+ * Example logic which attempts to join 2 sets of language tags.
+ *
+ * <p>Possible joins in the execution output are:
+ *
+ * <ul>
+ * <li>(bs-Cyrl-BA, bs-Latn) -> Join possible with SAME affinity.
+ * <li>(bs-Cyrl-BA, hr-MK) -> Join possible with MUTUALLY_INTELLIGIBLE affinity.
+ * <li>(de, de-AT) -> Join possible with SAME affinity.
+ * <li>(da-SE, nb-FI) -> Join possible with HIGH affinity.
+ * <li>(en-GB, en-JP) -> Join possible with SAME affinity.
+ * <li>(en-GB, en-SE) -> Join possible with SAME affinity.
+ * <li>(es-BE, ca) -> Join possible with LOW affinity.
+ * <li>(fr-SE, fr-BE-u-ca-gregorian) -> Join possible with SAME affinity.
+ * <li>(fr-SE, fr-CA) -> Join possible with SAME affinity.
+ * <li>(hr-BA, bs-Latn) -> Join possible with MUTUALLY_INTELLIGIBLE affinity.
+ * <li>(hr-BA, hr-MK) -> Join possible with SAME affinity.
+ * <li>(ja-IT, ja@calendar=buddhist) -> Join possible with SAME affinity.
+ * <li>(nl-BE, nl-ZA) -> Join possible with SAME affinity.
+ * <li>(zh-Hans-US, zh-CN) -> Join possible with SAME affinity.
+ * </ul>
+ *
+ * @param args
+ */
+ public static void main(String[] args) {
+ final List<String> languageTagsInOriginDataset =
+ List.of(
+ "bs-Cyrl-BA", // Bosnian (Cyrillic), Bosnia and Herzegovina
+ "de", // German
+ "da-SE", // Danish (Sweden)
+ "en-GB", // English (Great-Britain)
+ "es-BE", // Spanish (Belgium)
+ "fr-SE", // French (Sweden)
+ "hr-BA", // Croatian (Bosnia and Herzegovina)
+ "it-CH", // Italian (Switzerland)
+ "ja-IT", // Japanese (Italy)
+ "nl-BE", // Dutch (Belgium)
+ "zh-Hans-US", // Chinese (Simplified) (USA)
+ "zh-HK" // Chinese (Hong-Kong)
+ );
+
+ final List<String> languageTagsInTargetDataset =
+ List.of(
+ "bs-Latn", // Bosnian (Latin)
+ "ca", // Catalan
+ "de-AT", // German (Austria)
+ "en-JP", // English (Japan)
+ "en-SE", // English (Sweden)
+ "fr-BE-u-ca-gregorian", // French (Belgium), with gregorian calendar extension
+ "fr-CA", // French (Canada)
+ "hr-MK", // Croatian (North Macedonia)
+ "ja@calendar=buddhist", // Japanese, with buddhist calendar extension
+ "nb-FI", // Norwegian Bokmål (Finland)
+ "nl-ZA", // Dutch (South Africa)
+ "pt-US", // Portuguese (USA)
+ "zh-CN" // Chinese (Mainland China)
+ );
+
+ // Iterate through all possible combinations, and calculate the affinity for each of them.
+ for (String languageTagInOriginDataset : languageTagsInOriginDataset) {
+ for (String languageTagInTargetDataset : languageTagsInTargetDataset) {
+ LocaleAffinityResult affinityResult =
+ LOCALE_AFFINITY_BI_CALCULATOR.calculate(
+ languageTagInOriginDataset, languageTagInTargetDataset);
+ switch (affinityResult.affinity()) {
+ case NONE:
+ System.out.println(
+ String.format(
+ "(%s, %s) -> No join possible.",
+ languageTagInOriginDataset, languageTagInTargetDataset));
+ break;
+ default:
+ System.out.println(
+ String.format(
+ "(%s, %s) -> Join possible with %s affinity.",
+ languageTagInOriginDataset,
+ languageTagInTargetDataset,
+ affinityResult.affinity()));
+ break;
+ }
+ }
+ }
+ }
+}
diff --git a/examples/locales-affinity-examples/src/main/java/com/spotify/i18n/locales/affinity/examples/ReferenceLocalesBasedJoinExampleMain.java b/examples/locales-affinity-examples/src/main/java/com/spotify/i18n/locales/affinity/examples/ReferenceLocalesBasedJoinExampleMain.java
deleted file mode 100644
index 61c1697..0000000
--- a/examples/locales-affinity-examples/src/main/java/com/spotify/i18n/locales/affinity/examples/ReferenceLocalesBasedJoinExampleMain.java
+++ /dev/null
@@ -1,134 +0,0 @@
-/*-
- * -\-\-
- * locales-affinity-examples
- * --
- * Copyright (C) 2016 - 2025 Spotify AB
- * --
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- * -/-/-
- */
-
-package com.spotify.i18n.locales.affinity.examples;
-
-import com.ibm.icu.util.ULocale;
-import com.spotify.i18n.locales.common.LocaleAffinityHelpersFactory;
-import com.spotify.i18n.locales.common.ReferenceLocalesCalculator;
-import com.spotify.i18n.locales.common.model.RelatedReferenceLocale;
-import java.util.List;
-import java.util.Optional;
-import java.util.stream.Collectors;
-
-/**
- * Showcase implementation of Java-locales affinity calculation
- *
- * @author Eric Fjøsne
- */
-public class ReferenceLocalesBasedJoinExampleMain {
-
- /** Create a {@link ReferenceLocalesCalculator} instance out of the factory */
- private static final ReferenceLocalesCalculator REFERENCE_LOCALES_CALCULATOR =
- LocaleAffinityHelpersFactory.getDefaultInstance().buildRelatedReferenceLocalesCalculator();
-
- /**
- * Example logic which attempts to join 2 sets of language tags.
- *
- * Possible joins in the execution output are:
- *
- *
- * - (de, de-AT) on reference locale [de-AT] with SAME affinity
- *
- (en-GB, en-JP) on reference locale [en-GB] with SAME affinity
- *
- (en-GB, en-SE) on reference locale [en-SE] with SAME affinity
- *
- (es-BE, ca) on reference locale [ca] with LOW affinity
- *
- (fr-SE, fr-BE-u-ca-gregorian) on reference locale [fr-BE] with SAME affinity
- *
- (fr-SE, fr-CA) on reference locale [fr-CA] with SAME affinity
- *
- (ja-IT, ja@calendar=buddhist) on reference locale [ja] with SAME affinity
- *
- (nl-BE, nl-ZA) on reference locale [nl] with SAME affinity
- *
- (zh-Hans-US, zh-CN) on reference locale [zh] with SAME affinity
- *
- *
- * @param args
- */
- public static void main(String[] args) {
- final List languageTagsInOriginDataset =
- List.of(
- "de", // German
- "en-GB", // English (Great-Britain)
- "es-BE", // Spanish (Belgium)
- "fr-SE", // French (Sweden)
- "it-CH", // Italian (Switzerland)
- "ja-IT", // Japanese (Italy)
- "nl-BE", // Dutch (Belgium)
- "zh-Hans-US", // Chinese (Simplified) (USA)
- "zh-HK" // Chinese (Hong-Kong)
- );
- final List languageTagsInTargetDataset =
- List.of(
- "ca", // Catalan
- "de-AT", // German (Austria)
- "en-JP", // English (Japan)
- "en-SE", // English (Sweden)
- "fr-BE-u-ca-gregorian", // French (Belgium), with gregorian calendar extension
- "fr-CA", // French (Canada)
- "ja@calendar=buddhist", // Japanese, with buddhist calendar extension
- "nl-ZA", // Dutch (South Africa)
- "pt-US", // Portuguese (USA)
- "zh-CN" // Chinese (Mainland China)
- );
-
- // Iterate through all possible combinations
- for (String languageTagInOriginDataset : languageTagsInOriginDataset) {
- for (String languageTagInTargetDataset : languageTagsInTargetDataset) {
- // Retrieve the optional related reference locale based on which a join operation can be
- // performed, and display the outcome in the execution output.
- getRelatedReferenceLocaleForJoin(languageTagInOriginDataset, languageTagInTargetDataset)
- .ifPresentOrElse(
- (rrl) ->
- System.out.println(
- String.format(
- "(%s, %s) on reference locale [%s] with %s affinity",
- languageTagInOriginDataset,
- languageTagInTargetDataset,
- rrl.referenceLocale().toLanguageTag(),
- rrl.affinity())),
- () ->
- System.out.println(
- String.format(
- "(%s, %s), no join possible.",
- languageTagInOriginDataset, languageTagInTargetDataset)));
- }
- }
- }
-
- /**
- * Returns the optional {@link RelatedReferenceLocale} based on which the 2 language tags can be
- * joined together.
- */
- public static Optional getRelatedReferenceLocaleForJoin(
- final String languageTagInOriginDataset, final String languageTagInTargetDataset) {
- Optional bestMatchingReferenceLocale =
- REFERENCE_LOCALES_CALCULATOR.calculateBestMatchingReferenceLocale(
- languageTagInTargetDataset);
- if (bestMatchingReferenceLocale.isEmpty()) {
- return Optional.empty();
- } else {
- List relatedReferenceLocales =
- REFERENCE_LOCALES_CALCULATOR
- .calculateRelatedReferenceLocales(languageTagInOriginDataset)
- .stream()
- .collect(Collectors.toList());
- return relatedReferenceLocales.stream()
- .filter(rrl -> rrl.referenceLocale().equals(bestMatchingReferenceLocale.get()))
- .findFirst();
- }
- }
-}
diff --git a/locales-common/src/main/java/com/spotify/i18n/locales/common/LocaleAffinityBiCalculator.java b/locales-common/src/main/java/com/spotify/i18n/locales/common/LocaleAffinityBiCalculator.java
new file mode 100644
index 0000000..440ecb0
--- /dev/null
+++ b/locales-common/src/main/java/com/spotify/i18n/locales/common/LocaleAffinityBiCalculator.java
@@ -0,0 +1,42 @@
+/*-
+ * -\-\-
+ * locales-common
+ * --
+ * Copyright (C) 2016 - 2025 Spotify AB
+ * --
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * -/-/-
+ */
+
+package com.spotify.i18n.locales.common;
+
+import com.spotify.i18n.locales.common.model.LocaleAffinityResult;
+import edu.umd.cs.findbugs.annotations.Nullable;
+
+/**
+ * Represents an engine that calculates the locale affinity between two given language tags. All
+ * implementations of this interface must return a non-null {@link LocaleAffinityResult}, even when
+ * the given language tags are null or empty.
+ *
+ * @author Eric Fjøsne
+ */
+public interface LocaleAffinityBiCalculator {
+
+ /**
+ * Returns the calculated {@link LocaleAffinityResult} for the two given language tags.
+ *
+ * @param languageTag1 the first language tag, may be {@code null}
+ * @param languageTag2 the second language tag, may be {@code null}
+ * @return the locale affinity result
+ */
+ LocaleAffinityResult calculate(
+ @Nullable final String languageTag1, @Nullable final String languageTag2);
+}
diff --git a/locales-common/src/main/java/com/spotify/i18n/locales/common/LocaleAffinityHelpersFactory.java b/locales-common/src/main/java/com/spotify/i18n/locales/common/LocaleAffinityHelpersFactory.java
index 018274e..829a551 100644
--- a/locales-common/src/main/java/com/spotify/i18n/locales/common/LocaleAffinityHelpersFactory.java
+++ b/locales-common/src/main/java/com/spotify/i18n/locales/common/LocaleAffinityHelpersFactory.java
@@ -24,6 +24,7 @@
import com.ibm.icu.util.ULocale;
import com.spotify.i18n.locales.common.impl.LocaleAffinityCalculatorBaseImpl;
import com.spotify.i18n.locales.common.impl.ReferenceLocalesCalculatorBaseImpl;
+import com.spotify.i18n.locales.common.model.LocaleAffinity;
import com.spotify.i18n.locales.utils.acceptlanguage.AcceptLanguageUtils;
import com.spotify.i18n.locales.utils.languagetag.LanguageTagUtils;
import edu.umd.cs.findbugs.annotations.Nullable;
@@ -36,13 +37,15 @@
* A factory for creating instances of locale affinity related helpers:
*
*
- * - {@link LocaleAffinityCalculator}: A helper that calculates a locale affinity for a language
- * tag, against a given set of locales.
- *
- {@link ReferenceLocalesCalculator}: A helper that enables reference locales based
- * operations, most notably to join datasets by enabling match operations between an origin
- * and a target locale, and enabling filtering on the affinity between these locales.
+ *
- {@link LocaleAffinityCalculator}: A helper that calculates the locale affinity for a given
+ * language tag, against a configured set of locales.
+ *
- {@link LocaleAffinityBiCalculator}: A helper that calculates the locale affinity between
+ * two given language tags.
+ *
- {@link ReferenceLocalesCalculator}: A helper that enables reference locale-based
+ * operations.
*
*
+ * @see LocaleAffinity
* @author Eric Fjøsne
*/
public class LocaleAffinityHelpersFactory {
@@ -64,6 +67,7 @@ private LocaleAffinityHelpersFactory() {}
*
* @param acceptLanguage The Accept-Language value
* @return Pre-configured locale affinity calculator
+ * @see LocaleAffinity
* @see LocaleAffinityCalculator
* @see Accept-Language
@@ -82,6 +86,7 @@ public LocaleAffinityCalculator buildAffinityCalculatorForAcceptLanguage(
* calculate affinity for a language tag, against all the given supplied locales.
*
* @return Pre-configured locale affinity calculator
+ * @see LocaleAffinity
* @see LocaleAffinityCalculator
* @see ULocale
*/
@@ -97,6 +102,7 @@ public LocaleAffinityCalculator buildAffinityCalculatorForLocales(final SetInvalid or improperly formatted language tags will be ignored.
*
* @return Pre-configured locale affinity calculator
+ * @see LocaleAffinity
* @see LocaleAffinityCalculator
* @see IETF BCP 47 language tag
*/
@@ -110,6 +116,18 @@ public LocaleAffinityCalculator buildAffinityCalculatorForLanguageTags(
.collect(Collectors.toSet()));
}
+ /**
+ * Returns a pre-configured, ready-to-use instance of {@link LocaleAffinityBiCalculator}, that can
+ * calculate the affinity between two given language tags.
+ *
+ * @return Pre-configured locale affinity bi-calculator
+ * @see LocaleAffinity
+ * @see LocaleAffinityBiCalculator
+ */
+ public LocaleAffinityBiCalculator buildAffinityBiCalculator() {
+ return ReferenceLocalesCalculatorBaseImpl.builder().buildLocaleAffinityBiCalculator();
+ }
+
/**
* Returns a pre-configured, ready-to-use instance of {@link ReferenceLocalesCalculator}.
*
@@ -117,6 +135,6 @@ public LocaleAffinityCalculator buildAffinityCalculatorForLanguageTags(
* @see ReferenceLocalesCalculator
*/
public ReferenceLocalesCalculator buildRelatedReferenceLocalesCalculator() {
- return ReferenceLocalesCalculatorBaseImpl.builder().build();
+ return ReferenceLocalesCalculatorBaseImpl.builder().buildReferenceLocalesCalculator();
}
}
diff --git a/locales-common/src/main/java/com/spotify/i18n/locales/common/impl/LocaleAffinityCalculatorBaseImpl.java b/locales-common/src/main/java/com/spotify/i18n/locales/common/impl/LocaleAffinityCalculatorBaseImpl.java
index 1f432a2..00b50dd 100644
--- a/locales-common/src/main/java/com/spotify/i18n/locales/common/impl/LocaleAffinityCalculatorBaseImpl.java
+++ b/locales-common/src/main/java/com/spotify/i18n/locales/common/impl/LocaleAffinityCalculatorBaseImpl.java
@@ -79,6 +79,10 @@ public abstract class LocaleAffinityCalculatorBaseImpl implements LocaleAffinity
private static final int SCORE_THRESHOLD_HIGH = 30;
private static final int SCORE_THRESHOLD_LOW = 0;
+ // Language codes for which we need some manual tweaks
+ private static final String LANGUAGE_CODE_CROATIAN = "hr";
+ private static final String LANGUAGE_CODE_BOSNIAN = "bs";
+
/**
* Returns the set of {@link ULocale} against which affinity is being calculated.
*
@@ -167,6 +171,13 @@ private LocaleAffinity convertScoreToLocaleAffinity(final int score) {
}
private int getDistanceBetweenInputAndSupported(final LSR maxParsed, final LSR maxSupported) {
+ // Croatian should be matched with Bosnian. This is the case for Bosnian written in Latin
+ // script, but not Cyrillic, because the ICU implementation enforces script matching. We
+ // created a workaround to ensure that we return a MUTUALLY_INTELLIGIBLE affinity when
+ // encountering this pair of languages, regardless of script.
+ if (calculatingDistanceBetweenCroatianAndBosnian(maxParsed, maxSupported)) {
+ return 0;
+ }
return LOCALE_DISTANCE_INSTANCE.getBestIndexAndDistance(
maxParsed,
new LSR[] {maxSupported},
@@ -176,6 +187,13 @@ private int getDistanceBetweenInputAndSupported(final LSR maxParsed, final LSR m
LOCALE_DISTANCE_DIRECTION);
}
+ private boolean calculatingDistanceBetweenCroatianAndBosnian(final LSR lsr1, final LSR lsr2) {
+ // Only the language subtags are compared; the pair qualifies in either order.
+ if (lsr1.language.equals(LANGUAGE_CODE_CROATIAN)) {
+ return lsr2.language.equals(LANGUAGE_CODE_BOSNIAN);
+ }
+ if (lsr1.language.equals(LANGUAGE_CODE_BOSNIAN)) {
+ return lsr2.language.equals(LANGUAGE_CODE_CROATIAN);
+ }
+ return false;
+ }
+
private static LSR getMaximizedLanguageScriptRegion(final ULocale locale) {
return LIKELY_SUBTAGS_INSTANCE.makeMaximizedLsrFrom(
locale, LIKELY_SUBTAGS_RETURNS_INPUT_IF_UNMATCH);
diff --git a/locales-common/src/main/java/com/spotify/i18n/locales/common/impl/ReferenceLocalesCalculatorBaseImpl.java b/locales-common/src/main/java/com/spotify/i18n/locales/common/impl/ReferenceLocalesCalculatorBaseImpl.java
index 4f8347e..241986c 100644
--- a/locales-common/src/main/java/com/spotify/i18n/locales/common/impl/ReferenceLocalesCalculatorBaseImpl.java
+++ b/locales-common/src/main/java/com/spotify/i18n/locales/common/impl/ReferenceLocalesCalculatorBaseImpl.java
@@ -20,12 +20,16 @@
package com.spotify.i18n.locales.common.impl;
+import static com.spotify.i18n.locales.utils.hierarchy.LocalesHierarchyUtils.isSameLocale;
+
import com.google.auto.value.AutoValue;
import com.ibm.icu.util.LocaleMatcher;
import com.ibm.icu.util.ULocale;
+import com.spotify.i18n.locales.common.LocaleAffinityBiCalculator;
import com.spotify.i18n.locales.common.LocaleAffinityCalculator;
import com.spotify.i18n.locales.common.ReferenceLocalesCalculator;
import com.spotify.i18n.locales.common.model.LocaleAffinity;
+import com.spotify.i18n.locales.common.model.LocaleAffinityResult;
import com.spotify.i18n.locales.common.model.RelatedReferenceLocale;
import com.spotify.i18n.locales.utils.available.AvailableLocalesUtils;
import com.spotify.i18n.locales.utils.languagetag.LanguageTagUtils;
@@ -48,7 +52,8 @@
* @author Eric Fjøsne
*/
@AutoValue
-public abstract class ReferenceLocalesCalculatorBaseImpl implements ReferenceLocalesCalculator {
+public abstract class ReferenceLocalesCalculatorBaseImpl
+ implements ReferenceLocalesCalculator, LocaleAffinityBiCalculator {
/** Prepared {@link LocaleMatcher}, ready to find the best matching reference locale */
private static final LocaleMatcher REFERENCE_LOCALE_MATCHER =
@@ -105,6 +110,23 @@ public Optional calculateBestMatchingReferenceLocale(
return LanguageTagUtils.parse(languageTag).map(REFERENCE_LOCALE_MATCHER::getBestMatch);
}
+ @Override
+ public LocaleAffinityResult calculate(
+ @Nullable final String languageTag1, @Nullable final String languageTag2) {
+ return LocaleAffinityResult.builder()
+ .affinity(
+ calculateBestMatchingReferenceLocale(languageTag2)
+ .map(
+ referenceLocale ->
+ calculateRelatedReferenceLocales(languageTag1).stream()
+ .filter(rrl -> isSameLocale(rrl.referenceLocale(), referenceLocale))
+ .findFirst()
+ .map(RelatedReferenceLocale::affinity)
+ .orElse(LocaleAffinity.NONE))
+ .orElse(LocaleAffinity.NONE))
+ .build();
+ }
+
/**
* Returns a {@link Builder} instance that will allow you to manually create a {@link
* ReferenceLocalesCalculatorBaseImpl} instance.
@@ -123,7 +145,12 @@ public abstract static class Builder {
abstract ReferenceLocalesCalculatorBaseImpl autoBuild();
/** Builds a {@link ReferenceLocalesCalculator} out of this builder. */
- public final ReferenceLocalesCalculator build() {
+ public final ReferenceLocalesCalculator buildReferenceLocalesCalculator() {
+ return autoBuild();
+ }
+
+ /** Builds a {@link LocaleAffinityBiCalculator} out of this builder. */
+ public final LocaleAffinityBiCalculator buildLocaleAffinityBiCalculator() {
return autoBuild();
}
}
diff --git a/locales-common/src/test/java/com/spotify/i18n/locales/common/LocaleAffinityHelpersFactoryTest.java b/locales-common/src/test/java/com/spotify/i18n/locales/common/LocaleAffinityHelpersFactoryTest.java
index 91af1b1..503227e 100644
--- a/locales-common/src/test/java/com/spotify/i18n/locales/common/LocaleAffinityHelpersFactoryTest.java
+++ b/locales-common/src/test/java/com/spotify/i18n/locales/common/LocaleAffinityHelpersFactoryTest.java
@@ -157,6 +157,13 @@ void whenBuildingRelatedReferenceLocalesCalculator_returnsExpectedCalculator() {
instanceof ReferenceLocalesCalculator);
}
+ @Test
+ void whenBuildingAffinityBiCalculator_returnsExpectedCalculator() {
+ assertTrue(
+ LocaleAffinityHelpersFactory.getDefaultInstance().buildAffinityBiCalculator()
+ instanceof LocaleAffinityBiCalculator);
+ }
+
@ParameterizedTest
@MethodSource
void
diff --git a/locales-common/src/test/java/com/spotify/i18n/locales/common/impl/LocaleAffinityCalculatorBaseImplTest.java b/locales-common/src/test/java/com/spotify/i18n/locales/common/impl/LocaleAffinityCalculatorBaseImplTest.java
index 42e0cd3..b931f2f 100644
--- a/locales-common/src/test/java/com/spotify/i18n/locales/common/impl/LocaleAffinityCalculatorBaseImplTest.java
+++ b/locales-common/src/test/java/com/spotify/i18n/locales/common/impl/LocaleAffinityCalculatorBaseImplTest.java
@@ -49,7 +49,7 @@ class LocaleAffinityCalculatorBaseImplTest {
public static final LocaleAffinityCalculator CALCULATOR_AGAINST_TEST_SET_OF_LOCALES =
LocaleAffinityCalculatorBaseImpl.builder()
.againstLocales(
- Set.of("ar", "bs", "es", "fr", "ja", "pt", "sr-Latn", "zh-Hant").stream()
+ Set.of("ar", "bs-Cyrl", "es", "fr", "ja", "pt", "sr-Latn", "zh-Hant").stream()
.map(ULocale::forLanguageTag)
.collect(Collectors.toSet()))
.build();
@@ -107,6 +107,14 @@ public static Stream whenCalculating_returnsExpectedAffinity() {
Arguments.of("ca-ES", LOW),
Arguments.of("ca-AD", LOW),
+ // Bosnian should be matched for all scripts and regions, since we support Bosnian
+ Arguments.of("bs", SAME),
+ Arguments.of("bs-Latn", SAME),
+ Arguments.of("bs-Cyrl", SAME),
+ Arguments.of("bs-BA", SAME),
+ Arguments.of("bs-Latn-BA", SAME),
+ Arguments.of("bs-Cyrl-BA", SAME),
+
// No english should be matched
Arguments.of("en", NONE),
Arguments.of("en-GB", NONE),
diff --git a/locales-common/src/test/java/com/spotify/i18n/locales/common/impl/ReferenceLocalesCalculatorBaseImplTest.java b/locales-common/src/test/java/com/spotify/i18n/locales/common/impl/ReferenceLocalesCalculatorBaseImplTest.java
index bd12a58..c939584 100644
--- a/locales-common/src/test/java/com/spotify/i18n/locales/common/impl/ReferenceLocalesCalculatorBaseImplTest.java
+++ b/locales-common/src/test/java/com/spotify/i18n/locales/common/impl/ReferenceLocalesCalculatorBaseImplTest.java
@@ -23,6 +23,7 @@
import static com.spotify.i18n.locales.common.model.LocaleAffinity.HIGH;
import static com.spotify.i18n.locales.common.model.LocaleAffinity.LOW;
import static com.spotify.i18n.locales.common.model.LocaleAffinity.MUTUALLY_INTELLIGIBLE;
+import static com.spotify.i18n.locales.common.model.LocaleAffinity.NONE;
import static com.spotify.i18n.locales.common.model.LocaleAffinity.SAME;
import static com.spotify.i18n.locales.utils.hierarchy.LocalesHierarchyUtils.isSameLocale;
import static org.hamcrest.MatcherAssert.assertThat;
@@ -31,14 +32,17 @@
import com.ibm.icu.util.ULocale;
import com.ibm.icu.util.ULocale.Builder;
+import com.spotify.i18n.locales.common.LocaleAffinityBiCalculator;
import com.spotify.i18n.locales.common.ReferenceLocalesCalculator;
import com.spotify.i18n.locales.common.model.LocaleAffinity;
import com.spotify.i18n.locales.common.model.RelatedReferenceLocale;
import com.spotify.i18n.locales.utils.available.AvailableLocalesUtils;
import com.spotify.i18n.locales.utils.language.LanguageUtils;
+import java.util.Collections;
import java.util.List;
import java.util.Optional;
import java.util.stream.Stream;
+import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;
@@ -46,7 +50,10 @@
class ReferenceLocalesCalculatorBaseImplTest {
public static final ReferenceLocalesCalculator REFERENCE_LOCALES_CALCULATOR =
- ReferenceLocalesCalculatorBaseImpl.builder().build();
+ ReferenceLocalesCalculatorBaseImpl.builder().buildReferenceLocalesCalculator();
+
+ public static final LocaleAffinityBiCalculator LOCALE_AFFINITY_BI_CALCULATOR =
+ ReferenceLocalesCalculatorBaseImpl.builder().buildLocaleAffinityBiCalculator();
public static Stream validateLocaleAffinityScoreRanges() {
return AvailableLocalesUtils.getCldrLocales().stream().map(Arguments::of);
@@ -131,8 +138,6 @@ private boolean areKnownMutuallyIntelligibleLocales(ULocale inputLS, ULocale ref
switch (input) {
// Bosnian and Croatian
case "bs-Latn":
- return reference.equals("hr-Latn");
- // Bosnian and Croatian
case "bs-Cyrl":
return reference.equals("hr-Latn");
// Croatian and Bosnian
@@ -501,4 +506,49 @@ private static List serbian() {
private static List swedish() {
return List.of(rrl("sv", SAME), rrl("sv-AX", SAME), rrl("sv-FI", SAME));
}
+
+ @Test
+ public void whenCalculatingForOutlierValues_returnsExpected() {
+ assertEquals(NONE, LOCALE_AFFINITY_BI_CALCULATOR.calculate(null, null).affinity());
+ assertEquals(NONE, LOCALE_AFFINITY_BI_CALCULATOR.calculate("", "").affinity());
+ assertEquals(NONE, LOCALE_AFFINITY_BI_CALCULATOR.calculate(null, "").affinity());
+ assertEquals(NONE, LOCALE_AFFINITY_BI_CALCULATOR.calculate("", null).affinity());
+ assertEquals(NONE, LOCALE_AFFINITY_BI_CALCULATOR.calculate(" ", " ").affinity());
+ }
+
+ @Test
+ public void whenCalculatingBestMatchingReferenceLocaleForOutlierValues_returnsExpected() {
+ assertEquals(
+ Optional.empty(), REFERENCE_LOCALES_CALCULATOR.calculateBestMatchingReferenceLocale(null));
+ assertEquals(
+ Optional.empty(), REFERENCE_LOCALES_CALCULATOR.calculateBestMatchingReferenceLocale(""));
+ assertEquals(
+ Optional.empty(), REFERENCE_LOCALES_CALCULATOR.calculateBestMatchingReferenceLocale(" "));
+ }
+
+ @Test
+ public void whenCalculatingRelatedReferenceLocalesForOutlierValues_returnsExpected() {
+ assertEquals(
+ Collections.emptyList(),
+ REFERENCE_LOCALES_CALCULATOR.calculateRelatedReferenceLocales(null));
+ assertEquals(
+ Collections.emptyList(), REFERENCE_LOCALES_CALCULATOR.calculateRelatedReferenceLocales(""));
+ assertEquals(
+ Collections.emptyList(),
+ REFERENCE_LOCALES_CALCULATOR.calculateRelatedReferenceLocales(" "));
+ }
+
+ @Test
+ public void calculateBiAffinity() {
+ assertEquals(
+ MUTUALLY_INTELLIGIBLE,
+ LOCALE_AFFINITY_BI_CALCULATOR.calculate("bs-Latn", "hr-BA").affinity());
+ assertEquals(
+ MUTUALLY_INTELLIGIBLE,
+ LOCALE_AFFINITY_BI_CALCULATOR.calculate("bs-Cyrl", "hr-BA").affinity());
+ assertEquals(
+ MUTUALLY_INTELLIGIBLE, LOCALE_AFFINITY_BI_CALCULATOR.calculate("bs", "hr-BA").affinity());
+ assertEquals(
+ MUTUALLY_INTELLIGIBLE, LOCALE_AFFINITY_BI_CALCULATOR.calculate("bs-Latn", "hr").affinity());
+ }
}
diff --git a/locales-utils/src/main/java/com/spotify/i18n/locales/utils/language/LanguageUtils.java b/locales-utils/src/main/java/com/spotify/i18n/locales/utils/language/LanguageUtils.java
index e8a72f6..9edb7bd 100644
--- a/locales-utils/src/main/java/com/spotify/i18n/locales/utils/language/LanguageUtils.java
+++ b/locales-utils/src/main/java/com/spotify/i18n/locales/utils/language/LanguageUtils.java
@@ -63,8 +63,9 @@ public static Optional getWrittenLanguageLocale(final String languageTa
}
private static ULocale getWrittenLanguageLocaleForLocale(final ULocale locale) {
- // Croatian is Bosnia is matched with Bosnian (Latin script). This is likely a bug in icu4j. We
- // created a workaround to ensure that we return Croatian when encountering this locale.
+ // The written language locale matcher matches Croatian in Bosnia with Bosnian (Latin script).
+ // This is likely a bug in icu4j. We created a workaround to ensure that we return Croatian when
+ // encountering this locale.
if (isCroatianBosnia(locale)) {
return CROATIAN;
} else {