diff --git a/locales-common/src/main/java/com/spotify/i18n/locales/common/LocaleAffinityHelpersFactory.java b/locales-common/src/main/java/com/spotify/i18n/locales/common/LocaleAffinityHelpersFactory.java
index 829a551..f291469 100644
--- a/locales-common/src/main/java/com/spotify/i18n/locales/common/LocaleAffinityHelpersFactory.java
+++ b/locales-common/src/main/java/com/spotify/i18n/locales/common/LocaleAffinityHelpersFactory.java
@@ -22,6 +22,7 @@
import com.google.common.base.Preconditions;
import com.ibm.icu.util.ULocale;
+import com.spotify.i18n.locales.common.impl.LocaleAffinityBiCalculatorBaseImpl;
import com.spotify.i18n.locales.common.impl.LocaleAffinityCalculatorBaseImpl;
import com.spotify.i18n.locales.common.impl.ReferenceLocalesCalculatorBaseImpl;
import com.spotify.i18n.locales.common.model.LocaleAffinity;
@@ -125,7 +126,7 @@ public LocaleAffinityCalculator buildAffinityCalculatorForLanguageTags(
* @see LocaleAffinityBiCalculator
*/
public LocaleAffinityBiCalculator buildAffinityBiCalculator() {
- return ReferenceLocalesCalculatorBaseImpl.builder().buildLocaleAffinityBiCalculator();
+ return LocaleAffinityBiCalculatorBaseImpl.builder().build();
}
/**
diff --git a/locales-common/src/main/java/com/spotify/i18n/locales/common/impl/LocaleAffinityBiCalculatorBaseImpl.java b/locales-common/src/main/java/com/spotify/i18n/locales/common/impl/LocaleAffinityBiCalculatorBaseImpl.java
new file mode 100644
index 0000000..0325a94
--- /dev/null
+++ b/locales-common/src/main/java/com/spotify/i18n/locales/common/impl/LocaleAffinityBiCalculatorBaseImpl.java
@@ -0,0 +1,226 @@
+/*-
+ * -\-\-
+ * locales-common
+ * --
+ * Copyright (C) 2016 - 2025 Spotify AB
+ * --
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * -/-/-
+ */
+
+package com.spotify.i18n.locales.common.impl;
+
+import static com.spotify.i18n.locales.utils.hierarchy.LocalesHierarchyUtils.isSameLocale;
+import static com.spotify.i18n.locales.utils.language.LanguageUtils.getSpokenLanguageLocale;
+import static com.spotify.i18n.locales.utils.languagetag.LanguageTagUtils.parse;
+
+import com.google.auto.value.AutoValue;
+import com.ibm.icu.impl.locale.LSR;
+import com.ibm.icu.impl.locale.LikelySubtags;
+import com.ibm.icu.impl.locale.LocaleDistance;
+import com.ibm.icu.util.LocaleMatcher.Direction;
+import com.ibm.icu.util.LocaleMatcher.FavorSubtag;
+import com.ibm.icu.util.ULocale;
+import com.spotify.i18n.locales.common.LocaleAffinityBiCalculator;
+import com.spotify.i18n.locales.common.LocaleAffinityCalculator;
+import com.spotify.i18n.locales.common.model.LocaleAffinity;
+import com.spotify.i18n.locales.common.model.LocaleAffinityResult;
+import com.spotify.i18n.locales.utils.available.AvailableLocalesUtils;
+import edu.umd.cs.findbugs.annotations.Nullable;
+import java.util.Optional;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+/**
+ * Base implementation of {@link LocaleAffinityBiCalculator} that calculates the locale affinity
+ * between two given language tags.
+ *
+ * <p>This class is not intended for public subclassing. New object instances must be created using
+ * the builder pattern, starting with the {@link #builder()} method.
+ *
+ * @author Eric Fjøsne
+ */
+@AutoValue
+public abstract class LocaleAffinityBiCalculatorBaseImpl implements LocaleAffinityBiCalculator {
+
+  // Set containing all language codes available in CLDR, derived from the reference locales.
+  private static final Set<String> AVAILABLE_LANGUAGE_CODES =
+      AvailableLocalesUtils.getReferenceLocales().stream()
+          .map(ULocale::getLanguage)
+          .collect(Collectors.toSet());
+
+ // LocaleDistance.INSTANCE is commented as VisibleForTesting, so not ideal ... but this is the
+ // only way to make use of this class, which provides the features we need here.
+ private static final LocaleDistance LOCALE_DISTANCE_INSTANCE = LocaleDistance.INSTANCE;
+
+ // LocaleDistance best distance method arguments, all assigned to their default as per icu
+ // implementation.
+ private static final int LOCALE_DISTANCE_SHIFTED =
+ LocaleDistance.shiftDistance(LOCALE_DISTANCE_INSTANCE.getDefaultScriptDistance());
+ private static final int LOCALE_DISTANCE_SUPPORTED_LSRS_LENGTH = 1;
+ private static final FavorSubtag LOCALE_DISTANCE_FAVOR_SUBTAG = FavorSubtag.LANGUAGE;
+ private static final Direction LOCALE_DISTANCE_DIRECTION = Direction.WITH_ONE_WAY;
+
+ // LikelySubtags.INSTANCE is commented as VisibleForTesting, so not ideal ... but this is the
+ // only way to make use of this class, which provides the features we need here.
+ private static final LikelySubtags LIKELY_SUBTAGS_INSTANCE = LikelySubtags.INSTANCE;
+
+ // LikelySubtags method arguments, all assigned to their default as per icu implementation.
+ private static final boolean LIKELY_SUBTAGS_RETURNS_INPUT_IF_UNMATCH = false;
+
+ // Distance threshold: Anything above this value will be scored 0.
+ private static final double DISTANCE_THRESHOLD = 224.0;
+
+ // Score to affinity thresholds
+ private static final int SCORE_THRESHOLD_MUTUALLY_INTELLIGIBLE = 65;
+ private static final int SCORE_THRESHOLD_HIGH = 30;
+ private static final int SCORE_THRESHOLD_LOW = 0;
+
+ // Language codes for which we need some manual tweaks
+ private static final String LANGUAGE_CODE_CROATIAN = "hr";
+ private static final String LANGUAGE_CODE_BOSNIAN = "bs";
+
+ /**
+ * Returns the calculated {@link LocaleAffinityResult} for the given two language tags
+ *
+ * @return the locale affinity result
+ */
+ @Override
+ public LocaleAffinityResult calculate(
+ @Nullable final String languageTag1, @Nullable final String languageTag2) {
+ return LocaleAffinityResult.builder().affinity(getAffinity(languageTag1, languageTag2)).build();
+ }
+
+  private LocaleAffinity getAffinity(
+      @Nullable final String languageTag1, @Nullable final String languageTag2) {
+    // We parse the language tags, and filter out locales with a language unavailable in CLDR.
+    final Optional<ULocale> locale1 =
+        parse(languageTag1).filter(LocaleAffinityBiCalculatorBaseImpl::isAvailableLanguage);
+    final Optional<ULocale> locale2 =
+        parse(languageTag2).filter(LocaleAffinityBiCalculatorBaseImpl::isAvailableLanguage);
+
+    // Without two usable locales, there is nothing to compare.
+    if (!locale1.isPresent() || !locale2.isPresent()) {
+      return LocaleAffinity.NONE;
+    }
+
+    // We attempt to match based on corresponding spoken language first, and make use of the
+    // score-based affinity calculation as fallback.
+    if (hasSameSpokenLanguageAffinity(locale1.get(), locale2.get())) {
+      return LocaleAffinity.SAME;
+    }
+    return calculateScoreBasedAffinity(locale1.get(), locale2.get());
+  }
+
+  private boolean hasSameSpokenLanguageAffinity(final ULocale locale1, final ULocale locale2) {
+    final Optional<ULocale> spoken1 = getSpokenLanguageLocale(locale1.toLanguageTag());
+    final Optional<ULocale> spoken2 = getSpokenLanguageLocale(locale2.toLanguageTag());
+    return spoken1.isPresent() && spoken2.isPresent() && isSameLocale(spoken1.get(), spoken2.get());
+  }
+
+  static LocaleAffinity calculateScoreBasedAffinity(final ULocale l1, final ULocale l2) {
+    // Pipeline: best distance between the locales -> affinity score -> affinity level.
+    final int score = convertDistanceToAffinityScore(getBestDistanceBetweenLocales(l1, l2));
+    return convertScoreToLocaleAffinity(score);
+  }
+
+  static boolean isAvailableLanguage(final ULocale locale) {
+    return AVAILABLE_LANGUAGE_CODES.contains(locale.getLanguage().toLowerCase(java.util.Locale.ROOT));
+  }
+
+  private static int getBestDistanceBetweenLocales(final ULocale locale1, final ULocale locale2) {
+    // Both locales are converted to their maximized LSR before being compared.
+    return getBestDistanceBetweenLSR(
+        getMaximizedLanguageScriptRegion(locale1), getMaximizedLanguageScriptRegion(locale2));
+  }
+
+  static int getBestDistanceBetweenLSR(final LSR lsr1, final LSR lsr2) {
+    // Croatian should be matched with Bosnian. ICU does so for Bosnian written in Latin script,
+    // but not Cyrillic, because its implementation enforces script matching. As a workaround, we
+    // report a distance of 0 for this pair, which ends up as a MUTUALLY_INTELLIGIBLE affinity.
+    if (calculatingDistanceBetweenCroatianAndBosnian(lsr1, lsr2)) {
+      return 0;
+    }
+    // We calculate distances both ways, and keep the minimum value (= best distance).
+    final int forward = calculateDistance(lsr1, lsr2);
+    final int backward = calculateDistance(lsr2, lsr1);
+    return Math.min(forward, backward);
+  }
+
+  private static int calculateDistance(final LSR lsr1, final LSR lsr2) {
+    // Measures lsr1 against a one-element candidate array holding only lsr2, then returns the
+    // absolute value of what ICU reports.
+    final LSR[] candidates = {lsr2};
+    final int raw =
+        LOCALE_DISTANCE_INSTANCE.getBestIndexAndDistance(
+            lsr1, candidates, LOCALE_DISTANCE_SUPPORTED_LSRS_LENGTH, LOCALE_DISTANCE_SHIFTED,
+            LOCALE_DISTANCE_FAVOR_SUBTAG, LOCALE_DISTANCE_DIRECTION);
+    return Math.abs(raw);
+  }
+
+  static int convertDistanceToAffinityScore(final int distance) {
+    // Distances beyond the threshold carry no affinity at all.
+    if (distance > DISTANCE_THRESHOLD) {
+      return 0;
+    }
+    return (int) ((DISTANCE_THRESHOLD - distance) / DISTANCE_THRESHOLD * 100.0);
+  }
+
+  static LocaleAffinity convertScoreToLocaleAffinity(final int score) {
+    // Thresholds are exclusive lower bounds, checked from the strongest affinity downwards.
+    if (score > SCORE_THRESHOLD_MUTUALLY_INTELLIGIBLE) {
+      return LocaleAffinity.MUTUALLY_INTELLIGIBLE;
+    }
+    if (score > SCORE_THRESHOLD_HIGH) {
+      return LocaleAffinity.HIGH;
+    }
+    // Any remaining score above the LOW threshold is LOW; everything else is NONE.
+    return score > SCORE_THRESHOLD_LOW ? LocaleAffinity.LOW : LocaleAffinity.NONE;
+  }
+
+ private static boolean calculatingDistanceBetweenCroatianAndBosnian(
+ final LSR lsr1, final LSR lsr2) {
+ return (lsr1.language.equals(LANGUAGE_CODE_CROATIAN)
+ && lsr2.language.equals(LANGUAGE_CODE_BOSNIAN))
+ || (lsr1.language.equals(LANGUAGE_CODE_BOSNIAN)
+ && lsr2.language.equals(LANGUAGE_CODE_CROATIAN));
+ }
+
+ static LSR getMaximizedLanguageScriptRegion(final ULocale locale) {
+ return LIKELY_SUBTAGS_INSTANCE.makeMaximizedLsrFrom(
+ locale, LIKELY_SUBTAGS_RETURNS_INPUT_IF_UNMATCH);
+ }
+
+ /**
+ * Returns a {@link Builder} instance that will allow you to manually create a {@link
+ * LocaleAffinityBiCalculatorBaseImpl} instance.
+ *
+ * @return The builder
+ */
+ public static Builder builder() {
+ return new AutoValue_LocaleAffinityBiCalculatorBaseImpl.Builder();
+ }
+
+ /** A builder for a {@link LocaleAffinityBiCalculatorBaseImpl}. */
+ @AutoValue.Builder
+ public abstract static class Builder {
+ Builder() {} // package private constructor
+
+ abstract LocaleAffinityBiCalculatorBaseImpl autoBuild();
+
+ /** Builds a {@link LocaleAffinityBiCalculator} out of this builder. */
+ public final LocaleAffinityBiCalculator build() {
+ return autoBuild();
+ }
+ }
+}
diff --git a/locales-common/src/main/java/com/spotify/i18n/locales/common/impl/LocaleAffinityCalculatorBaseImpl.java b/locales-common/src/main/java/com/spotify/i18n/locales/common/impl/LocaleAffinityCalculatorBaseImpl.java
index 00b50dd..d05f262 100644
--- a/locales-common/src/main/java/com/spotify/i18n/locales/common/impl/LocaleAffinityCalculatorBaseImpl.java
+++ b/locales-common/src/main/java/com/spotify/i18n/locales/common/impl/LocaleAffinityCalculatorBaseImpl.java
@@ -20,16 +20,16 @@
package com.spotify.i18n.locales.common.impl;
+import static com.spotify.i18n.locales.common.impl.LocaleAffinityBiCalculatorBaseImpl.convertDistanceToAffinityScore;
+import static com.spotify.i18n.locales.common.impl.LocaleAffinityBiCalculatorBaseImpl.convertScoreToLocaleAffinity;
+import static com.spotify.i18n.locales.common.impl.LocaleAffinityBiCalculatorBaseImpl.getBestDistanceBetweenLSR;
+import static com.spotify.i18n.locales.common.impl.LocaleAffinityBiCalculatorBaseImpl.getMaximizedLanguageScriptRegion;
import static com.spotify.i18n.locales.utils.hierarchy.LocalesHierarchyUtils.isRootLocale;
import static com.spotify.i18n.locales.utils.hierarchy.LocalesHierarchyUtils.isSameLocale;
import com.google.auto.value.AutoValue;
import com.google.common.base.Preconditions;
import com.ibm.icu.impl.locale.LSR;
-import com.ibm.icu.impl.locale.LikelySubtags;
-import com.ibm.icu.impl.locale.LocaleDistance;
-import com.ibm.icu.util.LocaleMatcher.Direction;
-import com.ibm.icu.util.LocaleMatcher.FavorSubtag;
import com.ibm.icu.util.ULocale;
import com.spotify.i18n.locales.common.LocaleAffinityCalculator;
import com.spotify.i18n.locales.common.model.LocaleAffinity;
@@ -39,6 +39,7 @@
import edu.umd.cs.findbugs.annotations.Nullable;
import java.util.Optional;
import java.util.Set;
+import java.util.stream.Collectors;
/**
* Base implementation of {@link LocaleAffinityCalculator} that calculates the locale affinity for a
@@ -52,37 +53,6 @@
@AutoValue
public abstract class LocaleAffinityCalculatorBaseImpl implements LocaleAffinityCalculator {
- // LocaleDistance.INSTANCE is commented as VisibleForTesting, so not ideal ... but this is the
- // only way to make use of this class, which provides the features we need here.
- private static final LocaleDistance LOCALE_DISTANCE_INSTANCE = LocaleDistance.INSTANCE;
-
- // LocaleDistance best distance method arguments, all assigned to their default as per icu
- // implementation.
- private static final int LOCALE_DISTANCE_SHIFTED =
- LocaleDistance.shiftDistance(LOCALE_DISTANCE_INSTANCE.getDefaultScriptDistance());
- private static final int LOCALE_DISTANCE_SUPPORTED_LSRS_LENGTH = 1;
- private static final FavorSubtag LOCALE_DISTANCE_FAVOR_SUBTAG = FavorSubtag.LANGUAGE;
- private static final Direction LOCALE_DISTANCE_DIRECTION = Direction.WITH_ONE_WAY;
-
- // LikelySubtags.INSTANCE is commented as VisibleForTesting, so not ideal ... but this is the
- // only way to make use of this class, which provides the features we need here.
- private static final LikelySubtags LIKELY_SUBTAGS_INSTANCE = LikelySubtags.INSTANCE;
-
- // LikelySubtags method arguments, all assigned to their default as per icu implementation.
- private static final boolean LIKELY_SUBTAGS_RETURNS_INPUT_IF_UNMATCH = false;
-
- // Distance threshold: Anything above this value will be scored 0.
- private static final double DISTANCE_THRESHOLD = 224.0;
-
- // Score to affinity thresholds
- private static final int SCORE_THRESHOLD_MUTUALLY_INTELLIGIBLE = 65;
- private static final int SCORE_THRESHOLD_HIGH = 30;
- private static final int SCORE_THRESHOLD_LOW = 0;
-
- // Language codes for which we need some manual tweaks
- private static final String LANGUAGE_CODE_CROATIAN = "hr";
- private static final String LANGUAGE_CODE_BOSNIAN = "bs";
-
/**
* Returns the set of {@link ULocale} against which affinity is being calculated.
*
@@ -90,6 +60,21 @@ public abstract class LocaleAffinityCalculatorBaseImpl implements LocaleAffinity
*/
public abstract Set againstLocales();
+  /**
+   * Returns the set of best matching spoken {@link ULocale} against which affinity is being
+   * calculated.
+   *
+   * @return set of spoken locales
+   */
+  abstract Set<ULocale> againstSpokenLocales();
+
+  /**
+   * Returns the set of maximized {@link LSR} against which affinity is being calculated.
+   *
+   * @return set of maximized LSRs
+   */
+  abstract Set<LSR> againstMaximizedLSRs();
+
/**
* Returns the calculated {@link LocaleAffinityResult} for the given language tag
*
@@ -118,10 +103,7 @@ private boolean hasSameSpokenLanguageAffinity(@Nullable final String languageTag
return LanguageUtils.getSpokenLanguageLocale(languageTag)
.map(
spokenLanguageLocale ->
- againstLocales().stream()
- .map(ULocale::toLanguageTag)
- .map(LanguageUtils::getSpokenLanguageLocale)
- .flatMap(Optional::stream)
+ againstSpokenLocales().stream()
.anyMatch(
againstSpokenLocale ->
isSameLocale(spokenLanguageLocale, againstSpokenLocale)))
@@ -136,69 +118,17 @@ private LocaleAffinity calculateScoreBasedAffinity(String languageTag) {
private int getBestDistance(@Nullable final String languageTag) {
return LanguageTagUtils.parse(languageTag)
- .map(LocaleAffinityCalculatorBaseImpl::getMaximizedLanguageScriptRegion)
+ .filter(LocaleAffinityBiCalculatorBaseImpl::isAvailableLanguage)
+ .map(parsed -> getMaximizedLanguageScriptRegion(parsed))
.map(
maxParsed ->
- againstLocales().stream()
- .map(LocaleAffinityCalculatorBaseImpl::getMaximizedLanguageScriptRegion)
- .map(
- maxSupported ->
- getDistanceBetweenInputAndSupported(maxParsed, maxSupported))
- .map(Math::abs)
+ againstMaximizedLSRs().stream()
+ .map(maxAgainst -> getBestDistanceBetweenLSR(maxParsed, maxAgainst))
.min(Integer::compare)
.orElse(Integer.MAX_VALUE))
.orElse(Integer.MAX_VALUE);
}
- private int convertDistanceToAffinityScore(final int distance) {
- if (distance > DISTANCE_THRESHOLD) {
- return 0;
- } else {
- return (int) ((DISTANCE_THRESHOLD - distance) / DISTANCE_THRESHOLD * 100.0);
- }
- }
-
- private LocaleAffinity convertScoreToLocaleAffinity(final int score) {
- if (score > SCORE_THRESHOLD_MUTUALLY_INTELLIGIBLE) {
- return LocaleAffinity.MUTUALLY_INTELLIGIBLE;
- } else if (score > SCORE_THRESHOLD_HIGH) {
- return LocaleAffinity.HIGH;
- } else if (score > SCORE_THRESHOLD_LOW) {
- return LocaleAffinity.LOW;
- } else {
- return LocaleAffinity.NONE;
- }
- }
-
- private int getDistanceBetweenInputAndSupported(final LSR maxParsed, final LSR maxSupported) {
- // Croatian should be matched with Bosnian. This is the case for Bosnian written in Latin
- // script, but not Cyrillic, because the ICU implementation enforces script matching. We
- // created a workaround to ensure that we return a MUTUALLY_INTELLIGIBLE affinity when
- // encountering this locale.
- if (calculatingDistanceBetweenCroatianAndBosnian(maxParsed, maxSupported)) {
- return 0;
- }
- return LOCALE_DISTANCE_INSTANCE.getBestIndexAndDistance(
- maxParsed,
- new LSR[] {maxSupported},
- LOCALE_DISTANCE_SUPPORTED_LSRS_LENGTH,
- LOCALE_DISTANCE_SHIFTED,
- LOCALE_DISTANCE_FAVOR_SUBTAG,
- LOCALE_DISTANCE_DIRECTION);
- }
-
- private boolean calculatingDistanceBetweenCroatianAndBosnian(final LSR lsr1, final LSR lsr2) {
- return (lsr1.language.equals(LANGUAGE_CODE_CROATIAN)
- && lsr2.language.equals(LANGUAGE_CODE_BOSNIAN))
- || (lsr1.language.equals(LANGUAGE_CODE_BOSNIAN)
- && lsr2.language.equals(LANGUAGE_CODE_CROATIAN));
- }
-
- private static LSR getMaximizedLanguageScriptRegion(final ULocale locale) {
- return LIKELY_SUBTAGS_INSTANCE.makeMaximizedLsrFrom(
- locale, LIKELY_SUBTAGS_RETURNS_INPUT_IF_UNMATCH);
- }
-
/**
* Returns a {@link Builder} instance that will allow you to manually create a {@link
* LocaleAffinityCalculatorBaseImpl} instance.
@@ -222,17 +152,56 @@ public abstract static class Builder {
*/
public abstract Builder againstLocales(final Set locales);
+    /**
+     * Configures the set of best matching spoken {@link ULocale} against which affinity will be
+     * calculated.
+     *
+     * @param locales spoken locales
+     * @return The {@link Builder} instance
+     */
+    abstract Builder againstSpokenLocales(final Set<ULocale> locales);
+
+    /**
+     * Configures the set of maximized {@link LSR} against which affinity will be calculated.
+     *
+     * @param maximizedLSR maximized LSRs
+     * @return The {@link Builder} instance
+     */
+    abstract Builder againstMaximizedLSRs(final Set<LSR> maximizedLSR);
+
+    abstract Set<ULocale> againstLocales();
+
abstract LocaleAffinityCalculatorBaseImpl autoBuild();
/** Builds a {@link LocaleAffinityCalculator} out of this builder. */
public final LocaleAffinityCalculator build() {
- final LocaleAffinityCalculatorBaseImpl built = autoBuild();
- for (ULocale locale : built.againstLocales()) {
+ for (ULocale locale : againstLocales()) {
Preconditions.checkState(
!isRootLocale(locale),
"The locales against which affinity needs to be calculated cannot contain the root.");
}
- return built;
+
+ // Filter out locales with a language unavailable in CLDR
+ againstLocales(
+ againstLocales().stream()
+ .filter(LocaleAffinityBiCalculatorBaseImpl::isAvailableLanguage)
+ .collect(Collectors.toSet()));
+
+ // Prepare the best matching spoken locales set, for faster calculations
+ againstSpokenLocales(
+ againstLocales().stream()
+ .map(ULocale::toLanguageTag)
+ .map(LanguageUtils::getSpokenLanguageLocale)
+ .flatMap(Optional::stream)
+ .collect(Collectors.toSet()));
+
+ // Prepare the maximized LSR set, for faster calculations
+ againstMaximizedLSRs(
+ againstLocales().stream()
+ .map(LocaleAffinityBiCalculatorBaseImpl::getMaximizedLanguageScriptRegion)
+ .collect(Collectors.toSet()));
+
+ return autoBuild();
}
}
}
diff --git a/locales-common/src/test/java/com/spotify/i18n/locales/common/impl/LocaleAffinityBiCalculatorBaseImplTest.java b/locales-common/src/test/java/com/spotify/i18n/locales/common/impl/LocaleAffinityBiCalculatorBaseImplTest.java
new file mode 100644
index 0000000..002374e
--- /dev/null
+++ b/locales-common/src/test/java/com/spotify/i18n/locales/common/impl/LocaleAffinityBiCalculatorBaseImplTest.java
@@ -0,0 +1,164 @@
+/*-
+ * -\-\-
+ * locales-common
+ * --
+ * Copyright (C) 2016 - 2025 Spotify AB
+ * --
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * -/-/-
+ */
+
+package com.spotify.i18n.locales.common.impl;
+
+import static com.spotify.i18n.locales.common.model.LocaleAffinity.HIGH;
+import static com.spotify.i18n.locales.common.model.LocaleAffinity.LOW;
+import static com.spotify.i18n.locales.common.model.LocaleAffinity.MUTUALLY_INTELLIGIBLE;
+import static com.spotify.i18n.locales.common.model.LocaleAffinity.NONE;
+import static com.spotify.i18n.locales.common.model.LocaleAffinity.SAME;
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.is;
+import static org.junit.jupiter.api.Assertions.*;
+
+import com.ibm.icu.util.ULocale;
+import com.spotify.i18n.locales.common.LocaleAffinityBiCalculator;
+import com.spotify.i18n.locales.common.LocaleAffinityCalculator;
+import com.spotify.i18n.locales.common.model.LocaleAffinity;
+import com.spotify.i18n.locales.common.model.LocaleAffinityResult;
+import java.util.Set;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.Arguments;
+import org.junit.jupiter.params.provider.MethodSource;
+
+class LocaleAffinityBiCalculatorBaseImplTest {
+
+ private static final LocaleAffinityBiCalculator BI_CALCULATOR =
+ LocaleAffinityBiCalculatorBaseImpl.builder().build();
+
+ @ParameterizedTest
+ @MethodSource
+ void whenCalculating_returnsExpectedAffinity(
+ final String languageTag1, final String languageTag2, final LocaleAffinity expectedAffinity) {
+ assertThat(
+ BI_CALCULATOR.calculate(languageTag1, languageTag2),
+ is(LocaleAffinityResult.builder().affinity(expectedAffinity).build()));
+ }
+
+ public static Stream whenCalculating_returnsExpectedAffinity() {
+ return Stream.of(
+ // Edge cases
+ Arguments.of("What is this?", " An invalid language tag!", NONE),
+ Arguments.of("ok-gargabe", "ok-junk", NONE),
+ Arguments.of("apples-and-bananas", "oranges-and-pears", NONE),
+ Arguments.of(" ", "", NONE),
+ Arguments.of(null, null, NONE),
+ Arguments.of("an", " An invalid language tag!", NONE),
+ Arguments.of("ok", "ok-junk", NONE),
+ Arguments.of("oranges", "oranges-and-pears", NONE),
+
+ // Catalan should be matched with Spanish, both ways
+ Arguments.of("es", "ca", LOW),
+ Arguments.of("ca-ES", "es-AD", LOW),
+ Arguments.of("ca-AD", "es-419", LOW),
+
+ // Bosnian should be matched for all scripts and regions
+ Arguments.of("bs-Cyrl", "bs", SAME),
+ Arguments.of("bs-Cyrl", "bs-Latn", SAME),
+ Arguments.of("bs-Cyrl-MK", "bs-Cyrl", SAME),
+ Arguments.of("bs-Cyrl", "bs-BA", SAME),
+ Arguments.of("bs", "bs-Latn-BA", SAME),
+ Arguments.of("bs-Latn-US", "bs-Cyrl-BA", SAME),
+
+ // German and Swiss German
+ Arguments.of("de-DE", "gsw-AT", MUTUALLY_INTELLIGIBLE),
+ Arguments.of("de-CH", "gsw-CH", MUTUALLY_INTELLIGIBLE),
+ Arguments.of("gsw-CH", "de-CH", MUTUALLY_INTELLIGIBLE),
+
+ // English
+ Arguments.of("en", "en-AU", SAME),
+ Arguments.of("en-GB", "fr", NONE),
+ Arguments.of("en-US", "nb", NONE),
+
+ // Spanish in Europe should be matched with Spanish Latin America
+ Arguments.of("es-150", "es-419", SAME),
+ Arguments.of("es-ES", "es-GB", SAME),
+ Arguments.of("es-US", "es-AR", SAME),
+
+ // Basque should be matched with Spanish
+ Arguments.of("es", "eu", LOW),
+ Arguments.of("eu", "es", LOW),
+
+ // French
+ Arguments.of("fr", "fr-CH", SAME),
+ Arguments.of("fr-BE", "fr-HI", SAME),
+ Arguments.of("fr-CA", "fr-US", SAME),
+ Arguments.of("fr-FR", "fr-JP", SAME),
+
+ // Galician should be matched, since we support Spanish
+ Arguments.of("gl", "es", LOW),
+ Arguments.of("es", "gl", LOW),
+
+ // Hindi shouldn't be matched with Tamil
+ Arguments.of("hi", "ta", NONE),
+
+ // Hindi (Latin) and English are not matched (expected affinity: NONE)
+ Arguments.of("hi-Latn", "en-GB", NONE),
+
+ // Croatian should be nicely matched with Bosnian
+ Arguments.of("bs-Latn", "hr-HR", MUTUALLY_INTELLIGIBLE),
+ Arguments.of("hr-BA", "bs-Cyrl", MUTUALLY_INTELLIGIBLE),
+
+ // Serbian Cyrillic should be matched with Serbian Latin
+ Arguments.of("sr-Latn", "sr", SAME),
+ Arguments.of("sr", "sr-Latn", SAME),
+ Arguments.of("sr-Latn-MK", "sr-Cyrl-ME", SAME),
+
+ // Portuguese
+ Arguments.of("pt-PT", "pt", SAME),
+ Arguments.of("pt-BR", "pt-PT", SAME),
+ Arguments.of("pt-SE", "pt-JP", SAME),
+ Arguments.of("pt-US", "pt-CL", SAME),
+
+ // Norwegian, Norwegian Bokmål, Nynorsk
+ Arguments.of("nb", "da", HIGH),
+ Arguments.of("nn", "nb", SAME),
+ Arguments.of("no", "nb", SAME),
+
+ // Uzbek should be matched as SAME for all scripts
+ Arguments.of("uz-Arab", "uz-Cyrl", SAME),
+ Arguments.of("uz-Cyrl", "uz", SAME),
+ Arguments.of("uz", "uz-Arab", SAME),
+
+ // Traditional Chinese shouldn't be matched with Simplified
+ Arguments.of("zh-Hant", "zh-CN", NONE),
+ Arguments.of("zh-Hant", "zh", NONE),
+ Arguments.of("zh-MK", "zh-CN", SAME),
+ Arguments.of("zh-FR", "zh-CN", SAME),
+ Arguments.of("zh-TW", "zh-US", SAME));
+ }
+
+ @Test
+ void whenCalculatingAffinityForSwedishAgainstBokmaalNorwegianAndDanish_returnsNone() {
+ final LocaleAffinityCalculator matcher =
+ LocaleAffinityCalculatorBaseImpl.builder()
+ .againstLocales(
+ Set.of("da", "nb", "no").stream()
+ .map(ULocale::forLanguageTag)
+ .collect(Collectors.toSet()))
+ .build();
+
+ assertThat(matcher.calculate("sv"), is(LocaleAffinityResult.builder().affinity(NONE).build()));
+ }
+}
diff --git a/locales-common/src/test/java/com/spotify/i18n/locales/common/impl/LocaleAffinityCalculatorBaseImplTest.java b/locales-common/src/test/java/com/spotify/i18n/locales/common/impl/LocaleAffinityCalculatorBaseImplTest.java
index b931f2f..043d8d2 100644
--- a/locales-common/src/test/java/com/spotify/i18n/locales/common/impl/LocaleAffinityCalculatorBaseImplTest.java
+++ b/locales-common/src/test/java/com/spotify/i18n/locales/common/impl/LocaleAffinityCalculatorBaseImplTest.java
@@ -28,6 +28,7 @@
import static org.hamcrest.Matchers.is;
import static org.junit.jupiter.api.Assertions.*;
+import com.ibm.icu.impl.locale.LSR;
import com.ibm.icu.util.ULocale;
import com.spotify.i18n.locales.common.LocaleAffinityCalculator;
import com.spotify.i18n.locales.common.model.LocaleAffinity;
@@ -60,7 +61,7 @@ void whenBuildingWithMissingRequiredProperties_buildFails() {
assertThrows(
IllegalStateException.class, () -> LocaleAffinityCalculatorBaseImpl.builder().build());
- assertEquals(thrown.getMessage(), "Missing required properties: againstLocales");
+ assertEquals(thrown.getMessage(), "Property \"againstLocales\" has not been set");
}
@Test
@@ -78,6 +79,63 @@ void whenBuildingWithRootAsPartOfAgainstLocales_buildFails() {
"The locales against which affinity needs to be calculated cannot contain the root.");
}
+ @Test
+ void whenBuildingWithUnavailableLocale_buildSucceeds() {
+ LocaleAffinityCalculatorBaseImpl built =
+ (LocaleAffinityCalculatorBaseImpl)
+ LocaleAffinityCalculatorBaseImpl.builder()
+ .againstLocales(
+ Set.of(ULocale.forLanguageTag("apples"), ULocale.forLanguageTag("English")))
+ .build();
+
+ assertTrue(built.againstLocales().isEmpty());
+ assertTrue(built.againstSpokenLocales().isEmpty());
+ assertTrue(built.againstMaximizedLSRs().isEmpty());
+ }
+
+ @Test
+ void whenBuildingForHappyPath_buildSucceedsAndStructuresArePrepared() {
+ final Set againstLocales =
+ Set.of(
+ ULocale.FRENCH,
+ ULocale.CANADA_FRENCH,
+ ULocale.ENGLISH,
+ ULocale.CHINESE,
+ ULocale.TRADITIONAL_CHINESE,
+ ULocale.GERMANY,
+ ULocale.forLanguageTag("de-AT"),
+ ULocale.forLanguageTag("de-CH"),
+ ULocale.JAPAN);
+
+ LocaleAffinityCalculatorBaseImpl built =
+ (LocaleAffinityCalculatorBaseImpl)
+ LocaleAffinityCalculatorBaseImpl.builder().againstLocales(againstLocales).build();
+
+ assertEquals(againstLocales, built.againstLocales());
+ assertEquals(
+ Set.of(
+ ULocale.forLanguageTag("de"),
+ ULocale.forLanguageTag("fr"),
+ ULocale.forLanguageTag("en"),
+ ULocale.forLanguageTag("ja"),
+ ULocale.forLanguageTag("zh-Hans"),
+ ULocale.forLanguageTag("zh-Hant")),
+ built.againstSpokenLocales());
+
+ assertEquals(
+ Set.of(
+ "de-Latn-AT",
+ "de-Latn-CH",
+ "de-Latn-DE",
+ "en-Latn-US",
+ "fr-Latn-CA",
+ "fr-Latn-FR",
+ "ja-Jpan-JP",
+ "zh-Hans-CN",
+ "zh-Hant-TW"),
+ built.againstMaximizedLSRs().stream().map(LSR::toString).collect(Collectors.toSet()));
+ }
+
@ParameterizedTest
@MethodSource(value = "whenCalculating_returnsExpectedAffinity")
void whenCalculatingAgainstEmptySetOfLocales_alwaysReturnsAffinityNone(final String languageTag) {