diff --git a/locales-common/src/main/java/com/spotify/i18n/locales/common/LocaleAffinityHelpersFactory.java b/locales-common/src/main/java/com/spotify/i18n/locales/common/LocaleAffinityHelpersFactory.java index 829a551..f291469 100644 --- a/locales-common/src/main/java/com/spotify/i18n/locales/common/LocaleAffinityHelpersFactory.java +++ b/locales-common/src/main/java/com/spotify/i18n/locales/common/LocaleAffinityHelpersFactory.java @@ -22,6 +22,7 @@ import com.google.common.base.Preconditions; import com.ibm.icu.util.ULocale; +import com.spotify.i18n.locales.common.impl.LocaleAffinityBiCalculatorBaseImpl; import com.spotify.i18n.locales.common.impl.LocaleAffinityCalculatorBaseImpl; import com.spotify.i18n.locales.common.impl.ReferenceLocalesCalculatorBaseImpl; import com.spotify.i18n.locales.common.model.LocaleAffinity; @@ -125,7 +126,7 @@ public LocaleAffinityCalculator buildAffinityCalculatorForLanguageTags( * @see LocaleAffinityBiCalculator */ public LocaleAffinityBiCalculator buildAffinityBiCalculator() { - return ReferenceLocalesCalculatorBaseImpl.builder().buildLocaleAffinityBiCalculator(); + return LocaleAffinityBiCalculatorBaseImpl.builder().build(); } /** diff --git a/locales-common/src/main/java/com/spotify/i18n/locales/common/impl/LocaleAffinityBiCalculatorBaseImpl.java b/locales-common/src/main/java/com/spotify/i18n/locales/common/impl/LocaleAffinityBiCalculatorBaseImpl.java new file mode 100644 index 0000000..0325a94 --- /dev/null +++ b/locales-common/src/main/java/com/spotify/i18n/locales/common/impl/LocaleAffinityBiCalculatorBaseImpl.java @@ -0,0 +1,226 @@ +/*- + * -\-\- + * locales-common + * -- + * Copyright (C) 2016 - 2025 Spotify AB + * -- + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * -/-/- + */ + +package com.spotify.i18n.locales.common.impl; + +import static com.spotify.i18n.locales.utils.hierarchy.LocalesHierarchyUtils.isSameLocale; +import static com.spotify.i18n.locales.utils.language.LanguageUtils.getSpokenLanguageLocale; +import static com.spotify.i18n.locales.utils.languagetag.LanguageTagUtils.parse; + +import com.google.auto.value.AutoValue; +import com.ibm.icu.impl.locale.LSR; +import com.ibm.icu.impl.locale.LikelySubtags; +import com.ibm.icu.impl.locale.LocaleDistance; +import com.ibm.icu.util.LocaleMatcher.Direction; +import com.ibm.icu.util.LocaleMatcher.FavorSubtag; +import com.ibm.icu.util.ULocale; +import com.spotify.i18n.locales.common.LocaleAffinityBiCalculator; +import com.spotify.i18n.locales.common.LocaleAffinityCalculator; +import com.spotify.i18n.locales.common.model.LocaleAffinity; +import com.spotify.i18n.locales.common.model.LocaleAffinityResult; +import com.spotify.i18n.locales.utils.available.AvailableLocalesUtils; +import edu.umd.cs.findbugs.annotations.Nullable; +import java.util.Optional; +import java.util.Set; +import java.util.stream.Collectors; + +/** + * Base implementation of {@link LocaleAffinityBiCalculator} that calculates the locale affinity + * between two given language tags. + * + *

This class is not intended for public subclassing. New object instances must be created using + * the builder pattern, starting with the {@link #builder()} method. + * + * @author Eric Fjøsne + */ +@AutoValue +public abstract class LocaleAffinityBiCalculatorBaseImpl implements LocaleAffinityBiCalculator { + + // Set containing all available language codes in CLDR. + private static final Set AVAILABLE_LANGUAGE_CODES = + AvailableLocalesUtils.getReferenceLocales().stream() + .map(ULocale::getLanguage) + .collect(Collectors.toSet()); + + // LocaleDistance.INSTANCE is commented as VisibleForTesting, so not ideal ... but this is the + // only way to make use of this class, which provides the features we need here. + private static final LocaleDistance LOCALE_DISTANCE_INSTANCE = LocaleDistance.INSTANCE; + + // LocaleDistance best distance method arguments, all assigned to their default as per icu + // implementation. + private static final int LOCALE_DISTANCE_SHIFTED = + LocaleDistance.shiftDistance(LOCALE_DISTANCE_INSTANCE.getDefaultScriptDistance()); + private static final int LOCALE_DISTANCE_SUPPORTED_LSRS_LENGTH = 1; + private static final FavorSubtag LOCALE_DISTANCE_FAVOR_SUBTAG = FavorSubtag.LANGUAGE; + private static final Direction LOCALE_DISTANCE_DIRECTION = Direction.WITH_ONE_WAY; + + // LikelySubtags.INSTANCE is commented as VisibleForTesting, so not ideal ... but this is the + // only way to make use of this class, which provides the features we need here. + private static final LikelySubtags LIKELY_SUBTAGS_INSTANCE = LikelySubtags.INSTANCE; + + // LikelySubtags method arguments, all assigned to their default as per icu implementation. + private static final boolean LIKELY_SUBTAGS_RETURNS_INPUT_IF_UNMATCH = false; + + // Distance threshold: Anything above this value will be scored 0. 
+ private static final double DISTANCE_THRESHOLD = 224.0; + + // Score to affinity thresholds + private static final int SCORE_THRESHOLD_MUTUALLY_INTELLIGIBLE = 65; + private static final int SCORE_THRESHOLD_HIGH = 30; + private static final int SCORE_THRESHOLD_LOW = 0; + + // Language codes for which we need some manual tweaks + private static final String LANGUAGE_CODE_CROATIAN = "hr"; + private static final String LANGUAGE_CODE_BOSNIAN = "bs"; + + /** + * Returns the calculated {@link LocaleAffinityResult} for the given two language tags + * + * @return the locale affinity result + */ + @Override + public LocaleAffinityResult calculate( + @Nullable final String languageTag1, @Nullable final String languageTag2) { + return LocaleAffinityResult.builder().affinity(getAffinity(languageTag1, languageTag2)).build(); + } + + private LocaleAffinity getAffinity( + @Nullable final String languageTag1, @Nullable final String languageTag2) { + // We parse the language tags, and filter out locales with a language unavailable in CLDR. + final Optional locale1 = + parse(languageTag1).filter(locale -> isAvailableLanguage(locale)); + final Optional locale2 = + parse(languageTag2).filter(locale -> isAvailableLanguage(locale)); + + if (locale1.isPresent() && locale2.isPresent()) { + // We attempt to match based on corresponding spoken language first, and make use of the + // score-based affinity calculation as fallback. 
+ if (hasSameSpokenLanguageAffinity(locale1.get(), locale2.get())) { + return LocaleAffinity.SAME; + } else { + return calculateScoreBasedAffinity(locale1.get(), locale2.get()); + } + } else { + return LocaleAffinity.NONE; + } + } + + private boolean hasSameSpokenLanguageAffinity(final ULocale locale1, final ULocale locale2) { + final Optional spoken1 = getSpokenLanguageLocale(locale1.toLanguageTag()); + final Optional spoken2 = getSpokenLanguageLocale(locale2.toLanguageTag()); + return spoken1.isPresent() && spoken2.isPresent() && isSameLocale(spoken1.get(), spoken2.get()); + } + + static LocaleAffinity calculateScoreBasedAffinity(final ULocale l1, final ULocale l2) { + int bestDistance = getBestDistanceBetweenLocales(l1, l2); + int correspondingScore = convertDistanceToAffinityScore(bestDistance); + return convertScoreToLocaleAffinity(correspondingScore); + } + + static boolean isAvailableLanguage(final ULocale locale) { + return AVAILABLE_LANGUAGE_CODES.contains(locale.getLanguage().toLowerCase()); + } + + private static int getBestDistanceBetweenLocales(final ULocale locale1, final ULocale locale2) { + final LSR lsr1 = getMaximizedLanguageScriptRegion(locale1); + final LSR lsr2 = getMaximizedLanguageScriptRegion(locale2); + return getBestDistanceBetweenLSR(lsr1, lsr2); + } + + static int getBestDistanceBetweenLSR(final LSR lsr1, final LSR lsr2) { + // Croatian should be matched with Bosnian. This is the case for Bosnian written in Latin + // script, but not Cyrillic, because the ICU implementation enforces script matching. We + // created a workaround to ensure that we return a MUTUALLY_INTELLIGIBLE affinity when + // encountering this locale. + if (calculatingDistanceBetweenCroatianAndBosnian(lsr1, lsr2)) { + return 0; + } else { + // We calculate distances both ways, and return the minimum value (= best distance). 
+ return Math.min(calculateDistance(lsr1, lsr2), calculateDistance(lsr2, lsr1)); + } + } + + private static int calculateDistance(final LSR lsr1, final LSR lsr2) { + return Math.abs( + LOCALE_DISTANCE_INSTANCE.getBestIndexAndDistance( + lsr1, + new LSR[] {lsr2}, + LOCALE_DISTANCE_SUPPORTED_LSRS_LENGTH, + LOCALE_DISTANCE_SHIFTED, + LOCALE_DISTANCE_FAVOR_SUBTAG, + LOCALE_DISTANCE_DIRECTION)); + } + + static int convertDistanceToAffinityScore(final int distance) { + if (distance > DISTANCE_THRESHOLD) { + return 0; + } else { + return (int) ((DISTANCE_THRESHOLD - distance) / DISTANCE_THRESHOLD * 100.0); + } + } + + static LocaleAffinity convertScoreToLocaleAffinity(final int score) { + if (score > SCORE_THRESHOLD_MUTUALLY_INTELLIGIBLE) { + return LocaleAffinity.MUTUALLY_INTELLIGIBLE; + } else if (score > SCORE_THRESHOLD_HIGH) { + return LocaleAffinity.HIGH; + } else if (score > SCORE_THRESHOLD_LOW) { + return LocaleAffinity.LOW; + } else { + return LocaleAffinity.NONE; + } + } + + private static boolean calculatingDistanceBetweenCroatianAndBosnian( + final LSR lsr1, final LSR lsr2) { + return (lsr1.language.equals(LANGUAGE_CODE_CROATIAN) + && lsr2.language.equals(LANGUAGE_CODE_BOSNIAN)) + || (lsr1.language.equals(LANGUAGE_CODE_BOSNIAN) + && lsr2.language.equals(LANGUAGE_CODE_CROATIAN)); + } + + static LSR getMaximizedLanguageScriptRegion(final ULocale locale) { + return LIKELY_SUBTAGS_INSTANCE.makeMaximizedLsrFrom( + locale, LIKELY_SUBTAGS_RETURNS_INPUT_IF_UNMATCH); + } + + /** + * Returns a {@link Builder} instance that will allow you to manually create a {@link + * LocaleAffinityBiCalculatorBaseImpl} instance. + * + * @return The builder + */ + public static Builder builder() { + return new AutoValue_LocaleAffinityBiCalculatorBaseImpl.Builder(); + } + + /** A builder for a {@link LocaleAffinityBiCalculatorBaseImpl}. 
*/ + @AutoValue.Builder + public abstract static class Builder { + Builder() {} // package private constructor + + abstract LocaleAffinityBiCalculatorBaseImpl autoBuild(); + + /** Builds a {@link LocaleAffinityBiCalculator} out of this builder. */ + public final LocaleAffinityBiCalculator build() { + return autoBuild(); + } + } +} diff --git a/locales-common/src/main/java/com/spotify/i18n/locales/common/impl/LocaleAffinityCalculatorBaseImpl.java b/locales-common/src/main/java/com/spotify/i18n/locales/common/impl/LocaleAffinityCalculatorBaseImpl.java index 00b50dd..d05f262 100644 --- a/locales-common/src/main/java/com/spotify/i18n/locales/common/impl/LocaleAffinityCalculatorBaseImpl.java +++ b/locales-common/src/main/java/com/spotify/i18n/locales/common/impl/LocaleAffinityCalculatorBaseImpl.java @@ -20,16 +20,16 @@ package com.spotify.i18n.locales.common.impl; +import static com.spotify.i18n.locales.common.impl.LocaleAffinityBiCalculatorBaseImpl.convertDistanceToAffinityScore; +import static com.spotify.i18n.locales.common.impl.LocaleAffinityBiCalculatorBaseImpl.convertScoreToLocaleAffinity; +import static com.spotify.i18n.locales.common.impl.LocaleAffinityBiCalculatorBaseImpl.getBestDistanceBetweenLSR; +import static com.spotify.i18n.locales.common.impl.LocaleAffinityBiCalculatorBaseImpl.getMaximizedLanguageScriptRegion; import static com.spotify.i18n.locales.utils.hierarchy.LocalesHierarchyUtils.isRootLocale; import static com.spotify.i18n.locales.utils.hierarchy.LocalesHierarchyUtils.isSameLocale; import com.google.auto.value.AutoValue; import com.google.common.base.Preconditions; import com.ibm.icu.impl.locale.LSR; -import com.ibm.icu.impl.locale.LikelySubtags; -import com.ibm.icu.impl.locale.LocaleDistance; -import com.ibm.icu.util.LocaleMatcher.Direction; -import com.ibm.icu.util.LocaleMatcher.FavorSubtag; import com.ibm.icu.util.ULocale; import com.spotify.i18n.locales.common.LocaleAffinityCalculator; import com.spotify.i18n.locales.common.model.LocaleAffinity; 
@@ -39,6 +39,7 @@ import edu.umd.cs.findbugs.annotations.Nullable; import java.util.Optional; import java.util.Set; +import java.util.stream.Collectors; /** * Base implementation of {@link LocaleAffinityCalculator} that calculates the locale affinity for a @@ -52,37 +53,6 @@ @AutoValue public abstract class LocaleAffinityCalculatorBaseImpl implements LocaleAffinityCalculator { - // LocaleDistance.INSTANCE is commented as VisibleForTesting, so not ideal ... but this is the - // only way to make use of this class, which provides the features we need here. - private static final LocaleDistance LOCALE_DISTANCE_INSTANCE = LocaleDistance.INSTANCE; - - // LocaleDistance best distance method arguments, all assigned to their default as per icu - // implementation. - private static final int LOCALE_DISTANCE_SHIFTED = - LocaleDistance.shiftDistance(LOCALE_DISTANCE_INSTANCE.getDefaultScriptDistance()); - private static final int LOCALE_DISTANCE_SUPPORTED_LSRS_LENGTH = 1; - private static final FavorSubtag LOCALE_DISTANCE_FAVOR_SUBTAG = FavorSubtag.LANGUAGE; - private static final Direction LOCALE_DISTANCE_DIRECTION = Direction.WITH_ONE_WAY; - - // LikelySubtags.INSTANCE is commented as VisibleForTesting, so not ideal ... but this is the - // only way to make use of this class, which provides the features we need here. - private static final LikelySubtags LIKELY_SUBTAGS_INSTANCE = LikelySubtags.INSTANCE; - - // LikelySubtags method arguments, all assigned to their default as per icu implementation. - private static final boolean LIKELY_SUBTAGS_RETURNS_INPUT_IF_UNMATCH = false; - - // Distance threshold: Anything above this value will be scored 0. 
- private static final double DISTANCE_THRESHOLD = 224.0; - - // Score to affinity thresholds - private static final int SCORE_THRESHOLD_MUTUALLY_INTELLIGIBLE = 65; - private static final int SCORE_THRESHOLD_HIGH = 30; - private static final int SCORE_THRESHOLD_LOW = 0; - - // Language codes for which we need some manual tweaks - private static final String LANGUAGE_CODE_CROATIAN = "hr"; - private static final String LANGUAGE_CODE_BOSNIAN = "bs"; - /** * Returns the set of {@link ULocale} against which affinity is being calculated. * @@ -90,6 +60,21 @@ public abstract class LocaleAffinityCalculatorBaseImpl implements LocaleAffinity */ public abstract Set againstLocales(); + /** + * Returns the set of best matching spoken {@link ULocale} against which affinity is being + * calculated. + * + * @return set of locales + */ + abstract Set againstSpokenLocales(); + + /** + * Returns the set of maximized {@link LSR} against which affinity is being calculated. + * + * @return set of maximized LSRs + */ + abstract Set againstMaximizedLSRs(); + /** * Returns the calculated {@link LocaleAffinityResult} for the given language tag * @@ -118,10 +103,7 @@ private boolean hasSameSpokenLanguageAffinity(@Nullable final String languageTag return LanguageUtils.getSpokenLanguageLocale(languageTag) .map( spokenLanguageLocale -> - againstLocales().stream() - .map(ULocale::toLanguageTag) - .map(LanguageUtils::getSpokenLanguageLocale) - .flatMap(Optional::stream) + againstSpokenLocales().stream() .anyMatch( againstSpokenLocale -> isSameLocale(spokenLanguageLocale, againstSpokenLocale))) @@ -136,69 +118,17 @@ private LocaleAffinity calculateScoreBasedAffinity(String languageTag) { private int getBestDistance(@Nullable final String languageTag) { return LanguageTagUtils.parse(languageTag) - .map(LocaleAffinityCalculatorBaseImpl::getMaximizedLanguageScriptRegion) + .filter(LocaleAffinityBiCalculatorBaseImpl::isAvailableLanguage) + .map(parsed -> getMaximizedLanguageScriptRegion(parsed)) .map( 
maxParsed -> - againstLocales().stream() - .map(LocaleAffinityCalculatorBaseImpl::getMaximizedLanguageScriptRegion) - .map( - maxSupported -> - getDistanceBetweenInputAndSupported(maxParsed, maxSupported)) - .map(Math::abs) + againstMaximizedLSRs().stream() + .map(maxAgainst -> getBestDistanceBetweenLSR(maxParsed, maxAgainst)) .min(Integer::compare) .orElse(Integer.MAX_VALUE)) .orElse(Integer.MAX_VALUE); } - private int convertDistanceToAffinityScore(final int distance) { - if (distance > DISTANCE_THRESHOLD) { - return 0; - } else { - return (int) ((DISTANCE_THRESHOLD - distance) / DISTANCE_THRESHOLD * 100.0); - } - } - - private LocaleAffinity convertScoreToLocaleAffinity(final int score) { - if (score > SCORE_THRESHOLD_MUTUALLY_INTELLIGIBLE) { - return LocaleAffinity.MUTUALLY_INTELLIGIBLE; - } else if (score > SCORE_THRESHOLD_HIGH) { - return LocaleAffinity.HIGH; - } else if (score > SCORE_THRESHOLD_LOW) { - return LocaleAffinity.LOW; - } else { - return LocaleAffinity.NONE; - } - } - - private int getDistanceBetweenInputAndSupported(final LSR maxParsed, final LSR maxSupported) { - // Croatian should be matched with Bosnian. This is the case for Bosnian written in Latin - // script, but not Cyrillic, because the ICU implementation enforces script matching. We - // created a workaround to ensure that we return a MUTUALLY_INTELLIGIBLE affinity when - // encountering this locale. 
- if (calculatingDistanceBetweenCroatianAndBosnian(maxParsed, maxSupported)) { - return 0; - } - return LOCALE_DISTANCE_INSTANCE.getBestIndexAndDistance( - maxParsed, - new LSR[] {maxSupported}, - LOCALE_DISTANCE_SUPPORTED_LSRS_LENGTH, - LOCALE_DISTANCE_SHIFTED, - LOCALE_DISTANCE_FAVOR_SUBTAG, - LOCALE_DISTANCE_DIRECTION); - } - - private boolean calculatingDistanceBetweenCroatianAndBosnian(final LSR lsr1, final LSR lsr2) { - return (lsr1.language.equals(LANGUAGE_CODE_CROATIAN) - && lsr2.language.equals(LANGUAGE_CODE_BOSNIAN)) - || (lsr1.language.equals(LANGUAGE_CODE_BOSNIAN) - && lsr2.language.equals(LANGUAGE_CODE_CROATIAN)); - } - - private static LSR getMaximizedLanguageScriptRegion(final ULocale locale) { - return LIKELY_SUBTAGS_INSTANCE.makeMaximizedLsrFrom( - locale, LIKELY_SUBTAGS_RETURNS_INPUT_IF_UNMATCH); - } - /** * Returns a {@link Builder} instance that will allow you to manually create a {@link * LocaleAffinityCalculatorBaseImpl} instance. @@ -222,17 +152,56 @@ public abstract static class Builder { */ public abstract Builder againstLocales(final Set locales); + /** + * Configures the set of best matching spoken {@link ULocale} against which affinity will be + * calculated. + * + * @param locales spoken locales + * @return The {@link Builder} instance + */ + abstract Builder againstSpokenLocales(final Set locales); + + /** + * Configures the set of maximized {@link LSR} against which affinity will be calculated. + * + * @param maximizedLSR + * @return The {@link Builder} instance + */ + abstract Builder againstMaximizedLSRs(final Set maximizedLSR); + + abstract Set againstLocales(); + abstract LocaleAffinityCalculatorBaseImpl autoBuild(); /** Builds a {@link LocaleAffinityCalculator} out of this builder. 
*/ public final LocaleAffinityCalculator build() { - final LocaleAffinityCalculatorBaseImpl built = autoBuild(); - for (ULocale locale : built.againstLocales()) { + for (ULocale locale : againstLocales()) { Preconditions.checkState( !isRootLocale(locale), "The locales against which affinity needs to be calculated cannot contain the root."); } - return built; + + // Filter out locales with a language unavailable in CLDR + againstLocales( + againstLocales().stream() + .filter(LocaleAffinityBiCalculatorBaseImpl::isAvailableLanguage) + .collect(Collectors.toSet())); + + // Prepare the best matching spoken locales set, for faster calculations + againstSpokenLocales( + againstLocales().stream() + .map(ULocale::toLanguageTag) + .map(LanguageUtils::getSpokenLanguageLocale) + .flatMap(Optional::stream) + .collect(Collectors.toSet())); + + // Prepare the maximized LSR set, for faster calculations + againstMaximizedLSRs( + againstLocales().stream() + .map(LocaleAffinityBiCalculatorBaseImpl::getMaximizedLanguageScriptRegion) + .collect(Collectors.toSet())); + + return autoBuild(); } } } diff --git a/locales-common/src/test/java/com/spotify/i18n/locales/common/impl/LocaleAffinityBiCalculatorBaseImplTest.java b/locales-common/src/test/java/com/spotify/i18n/locales/common/impl/LocaleAffinityBiCalculatorBaseImplTest.java new file mode 100644 index 0000000..002374e --- /dev/null +++ b/locales-common/src/test/java/com/spotify/i18n/locales/common/impl/LocaleAffinityBiCalculatorBaseImplTest.java @@ -0,0 +1,164 @@ +/*- + * -\-\- + * locales-common + * -- + * Copyright (C) 2016 - 2025 Spotify AB + * -- + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * -/-/- + */ + +package com.spotify.i18n.locales.common.impl; + +import static com.spotify.i18n.locales.common.model.LocaleAffinity.HIGH; +import static com.spotify.i18n.locales.common.model.LocaleAffinity.LOW; +import static com.spotify.i18n.locales.common.model.LocaleAffinity.MUTUALLY_INTELLIGIBLE; +import static com.spotify.i18n.locales.common.model.LocaleAffinity.NONE; +import static com.spotify.i18n.locales.common.model.LocaleAffinity.SAME; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.is; +import static org.junit.jupiter.api.Assertions.*; + +import com.ibm.icu.util.ULocale; +import com.spotify.i18n.locales.common.LocaleAffinityBiCalculator; +import com.spotify.i18n.locales.common.LocaleAffinityCalculator; +import com.spotify.i18n.locales.common.model.LocaleAffinity; +import com.spotify.i18n.locales.common.model.LocaleAffinityResult; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +class LocaleAffinityBiCalculatorBaseImplTest { + + private static final LocaleAffinityBiCalculator BI_CALCULATOR = + LocaleAffinityBiCalculatorBaseImpl.builder().build(); + + @ParameterizedTest + @MethodSource + void whenCalculating_returnsExpectedAffinity( + final String languageTag1, final String languageTag2, final LocaleAffinity expectedAffinity) { + assertThat( + 
BI_CALCULATOR.calculate(languageTag1, languageTag2), + is(LocaleAffinityResult.builder().affinity(expectedAffinity).build())); + } + + public static Stream whenCalculating_returnsExpectedAffinity() { + return Stream.of( + // Edge cases + Arguments.of("What is this?", " An invalid language tag!", NONE), + Arguments.of("ok-gargabe", "ok-junk", NONE), + Arguments.of("apples-and-bananas", "oranges-and-pears", NONE), + Arguments.of(" ", "", NONE), + Arguments.of(null, null, NONE), + Arguments.of("an", " An invalid language tag!", NONE), + Arguments.of("ok", "ok-junk", NONE), + Arguments.of("oranges", "oranges-and-pears", NONE), + + // Catalan should be matched with Spanish, both ways + Arguments.of("es", "ca", LOW), + Arguments.of("ca-ES", "es-AD", LOW), + Arguments.of("ca-AD", "es-419", LOW), + + // Bosnian should be matched for all scripts and regions + Arguments.of("bs-Cyrl", "bs", SAME), + Arguments.of("bs-Cyrl", "bs-Latn", SAME), + Arguments.of("bs-Cyrl-MK", "bs-Cyrl", SAME), + Arguments.of("bs-Cyrl", "bs-BA", SAME), + Arguments.of("bs", "bs-Latn-BA", SAME), + Arguments.of("bs-Latn-US", "bs-Cyrl-BA", SAME), + + // German and Swiss German + Arguments.of("de-DE", "gsw-AT", MUTUALLY_INTELLIGIBLE), + Arguments.of("de-CH", "gsw-CH", MUTUALLY_INTELLIGIBLE), + Arguments.of("gsw-CH", "de-CH", MUTUALLY_INTELLIGIBLE), + + // English + Arguments.of("en", "en-AU", SAME), + Arguments.of("en-GB", "fr", NONE), + Arguments.of("en-US", "nb", NONE), + + // Spanish in Europe should be matched with Spanish Latin America + Arguments.of("es-150", "es-419", SAME), + Arguments.of("es-ES", "es-GB", SAME), + Arguments.of("es-US", "es-AR", SAME), + + // Basque should be matched with Spanish + Arguments.of("es", "eu", LOW), + Arguments.of("eu", "es", LOW), + + // French + Arguments.of("fr", "fr-CH", SAME), + Arguments.of("fr-BE", "fr-HI", SAME), + Arguments.of("fr-CA", "fr-US", SAME), + Arguments.of("fr-FR", "fr-JP", SAME), + + // Galician should be matched, since we support Spanish + 
Arguments.of("gl", "es", LOW), + Arguments.of("es", "gl", LOW), + + // Hindi shouldn't be matched with Tamil + Arguments.of("hi", "ta", NONE), + + // Hindi (Latin) and English shouldn't be matched + Arguments.of("hi-Latn", "en-GB", NONE), + + // Croatian should be nicely matched with Bosnian + Arguments.of("bs-Latn", "hr-HR", MUTUALLY_INTELLIGIBLE), + Arguments.of("hr-BA", "bs-Cyrl", MUTUALLY_INTELLIGIBLE), + + // Serbian Cyrillic should be matched with Serbian Latin + Arguments.of("sr-Latn", "sr", SAME), + Arguments.of("sr", "sr-Latn", SAME), + Arguments.of("sr-Latn-MK", "sr-Cyrl-ME", SAME), + + // Portuguese + Arguments.of("pt-PT", "pt", SAME), + Arguments.of("pt-BR", "pt-PT", SAME), + Arguments.of("pt-SE", "pt-JP", SAME), + Arguments.of("pt-US", "pt-CL", SAME), + + // Norwegian, Norwegian Bokmål, Nynorsk + Arguments.of("nb", "da", HIGH), + Arguments.of("nn", "nb", SAME), + Arguments.of("no", "nb", SAME), + + // Uzbek should be matched as SAME for all scripts + Arguments.of("uz-Arab", "uz-Cyrl", SAME), + Arguments.of("uz-Cyrl", "uz", SAME), + Arguments.of("uz", "uz-Arab", SAME), + + // Traditional Chinese shouldn't be matched with Simplified + Arguments.of("zh-Hant", "zh-CN", NONE), + Arguments.of("zh-Hant", "zh", NONE), + Arguments.of("zh-MK", "zh-CN", SAME), + Arguments.of("zh-FR", "zh-CN", SAME), + Arguments.of("zh-TW", "zh-US", SAME)); + } + + @Test + void whenCalculatingAffinityForSwedishAgainstBokmaalNorwegianAndDanish_returnsNone() { + final LocaleAffinityCalculator matcher = + LocaleAffinityCalculatorBaseImpl.builder() + .againstLocales( + Set.of("da", "nb", "no").stream() + .map(ULocale::forLanguageTag) + .collect(Collectors.toSet())) + .build(); + + assertThat(matcher.calculate("sv"), is(LocaleAffinityResult.builder().affinity(NONE).build())); + } +} diff --git a/locales-common/src/test/java/com/spotify/i18n/locales/common/impl/LocaleAffinityCalculatorBaseImplTest.java 
b/locales-common/src/test/java/com/spotify/i18n/locales/common/impl/LocaleAffinityCalculatorBaseImplTest.java index b931f2f..043d8d2 100644 --- a/locales-common/src/test/java/com/spotify/i18n/locales/common/impl/LocaleAffinityCalculatorBaseImplTest.java +++ b/locales-common/src/test/java/com/spotify/i18n/locales/common/impl/LocaleAffinityCalculatorBaseImplTest.java @@ -28,6 +28,7 @@ import static org.hamcrest.Matchers.is; import static org.junit.jupiter.api.Assertions.*; +import com.ibm.icu.impl.locale.LSR; import com.ibm.icu.util.ULocale; import com.spotify.i18n.locales.common.LocaleAffinityCalculator; import com.spotify.i18n.locales.common.model.LocaleAffinity; @@ -60,7 +61,7 @@ void whenBuildingWithMissingRequiredProperties_buildFails() { assertThrows( IllegalStateException.class, () -> LocaleAffinityCalculatorBaseImpl.builder().build()); - assertEquals(thrown.getMessage(), "Missing required properties: againstLocales"); + assertEquals(thrown.getMessage(), "Property \"againstLocales\" has not been set"); } @Test @@ -78,6 +79,63 @@ void whenBuildingWithRootAsPartOfAgainstLocales_buildFails() { "The locales against which affinity needs to be calculated cannot contain the root."); } + @Test + void whenBuildingWithUnavailableLocale_buildSucceeds() { + LocaleAffinityCalculatorBaseImpl built = + (LocaleAffinityCalculatorBaseImpl) + LocaleAffinityCalculatorBaseImpl.builder() + .againstLocales( + Set.of(ULocale.forLanguageTag("apples"), ULocale.forLanguageTag("English"))) + .build(); + + assertTrue(built.againstLocales().isEmpty()); + assertTrue(built.againstSpokenLocales().isEmpty()); + assertTrue(built.againstMaximizedLSRs().isEmpty()); + } + + @Test + void whenBuildingForHappyPath_buildSucceedsAndStructuresArePrepared() { + final Set againstLocales = + Set.of( + ULocale.FRENCH, + ULocale.CANADA_FRENCH, + ULocale.ENGLISH, + ULocale.CHINESE, + ULocale.TRADITIONAL_CHINESE, + ULocale.GERMANY, + ULocale.forLanguageTag("de-AT"), + ULocale.forLanguageTag("de-CH"), + 
ULocale.JAPAN); + + LocaleAffinityCalculatorBaseImpl built = + (LocaleAffinityCalculatorBaseImpl) + LocaleAffinityCalculatorBaseImpl.builder().againstLocales(againstLocales).build(); + + assertEquals(againstLocales, built.againstLocales()); + assertEquals( + Set.of( + ULocale.forLanguageTag("de"), + ULocale.forLanguageTag("fr"), + ULocale.forLanguageTag("en"), + ULocale.forLanguageTag("ja"), + ULocale.forLanguageTag("zh-Hans"), + ULocale.forLanguageTag("zh-Hant")), + built.againstSpokenLocales()); + + assertEquals( + Set.of( + "de-Latn-AT", + "de-Latn-CH", + "de-Latn-DE", + "en-Latn-US", + "fr-Latn-CA", + "fr-Latn-FR", + "ja-Jpan-JP", + "zh-Hans-CN", + "zh-Hant-TW"), + built.againstMaximizedLSRs().stream().map(LSR::toString).collect(Collectors.toSet())); + } + @ParameterizedTest @MethodSource(value = "whenCalculating_returnsExpectedAffinity") void whenCalculatingAgainstEmptySetOfLocales_alwaysReturnsAffinityNone(final String languageTag) {