diff --git a/README.md b/README.md index 1f4e085..05f8673 100644 --- a/README.md +++ b/README.md @@ -62,7 +62,7 @@ localization quality assurance testers only. You can see all these concepts in action in [our HTTP server example implementation](./examples/locales-http-examples). -#### Calculate the affinity between two locales +#### Calculate the affinity between locales This feature enables you to easily and programmatically reason around affinity between locales, without having to know anything about how they relate to each other. @@ -77,18 +77,31 @@ We define the affinity between two locales using a `LocaleAffinity` enum value: should understand both if they understand one of them. - `SAME`: Locales identify the same language -We offer two separate logics, each dedicated to separate use-cases: +We offer separate affinity logics, each dedicated to separate use-cases: -- **Locale affinity calculation**: To be used when we need visibility on the affinity of a given - locale against a set of locales. -- **Reference locales calculation:** To be used when we need to join two datasets based on language - identifiers. It is indeed impossible to perform such a join operation out of the box, as language - identifiers can immensely differ even when they are syntactically valid and identify the very same - language. For Example: `zh-Hant`, `zh-HK`, `zh-MO`, `zh-Hant-TW`, `zh-Hant-FR`, `zh-US` all - identify Traditional Chinese, but `zh` and `zh-CN` identify Simplified Chinese. +##### Calculate the affinity of a given locale against a set of locales -You can see all these concepts in action -in [our locales affinity example implementations](./examples/locales-affinity-examples). +This should be used when we need visibility on the affinity of a given locale, against a set of +pre-configured locales. 
This can, for instance, be used to verify whether some content language is a +good match for a given user, based on the Accept-Language header value received in an incoming +request. + +You can see this concept in action +in [our example implementation](./examples/locales-affinity-examples/src/main/java/com/spotify/i18n/locales/affinity/examples/AffinityCalculationExampleMain.java). + +##### Calculate the affinity between 2 given locales + +This should be used when we need visibility on the affinity between two given locales. This can, for +instance, be used to join two datasets based on language identifiers and how they relate to each +other in terms of affinity. + +It is indeed impossible to perform such a join operation out of the box, as language identifiers +can immensely differ even when they are syntactically valid and identify the very same language. For +example: `zh-Hant`, `zh-HK`, `zh-MO`, `zh-Hant-TW`, `zh-Hant-FR`, `zh-US` all +identify Traditional Chinese, but `zh` and `zh-CN` identify Simplified Chinese. + +You can see this concept in action +in [our example implementation](./examples/locales-affinity-examples/src/main/java/com/spotify/i18n/locales/affinity/examples/AffinityBasedJoinExampleMain.java). ### Utility helpers diff --git a/examples/locales-affinity-examples/src/main/java/com/spotify/i18n/locales/affinity/examples/AffinityBasedJoinExampleMain.java b/examples/locales-affinity-examples/src/main/java/com/spotify/i18n/locales/affinity/examples/AffinityBasedJoinExampleMain.java new file mode 100644 index 0000000..2f6933d --- /dev/null +++ b/examples/locales-affinity-examples/src/main/java/com/spotify/i18n/locales/affinity/examples/AffinityBasedJoinExampleMain.java @@ -0,0 +1,122 @@ +/*- + * -\-\- + * locales-affinity-examples + * -- + * Copyright (C) 2016 - 2025 Spotify AB + * -- + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * -/-/- + */ + +package com.spotify.i18n.locales.affinity.examples; + +import com.spotify.i18n.locales.common.LocaleAffinityBiCalculator; +import com.spotify.i18n.locales.common.LocaleAffinityHelpersFactory; +import com.spotify.i18n.locales.common.model.LocaleAffinityResult; +import java.util.List; + +/** + * Showcase implementation of Java-locales affinity calculation + * + * @author Eric Fjøsne + */ +public class AffinityBasedJoinExampleMain { + + /** Create a {@link LocaleAffinityBiCalculator} instance out of the factory */ + private static final LocaleAffinityBiCalculator LOCALE_AFFINITY_BI_CALCULATOR = + LocaleAffinityHelpersFactory.getDefaultInstance().buildAffinityBiCalculator(); + + /** + * Example logic which attempts to join 2 sets of language tags. + * + *

<p>Possible joins in the execution output are: + * + *

+ * + * @param args + */ + public static void main(String[] args) { + final List languageTagsInOriginDataset = + List.of( + "bs-Cyrl-BA", // Bosnian (Cyrillic), Bosnia and Herzegovina + "de", // German + "da-SE", // Danish (Sweden) + "en-GB", // English (Great-Britain) + "es-BE", // Spanish (Belgium) + "fr-SE", // French (Sweden) + "hr-BA", // Croatian (Bosnia and Herzegovina) + "it-CH", // Italian (Switzerland) + "ja-IT", // Japanese (Italy) + "nl-BE", // Dutch (Belgium) + "zh-Hans-US", // Chinese (Simplified) (USA) + "zh-HK" // Chinese (Hong-Kong) + ); + + final List languageTagsInTargetDataset = + List.of( + "bs-Latn", // Bosnian (Latin) + "ca", // Catalan + "de-AT", // German (Austria) + "en-JP", // English (Japan) + "en-SE", // English (Sweden) + "fr-BE-u-ca-gregorian", // French (Belgium), with gregorian calendar extension + "fr-CA", // French (Canada) + "hr-MK", // Croatian (North Macedonia) + "ja@calendar=buddhist", // Japanese, with buddhist calendar extension + "nb-FI", // Norwegian Bokmål (Finland) + "nl-ZA", // Dutch (South Africa) + "pt-US", // Portuguese (USA) + "zh-CN" // Chinese (Mainland China) + ); + + // Iterate through all possible combinations, and calculate the affinity for each of them. 
+ for (String languageTagInOriginDataset : languageTagsInOriginDataset) { + for (String languageTagInTargetDataset : languageTagsInTargetDataset) { + LocaleAffinityResult affinityResult = + LOCALE_AFFINITY_BI_CALCULATOR.calculate( + languageTagInOriginDataset, languageTagInTargetDataset); + switch (affinityResult.affinity()) { + case NONE: + System.out.println( + String.format( + "(%s, %s) -> No join possible.", + languageTagInOriginDataset, languageTagInTargetDataset)); + break; + default: + System.out.println( + String.format( + "(%s, %s) -> Join possible with %s affinity.", + languageTagInOriginDataset, + languageTagInTargetDataset, + affinityResult.affinity())); + break; + } + } + } + } +} diff --git a/examples/locales-affinity-examples/src/main/java/com/spotify/i18n/locales/affinity/examples/ReferenceLocalesBasedJoinExampleMain.java b/examples/locales-affinity-examples/src/main/java/com/spotify/i18n/locales/affinity/examples/ReferenceLocalesBasedJoinExampleMain.java deleted file mode 100644 index 61c1697..0000000 --- a/examples/locales-affinity-examples/src/main/java/com/spotify/i18n/locales/affinity/examples/ReferenceLocalesBasedJoinExampleMain.java +++ /dev/null @@ -1,134 +0,0 @@ -/*- - * -\-\- - * locales-affinity-examples - * -- - * Copyright (C) 2016 - 2025 Spotify AB - * -- - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * -/-/- - */ - -package com.spotify.i18n.locales.affinity.examples; - -import com.ibm.icu.util.ULocale; -import com.spotify.i18n.locales.common.LocaleAffinityHelpersFactory; -import com.spotify.i18n.locales.common.ReferenceLocalesCalculator; -import com.spotify.i18n.locales.common.model.RelatedReferenceLocale; -import java.util.List; -import java.util.Optional; -import java.util.stream.Collectors; - -/** - * Showcase implementation of Java-locales affinity calculation - * - * @author Eric Fjøsne - */ -public class ReferenceLocalesBasedJoinExampleMain { - - /** Create a {@link ReferenceLocalesCalculator} instance out of the factory */ - private static final ReferenceLocalesCalculator REFERENCE_LOCALES_CALCULATOR = - LocaleAffinityHelpersFactory.getDefaultInstance().buildRelatedReferenceLocalesCalculator(); - - /** - * Example logic which attempts to join 2 sets of language tags. - * - *

<p>Possible joins in the execution output are: - * - *

- * - * @param args - */ - public static void main(String[] args) { - final List languageTagsInOriginDataset = - List.of( - "de", // German - "en-GB", // English (Great-Britain) - "es-BE", // Spanish (Belgium) - "fr-SE", // French (Sweden) - "it-CH", // Italian (Switzerland) - "ja-IT", // Japanese (Italy) - "nl-BE", // Dutch (Belgium) - "zh-Hans-US", // Chinese (Simplified) (USA) - "zh-HK" // Chinese (Hong-Kong) - ); - final List languageTagsInTargetDataset = - List.of( - "ca", // Catalan - "de-AT", // German (Austria) - "en-JP", // English (Japan) - "en-SE", // English (Sweden) - "fr-BE-u-ca-gregorian", // French (Belgium), with gregorian calendar extension - "fr-CA", // French (Canada) - "ja@calendar=buddhist", // Japanese, with buddhist calendar extension - "nl-ZA", // Dutch (South Africa) - "pt-US", // Portuguese (USA) - "zh-CN" // Chinese (Mainland China) - ); - - // Iterate through all possible combinations - for (String languageTagInOriginDataset : languageTagsInOriginDataset) { - for (String languageTagInTargetDataset : languageTagsInTargetDataset) { - // Retrieve the optional related reference locale based on which a join operation can be - // performed, and display the outcome in the execution output. - getRelatedReferenceLocaleForJoin(languageTagInOriginDataset, languageTagInTargetDataset) - .ifPresentOrElse( - (rrl) -> - System.out.println( - String.format( - "(%s, %s) on reference locale [%s] with %s affinity", - languageTagInOriginDataset, - languageTagInTargetDataset, - rrl.referenceLocale().toLanguageTag(), - rrl.affinity())), - () -> - System.out.println( - String.format( - "(%s, %s), no join possible.", - languageTagInOriginDataset, languageTagInTargetDataset))); - } - } - } - - /** - * Returns the optional {@link RelatedReferenceLocale} based on which the 2 language tags can be - * joined together. 
- */ - public static Optional getRelatedReferenceLocaleForJoin( - final String languageTagInOriginDataset, final String languageTagInTargetDataset) { - Optional bestMatchingReferenceLocale = - REFERENCE_LOCALES_CALCULATOR.calculateBestMatchingReferenceLocale( - languageTagInTargetDataset); - if (bestMatchingReferenceLocale.isEmpty()) { - return Optional.empty(); - } else { - List relatedReferenceLocales = - REFERENCE_LOCALES_CALCULATOR - .calculateRelatedReferenceLocales(languageTagInOriginDataset) - .stream() - .collect(Collectors.toList()); - return relatedReferenceLocales.stream() - .filter(rrl -> rrl.referenceLocale().equals(bestMatchingReferenceLocale.get())) - .findFirst(); - } - } -} diff --git a/locales-common/src/main/java/com/spotify/i18n/locales/common/LocaleAffinityBiCalculator.java b/locales-common/src/main/java/com/spotify/i18n/locales/common/LocaleAffinityBiCalculator.java new file mode 100644 index 0000000..440ecb0 --- /dev/null +++ b/locales-common/src/main/java/com/spotify/i18n/locales/common/LocaleAffinityBiCalculator.java @@ -0,0 +1,42 @@ +/*- + * -\-\- + * locales-common + * -- + * Copyright (C) 2016 - 2025 Spotify AB + * -- + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * -/-/- + */ + +package com.spotify.i18n.locales.common; + +import com.spotify.i18n.locales.common.model.LocaleAffinityResult; +import edu.umd.cs.findbugs.annotations.Nullable; + +/** + * Represents an engine that calculates the locale affinity between two given language tags. All + * implementations of this interface must return a non-null {@link LocaleAffinityResult}, even when + * the given language tags are null or empty. + * + * @author Eric Fjøsne + */ +public interface LocaleAffinityBiCalculator { + + /** + * Returns the calculated {@link LocaleAffinityResult} for the two given language tags + * + * @return the locale affinity result + */ + LocaleAffinityResult calculate( + @Nullable final String languageTag1, @Nullable final String languageTag2); +} diff --git a/locales-common/src/main/java/com/spotify/i18n/locales/common/LocaleAffinityHelpersFactory.java b/locales-common/src/main/java/com/spotify/i18n/locales/common/LocaleAffinityHelpersFactory.java index 018274e..829a551 100644 --- a/locales-common/src/main/java/com/spotify/i18n/locales/common/LocaleAffinityHelpersFactory.java +++ b/locales-common/src/main/java/com/spotify/i18n/locales/common/LocaleAffinityHelpersFactory.java @@ -24,6 +24,7 @@ import com.ibm.icu.util.ULocale; import com.spotify.i18n.locales.common.impl.LocaleAffinityCalculatorBaseImpl; import com.spotify.i18n.locales.common.impl.ReferenceLocalesCalculatorBaseImpl; +import com.spotify.i18n.locales.common.model.LocaleAffinity; import com.spotify.i18n.locales.utils.acceptlanguage.AcceptLanguageUtils; import com.spotify.i18n.locales.utils.languagetag.LanguageTagUtils; import edu.umd.cs.findbugs.annotations.Nullable; @@ -36,13 +37,15 @@ * A factory for creating instances of locale affinity related helpers: * *
<ul> - *
<li>{@link LocaleAffinityCalculator}: A helper that calculates a locale affinity for a language - * tag, against a given set of locales. - *
<li>{@link ReferenceLocalesCalculator}: A helper that enables reference locales based - * operations, most notably to join datasets by enabling match operations between an origin - * and a target locale, and enabling filtering on the affinity between these locales. + *
<li>{@link LocaleAffinityCalculator}: A helper that calculates the locale affinity for a given + * language tag, against a configured set of locales. + *
<li>{@link LocaleAffinityBiCalculator}: A helper that calculates the locale affinity between + * two given language tags. + *
<li>{@link ReferenceLocalesCalculator}: A helper that enables reference locale-based + * operations. * </ul>
* + * @see LocaleAffinity * @author Eric Fjøsne */ public class LocaleAffinityHelpersFactory { @@ -64,6 +67,7 @@ private LocaleAffinityHelpersFactory() {} * * @param acceptLanguage The Accept-Language value * @return Pre-configured locale affinity calculator + * @see LocaleAffinity * @see LocaleAffinityCalculator * @see Accept-Language @@ -82,6 +86,7 @@ public LocaleAffinityCalculator buildAffinityCalculatorForAcceptLanguage( * calculate affinity for a language tag, against all the given supplied locales. * * @return Pre-configured locale affinity calculator + * @see LocaleAffinity * @see LocaleAffinityCalculator * @see ULocale */ @@ -97,6 +102,7 @@ public LocaleAffinityCalculator buildAffinityCalculatorForLocales(final SetInvalid or improperly formatted language tags will be ignored. * * @return Pre-configured locale affinity calculator + * @see LocaleAffinity * @see LocaleAffinityCalculator * @see IETF BCP 47 language tag */ @@ -110,6 +116,18 @@ public LocaleAffinityCalculator buildAffinityCalculatorForLanguageTags( .collect(Collectors.toSet())); } + /** + * Returns a pre-configured, ready-to-use instance of {@link LocaleAffinityBiCalculator}, that can + * calculate the affinity between two given language tags. + * + * @return Pre-configured locale affinity bi-calculator + * @see LocaleAffinity + * @see LocaleAffinityBiCalculator + */ + public LocaleAffinityBiCalculator buildAffinityBiCalculator() { + return ReferenceLocalesCalculatorBaseImpl.builder().buildLocaleAffinityBiCalculator(); + } + /** * Returns a pre-configured, ready-to-use instance of {@link ReferenceLocalesCalculator}. 
* @@ -117,6 +135,6 @@ public LocaleAffinityCalculator buildAffinityCalculatorForLanguageTags( * @see ReferenceLocalesCalculator */ public ReferenceLocalesCalculator buildRelatedReferenceLocalesCalculator() { - return ReferenceLocalesCalculatorBaseImpl.builder().build(); + return ReferenceLocalesCalculatorBaseImpl.builder().buildReferenceLocalesCalculator(); } } diff --git a/locales-common/src/main/java/com/spotify/i18n/locales/common/impl/LocaleAffinityCalculatorBaseImpl.java b/locales-common/src/main/java/com/spotify/i18n/locales/common/impl/LocaleAffinityCalculatorBaseImpl.java index 1f432a2..00b50dd 100644 --- a/locales-common/src/main/java/com/spotify/i18n/locales/common/impl/LocaleAffinityCalculatorBaseImpl.java +++ b/locales-common/src/main/java/com/spotify/i18n/locales/common/impl/LocaleAffinityCalculatorBaseImpl.java @@ -79,6 +79,10 @@ public abstract class LocaleAffinityCalculatorBaseImpl implements LocaleAffinity private static final int SCORE_THRESHOLD_HIGH = 30; private static final int SCORE_THRESHOLD_LOW = 0; + // Language codes for which we need some manual tweaks + private static final String LANGUAGE_CODE_CROATIAN = "hr"; + private static final String LANGUAGE_CODE_BOSNIAN = "bs"; + /** * Returns the set of {@link ULocale} against which affinity is being calculated. * @@ -167,6 +171,13 @@ private LocaleAffinity convertScoreToLocaleAffinity(final int score) { } private int getDistanceBetweenInputAndSupported(final LSR maxParsed, final LSR maxSupported) { + // Croatian should be matched with Bosnian. This is the case for Bosnian written in Latin + // script, but not Cyrillic, because the ICU implementation enforces script matching. We + // created a workaround to ensure that we return a MUTUALLY_INTELLIGIBLE affinity when + // encountering this locale. 
+ if (calculatingDistanceBetweenCroatianAndBosnian(maxParsed, maxSupported)) { + return 0; + } return LOCALE_DISTANCE_INSTANCE.getBestIndexAndDistance( maxParsed, new LSR[] {maxSupported}, @@ -176,6 +187,13 @@ private int getDistanceBetweenInputAndSupported(final LSR maxParsed, final LSR m LOCALE_DISTANCE_DIRECTION); } + private boolean calculatingDistanceBetweenCroatianAndBosnian(final LSR lsr1, final LSR lsr2) { + return (lsr1.language.equals(LANGUAGE_CODE_CROATIAN) + && lsr2.language.equals(LANGUAGE_CODE_BOSNIAN)) + || (lsr1.language.equals(LANGUAGE_CODE_BOSNIAN) + && lsr2.language.equals(LANGUAGE_CODE_CROATIAN)); + } + private static LSR getMaximizedLanguageScriptRegion(final ULocale locale) { return LIKELY_SUBTAGS_INSTANCE.makeMaximizedLsrFrom( locale, LIKELY_SUBTAGS_RETURNS_INPUT_IF_UNMATCH); diff --git a/locales-common/src/main/java/com/spotify/i18n/locales/common/impl/ReferenceLocalesCalculatorBaseImpl.java b/locales-common/src/main/java/com/spotify/i18n/locales/common/impl/ReferenceLocalesCalculatorBaseImpl.java index 4f8347e..241986c 100644 --- a/locales-common/src/main/java/com/spotify/i18n/locales/common/impl/ReferenceLocalesCalculatorBaseImpl.java +++ b/locales-common/src/main/java/com/spotify/i18n/locales/common/impl/ReferenceLocalesCalculatorBaseImpl.java @@ -20,12 +20,16 @@ package com.spotify.i18n.locales.common.impl; +import static com.spotify.i18n.locales.utils.hierarchy.LocalesHierarchyUtils.isSameLocale; + import com.google.auto.value.AutoValue; import com.ibm.icu.util.LocaleMatcher; import com.ibm.icu.util.ULocale; +import com.spotify.i18n.locales.common.LocaleAffinityBiCalculator; import com.spotify.i18n.locales.common.LocaleAffinityCalculator; import com.spotify.i18n.locales.common.ReferenceLocalesCalculator; import com.spotify.i18n.locales.common.model.LocaleAffinity; +import com.spotify.i18n.locales.common.model.LocaleAffinityResult; import com.spotify.i18n.locales.common.model.RelatedReferenceLocale; import 
com.spotify.i18n.locales.utils.available.AvailableLocalesUtils; import com.spotify.i18n.locales.utils.languagetag.LanguageTagUtils; @@ -48,7 +52,8 @@ * @author Eric Fjøsne */ @AutoValue -public abstract class ReferenceLocalesCalculatorBaseImpl implements ReferenceLocalesCalculator { +public abstract class ReferenceLocalesCalculatorBaseImpl + implements ReferenceLocalesCalculator, LocaleAffinityBiCalculator { /** Prepared {@link LocaleMatcher}, ready to find the best matching reference locale */ private static final LocaleMatcher REFERENCE_LOCALE_MATCHER = @@ -105,6 +110,23 @@ public Optional calculateBestMatchingReferenceLocale( return LanguageTagUtils.parse(languageTag).map(REFERENCE_LOCALE_MATCHER::getBestMatch); } + @Override + public LocaleAffinityResult calculate( + @Nullable final String languageTag1, @Nullable final String languageTag2) { + return LocaleAffinityResult.builder() + .affinity( + calculateBestMatchingReferenceLocale(languageTag2) + .map( + referenceLocale -> + calculateRelatedReferenceLocales(languageTag1).stream() + .filter(rrl -> isSameLocale(rrl.referenceLocale(), referenceLocale)) + .findFirst() + .map(RelatedReferenceLocale::affinity) + .orElse(LocaleAffinity.NONE)) + .orElse(LocaleAffinity.NONE)) + .build(); + } + /** * Returns a {@link Builder} instance that will allow you to manually create a {@link * ReferenceLocalesCalculatorBaseImpl} instance. @@ -123,7 +145,12 @@ public abstract static class Builder { abstract ReferenceLocalesCalculatorBaseImpl autoBuild(); /** Builds a {@link ReferenceLocalesCalculator} out of this builder. */ - public final ReferenceLocalesCalculator build() { + public final ReferenceLocalesCalculator buildReferenceLocalesCalculator() { + return autoBuild(); + } + + /** Builds a {@link LocaleAffinityBiCalculator} out of this builder. 
*/ + public final LocaleAffinityBiCalculator buildLocaleAffinityBiCalculator() { return autoBuild(); } } diff --git a/locales-common/src/test/java/com/spotify/i18n/locales/common/LocaleAffinityHelpersFactoryTest.java b/locales-common/src/test/java/com/spotify/i18n/locales/common/LocaleAffinityHelpersFactoryTest.java index 91af1b1..503227e 100644 --- a/locales-common/src/test/java/com/spotify/i18n/locales/common/LocaleAffinityHelpersFactoryTest.java +++ b/locales-common/src/test/java/com/spotify/i18n/locales/common/LocaleAffinityHelpersFactoryTest.java @@ -157,6 +157,13 @@ void whenBuildingRelatedReferenceLocalesCalculator_returnsExpectedCalculator() { instanceof ReferenceLocalesCalculator); } + @Test + void whenBuildingAffinityBiCalculator_returnsExpectedCalculator() { + assertTrue( + LocaleAffinityHelpersFactory.getDefaultInstance().buildAffinityBiCalculator() + instanceof LocaleAffinityBiCalculator); + } + @ParameterizedTest @MethodSource void diff --git a/locales-common/src/test/java/com/spotify/i18n/locales/common/impl/LocaleAffinityCalculatorBaseImplTest.java b/locales-common/src/test/java/com/spotify/i18n/locales/common/impl/LocaleAffinityCalculatorBaseImplTest.java index 42e0cd3..b931f2f 100644 --- a/locales-common/src/test/java/com/spotify/i18n/locales/common/impl/LocaleAffinityCalculatorBaseImplTest.java +++ b/locales-common/src/test/java/com/spotify/i18n/locales/common/impl/LocaleAffinityCalculatorBaseImplTest.java @@ -49,7 +49,7 @@ class LocaleAffinityCalculatorBaseImplTest { public static final LocaleAffinityCalculator CALCULATOR_AGAINST_TEST_SET_OF_LOCALES = LocaleAffinityCalculatorBaseImpl.builder() .againstLocales( - Set.of("ar", "bs", "es", "fr", "ja", "pt", "sr-Latn", "zh-Hant").stream() + Set.of("ar", "bs-Cyrl", "es", "fr", "ja", "pt", "sr-Latn", "zh-Hant").stream() .map(ULocale::forLanguageTag) .collect(Collectors.toSet())) .build(); @@ -107,6 +107,14 @@ public static Stream whenCalculating_returnsExpectedAffinity() { Arguments.of("ca-ES", LOW), 
Arguments.of("ca-AD", LOW), + // Bosnian should be matched for all scripts and regions, since we support Bosnian + Arguments.of("bs", SAME), + Arguments.of("bs-Latn", SAME), + Arguments.of("bs-Cyrl", SAME), + Arguments.of("bs-BA", SAME), + Arguments.of("bs-Latn-BA", SAME), + Arguments.of("bs-Cyrl-BA", SAME), + // No english should be matched Arguments.of("en", NONE), Arguments.of("en-GB", NONE), diff --git a/locales-common/src/test/java/com/spotify/i18n/locales/common/impl/ReferenceLocalesCalculatorBaseImplTest.java b/locales-common/src/test/java/com/spotify/i18n/locales/common/impl/ReferenceLocalesCalculatorBaseImplTest.java index bd12a58..c939584 100644 --- a/locales-common/src/test/java/com/spotify/i18n/locales/common/impl/ReferenceLocalesCalculatorBaseImplTest.java +++ b/locales-common/src/test/java/com/spotify/i18n/locales/common/impl/ReferenceLocalesCalculatorBaseImplTest.java @@ -23,6 +23,7 @@ import static com.spotify.i18n.locales.common.model.LocaleAffinity.HIGH; import static com.spotify.i18n.locales.common.model.LocaleAffinity.LOW; import static com.spotify.i18n.locales.common.model.LocaleAffinity.MUTUALLY_INTELLIGIBLE; +import static com.spotify.i18n.locales.common.model.LocaleAffinity.NONE; import static com.spotify.i18n.locales.common.model.LocaleAffinity.SAME; import static com.spotify.i18n.locales.utils.hierarchy.LocalesHierarchyUtils.isSameLocale; import static org.hamcrest.MatcherAssert.assertThat; @@ -31,14 +32,17 @@ import com.ibm.icu.util.ULocale; import com.ibm.icu.util.ULocale.Builder; +import com.spotify.i18n.locales.common.LocaleAffinityBiCalculator; import com.spotify.i18n.locales.common.ReferenceLocalesCalculator; import com.spotify.i18n.locales.common.model.LocaleAffinity; import com.spotify.i18n.locales.common.model.RelatedReferenceLocale; import com.spotify.i18n.locales.utils.available.AvailableLocalesUtils; import com.spotify.i18n.locales.utils.language.LanguageUtils; +import java.util.Collections; import java.util.List; import 
java.util.Optional; import java.util.stream.Stream; +import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.Arguments; import org.junit.jupiter.params.provider.MethodSource; @@ -46,7 +50,10 @@ class ReferenceLocalesCalculatorBaseImplTest { public static final ReferenceLocalesCalculator REFERENCE_LOCALES_CALCULATOR = - ReferenceLocalesCalculatorBaseImpl.builder().build(); + ReferenceLocalesCalculatorBaseImpl.builder().buildReferenceLocalesCalculator(); + + public static final LocaleAffinityBiCalculator LOCALE_AFFINITY_BI_CALCULATOR = + ReferenceLocalesCalculatorBaseImpl.builder().buildLocaleAffinityBiCalculator(); public static Stream validateLocaleAffinityScoreRanges() { return AvailableLocalesUtils.getCldrLocales().stream().map(Arguments::of); @@ -131,8 +138,6 @@ private boolean areKnownMutuallyIntelligibleLocales(ULocale inputLS, ULocale ref switch (input) { // Bosnian and Croatian case "bs-Latn": - return reference.equals("hr-Latn"); - // Bosnian and Croatian case "bs-Cyrl": return reference.equals("hr-Latn"); // Croatian and Bosnian @@ -501,4 +506,49 @@ private static List serbian() { private static List swedish() { return List.of(rrl("sv", SAME), rrl("sv-AX", SAME), rrl("sv-FI", SAME)); } + + @Test + public void whenCalculatingForOutlierValues_returnsExpected() { + assertEquals(NONE, LOCALE_AFFINITY_BI_CALCULATOR.calculate(null, null).affinity()); + assertEquals(NONE, LOCALE_AFFINITY_BI_CALCULATOR.calculate("", "").affinity()); + assertEquals(NONE, LOCALE_AFFINITY_BI_CALCULATOR.calculate(null, "").affinity()); + assertEquals(NONE, LOCALE_AFFINITY_BI_CALCULATOR.calculate("", null).affinity()); + assertEquals(NONE, LOCALE_AFFINITY_BI_CALCULATOR.calculate(" ", " ").affinity()); + } + + @Test + public void whenCalculatingBestMatchingReferenceLocaleForOutlierValues_returnsExpected() { + assertEquals( + Optional.empty(), REFERENCE_LOCALES_CALCULATOR.calculateBestMatchingReferenceLocale(null)); + 
assertEquals( + Optional.empty(), REFERENCE_LOCALES_CALCULATOR.calculateBestMatchingReferenceLocale("")); + assertEquals( + Optional.empty(), REFERENCE_LOCALES_CALCULATOR.calculateBestMatchingReferenceLocale(" ")); + } + + @Test + public void whenCalculatingRelatedReferenceLocalesForOutlierValues_returnsExpected() { + assertEquals( + Collections.emptyList(), + REFERENCE_LOCALES_CALCULATOR.calculateRelatedReferenceLocales(null)); + assertEquals( + Collections.emptyList(), REFERENCE_LOCALES_CALCULATOR.calculateRelatedReferenceLocales("")); + assertEquals( + Collections.emptyList(), + REFERENCE_LOCALES_CALCULATOR.calculateRelatedReferenceLocales(" ")); + } + + @Test + public void calculateBiAffinity() { + assertEquals( + MUTUALLY_INTELLIGIBLE, + LOCALE_AFFINITY_BI_CALCULATOR.calculate("bs-Latn", "hr-BA").affinity()); + assertEquals( + MUTUALLY_INTELLIGIBLE, + LOCALE_AFFINITY_BI_CALCULATOR.calculate("bs-Cyrl", "hr-BA").affinity()); + assertEquals( + MUTUALLY_INTELLIGIBLE, LOCALE_AFFINITY_BI_CALCULATOR.calculate("bs", "hr-BA").affinity()); + assertEquals( + MUTUALLY_INTELLIGIBLE, LOCALE_AFFINITY_BI_CALCULATOR.calculate("bs-Latn", "hr").affinity()); + } } diff --git a/locales-utils/src/main/java/com/spotify/i18n/locales/utils/language/LanguageUtils.java b/locales-utils/src/main/java/com/spotify/i18n/locales/utils/language/LanguageUtils.java index e8a72f6..9edb7bd 100644 --- a/locales-utils/src/main/java/com/spotify/i18n/locales/utils/language/LanguageUtils.java +++ b/locales-utils/src/main/java/com/spotify/i18n/locales/utils/language/LanguageUtils.java @@ -63,8 +63,9 @@ public static Optional getWrittenLanguageLocale(final String languageTa } private static ULocale getWrittenLanguageLocaleForLocale(final ULocale locale) { - // Croatian is Bosnia is matched with Bosnian (Latin script). This is likely a bug in icu4j. We - // created a workaround to ensure that we return Croatian when encountering this locale. 
+ // The written language locale matcher matches Croatian in Bosnia with Bosnian (Latin script). + // This is likely a bug in icu4j. We created a workaround to ensure that we return Croatian when + // encountering this locale. if (isCroatianBosnia(locale)) { return CROATIAN; } else {