From b9964eb3fa65c46117d2a465f890651daf61fa87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eric=20Fj=C3=B8sne?= Date: Wed, 18 Jun 2025 10:55:20 +0200 Subject: [PATCH 01/10] feat: Add locale affinity bi calculator --- .../AffinityBasedJoinExampleMain.java | 122 ++++++++++++++++ .../ReferenceLocalesBasedJoinExampleMain.java | 134 ------------------ .../common/LocaleAffinityBiCalculator.java | 42 ++++++ .../common/LocaleAffinityHelpersFactory.java | 6 +- .../LocaleAffinityCalculatorBaseImpl.java | 18 +++ .../ReferenceLocalesCalculatorBaseImpl.java | 31 +++- .../LocaleAffinityHelpersFactoryTest.java | 7 + .../LocaleAffinityCalculatorBaseImplTest.java | 10 +- ...eferenceLocalesCalculatorBaseImplTest.java | 43 ++++-- .../locales/utils/language/LanguageUtils.java | 5 +- 10 files changed, 266 insertions(+), 152 deletions(-) create mode 100644 examples/locales-affinity-examples/src/main/java/com/spotify/i18n/locales/affinity/examples/AffinityBasedJoinExampleMain.java delete mode 100644 examples/locales-affinity-examples/src/main/java/com/spotify/i18n/locales/affinity/examples/ReferenceLocalesBasedJoinExampleMain.java create mode 100644 locales-common/src/main/java/com/spotify/i18n/locales/common/LocaleAffinityBiCalculator.java diff --git a/examples/locales-affinity-examples/src/main/java/com/spotify/i18n/locales/affinity/examples/AffinityBasedJoinExampleMain.java b/examples/locales-affinity-examples/src/main/java/com/spotify/i18n/locales/affinity/examples/AffinityBasedJoinExampleMain.java new file mode 100644 index 0000000..1e2ad36 --- /dev/null +++ b/examples/locales-affinity-examples/src/main/java/com/spotify/i18n/locales/affinity/examples/AffinityBasedJoinExampleMain.java @@ -0,0 +1,122 @@ +/*- + * -\-\- + * locales-affinity-examples + * -- + * Copyright (C) 2016 - 2025 Spotify AB + * -- + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * -/-/- + */ + +package com.spotify.i18n.locales.affinity.examples; + +import com.spotify.i18n.locales.common.LocaleAffinityBiCalculator; +import com.spotify.i18n.locales.common.LocaleAffinityHelpersFactory; +import com.spotify.i18n.locales.common.model.LocaleAffinityResult; +import java.util.List; + +/** + * Showcase implementation of Java-locales affinity calculation + * + * @author Eric Fjøsne + */ +public class AffinityBasedJoinExampleMain { + + /** Create a {@link LocaleAffinityBiCalculator} instance out of the factory */ + private static final LocaleAffinityBiCalculator LOCALE_AFFINITY_BI_CALCULATOR = + LocaleAffinityHelpersFactory.getDefaultInstance().buildAffinityBiCalculator(); + + /** + * Example logic which attempts to join 2 sets of language tags. + * + *

Possible joins in the execution output are: + * + *

+ * + * @param args + */ + public static void main(String[] args) { + final List languageTagsInOriginDataset = + List.of( + "bs-Cyrl-BA", // Bosnian (Cyrillic), Bosnia and Herzegovina + "de", // German + "da-SE", // Danish (Sweden) + "en-GB", // English (Great-Britain) + "es-BE", // Spanish (Belgium) + "fr-SE", // French (Sweden) + "hr-BA", // Croatian (Bosnia and Herzegovina) + "it-CH", // Italian (Switzerland) + "ja-IT", // Japanese (Italy) + "nl-BE", // Dutch (Belgium) + "zh-Hans-US", // Chinese (Simplified) (USA) + "zh-HK" // Chinese (Hong-Kong) + ); + final List languageTagsInTargetDataset = + List.of( + "bs-Latn", // Bosnian (Latin) + "ca", // Catalan + "de-AT", // German (Austria) + "en-JP", // English (Japan) + "en-SE", // English (Sweden) + "fr-BE-u-ca-gregorian", // French (Belgium), with gregorian calendar extension + "fr-CA", // French (Canada) + "ja@calendar=buddhist", // Japanese, with buddhist calendar extension + "nb-FI", // Norwegian Bokmål (Finland) + "nl-ZA", // Dutch (South Africa) + "pt-US", // Portuguese (USA) + "zh-CN" // Chinese (Mainland China) + ); + + // Iterate through all possible combinations + for (String languageTagInOriginDataset : languageTagsInOriginDataset) { + for (String languageTagInTargetDataset : languageTagsInTargetDataset) { + // Retrieve the optional related reference locale based on which a join operation can be + // performed, and display the outcome in the execution output. 
+ LocaleAffinityResult affinityResult = + LOCALE_AFFINITY_BI_CALCULATOR.calculate( + languageTagInOriginDataset, languageTagInTargetDataset); + switch (affinityResult.affinity()) { + case NONE: + if (false) { + System.out.println( + String.format( + "(%s, %s) -> No join possible.", + languageTagInOriginDataset, languageTagInTargetDataset)); + } + break; + default: + System.out.println( + String.format( + "(%s, %s) -> Join possible with %s affinity.", + languageTagInOriginDataset, + languageTagInTargetDataset, + affinityResult.affinity())); + break; + } + } + } + } +} diff --git a/examples/locales-affinity-examples/src/main/java/com/spotify/i18n/locales/affinity/examples/ReferenceLocalesBasedJoinExampleMain.java b/examples/locales-affinity-examples/src/main/java/com/spotify/i18n/locales/affinity/examples/ReferenceLocalesBasedJoinExampleMain.java deleted file mode 100644 index 61c1697..0000000 --- a/examples/locales-affinity-examples/src/main/java/com/spotify/i18n/locales/affinity/examples/ReferenceLocalesBasedJoinExampleMain.java +++ /dev/null @@ -1,134 +0,0 @@ -/*- - * -\-\- - * locales-affinity-examples - * -- - * Copyright (C) 2016 - 2025 Spotify AB - * -- - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * -/-/- - */ - -package com.spotify.i18n.locales.affinity.examples; - -import com.ibm.icu.util.ULocale; -import com.spotify.i18n.locales.common.LocaleAffinityHelpersFactory; -import com.spotify.i18n.locales.common.ReferenceLocalesCalculator; -import com.spotify.i18n.locales.common.model.RelatedReferenceLocale; -import java.util.List; -import java.util.Optional; -import java.util.stream.Collectors; - -/** - * Showcase implementation of Java-locales affinity calculation - * - * @author Eric Fjøsne - */ -public class ReferenceLocalesBasedJoinExampleMain { - - /** Create a {@link ReferenceLocalesCalculator} instance out of the factory */ - private static final ReferenceLocalesCalculator REFERENCE_LOCALES_CALCULATOR = - LocaleAffinityHelpersFactory.getDefaultInstance().buildRelatedReferenceLocalesCalculator(); - - /** - * Example logic which attempts to join 2 sets of language tags. - * - *

Possible joins in the execution output are: - * - *

    - *
  • (de, de-AT) on reference locale [de-AT] with SAME affinity - *
  • (en-GB, en-JP) on reference locale [en-GB] with SAME affinity - *
  • (en-GB, en-SE) on reference locale [en-SE] with SAME affinity - *
  • (es-BE, ca) on reference locale [ca] with LOW affinity - *
  • (fr-SE, fr-BE-u-ca-gregorian) on reference locale [fr-BE] with SAME affinity - *
  • (fr-SE, fr-CA) on reference locale [fr-CA] with SAME affinity - *
  • (ja-IT, ja@calendar=buddhist) on reference locale [ja] with SAME affinity - *
  • (nl-BE, nl-ZA) on reference locale [nl] with SAME affinity - *
  • (zh-Hans-US, zh-CN) on reference locale [zh] with SAME affinity - *
- * - * @param args - */ - public static void main(String[] args) { - final List languageTagsInOriginDataset = - List.of( - "de", // German - "en-GB", // English (Great-Britain) - "es-BE", // Spanish (Belgium) - "fr-SE", // French (Sweden) - "it-CH", // Italian (Switzerland) - "ja-IT", // Japanese (Italy) - "nl-BE", // Dutch (Belgium) - "zh-Hans-US", // Chinese (Simplified) (USA) - "zh-HK" // Chinese (Hong-Kong) - ); - final List languageTagsInTargetDataset = - List.of( - "ca", // Catalan - "de-AT", // German (Austria) - "en-JP", // English (Japan) - "en-SE", // English (Sweden) - "fr-BE-u-ca-gregorian", // French (Belgium), with gregorian calendar extension - "fr-CA", // French (Canada) - "ja@calendar=buddhist", // Japanese, with buddhist calendar extension - "nl-ZA", // Dutch (South Africa) - "pt-US", // Portuguese (USA) - "zh-CN" // Chinese (Mainland China) - ); - - // Iterate through all possible combinations - for (String languageTagInOriginDataset : languageTagsInOriginDataset) { - for (String languageTagInTargetDataset : languageTagsInTargetDataset) { - // Retrieve the optional related reference locale based on which a join operation can be - // performed, and display the outcome in the execution output. - getRelatedReferenceLocaleForJoin(languageTagInOriginDataset, languageTagInTargetDataset) - .ifPresentOrElse( - (rrl) -> - System.out.println( - String.format( - "(%s, %s) on reference locale [%s] with %s affinity", - languageTagInOriginDataset, - languageTagInTargetDataset, - rrl.referenceLocale().toLanguageTag(), - rrl.affinity())), - () -> - System.out.println( - String.format( - "(%s, %s), no join possible.", - languageTagInOriginDataset, languageTagInTargetDataset))); - } - } - } - - /** - * Returns the optional {@link RelatedReferenceLocale} based on which the 2 language tags can be - * joined together. 
- */ - public static Optional getRelatedReferenceLocaleForJoin( - final String languageTagInOriginDataset, final String languageTagInTargetDataset) { - Optional bestMatchingReferenceLocale = - REFERENCE_LOCALES_CALCULATOR.calculateBestMatchingReferenceLocale( - languageTagInTargetDataset); - if (bestMatchingReferenceLocale.isEmpty()) { - return Optional.empty(); - } else { - List relatedReferenceLocales = - REFERENCE_LOCALES_CALCULATOR - .calculateRelatedReferenceLocales(languageTagInOriginDataset) - .stream() - .collect(Collectors.toList()); - return relatedReferenceLocales.stream() - .filter(rrl -> rrl.referenceLocale().equals(bestMatchingReferenceLocale.get())) - .findFirst(); - } - } -} diff --git a/locales-common/src/main/java/com/spotify/i18n/locales/common/LocaleAffinityBiCalculator.java b/locales-common/src/main/java/com/spotify/i18n/locales/common/LocaleAffinityBiCalculator.java new file mode 100644 index 0000000..c9f2d06 --- /dev/null +++ b/locales-common/src/main/java/com/spotify/i18n/locales/common/LocaleAffinityBiCalculator.java @@ -0,0 +1,42 @@ +/*- + * -\-\- + * locales-common + * -- + * Copyright (C) 2016 - 2025 Spotify AB + * -- + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * -/-/- + */ + +package com.spotify.i18n.locales.common; + +import com.spotify.i18n.locales.common.model.LocaleAffinityResult; +import edu.umd.cs.findbugs.annotations.Nullable; + +/** + * Represents an engine that calculates a locale affinity for two given language tags. 
All + * implementations of this interface must return a non-null {@link LocaleAffinityResult}, even when + * the given language tags are null or empty. + * + * @author Eric Fjøsne + */ +public interface LocaleAffinityBiCalculator { + + /** + * Returns the calculated {@link LocaleAffinityResult} for the given language tags + * + * @return the locale affinity result + */ + LocaleAffinityResult calculate( + @Nullable final String languageTag1, @Nullable final String languageTag2); +} diff --git a/locales-common/src/main/java/com/spotify/i18n/locales/common/LocaleAffinityHelpersFactory.java b/locales-common/src/main/java/com/spotify/i18n/locales/common/LocaleAffinityHelpersFactory.java index 018274e..b791c26 100644 --- a/locales-common/src/main/java/com/spotify/i18n/locales/common/LocaleAffinityHelpersFactory.java +++ b/locales-common/src/main/java/com/spotify/i18n/locales/common/LocaleAffinityHelpersFactory.java @@ -110,6 +110,10 @@ public LocaleAffinityCalculator buildAffinityCalculatorForLanguageTags( .collect(Collectors.toSet())); } + public LocaleAffinityBiCalculator buildAffinityBiCalculator() { + return ReferenceLocalesCalculatorBaseImpl.builder().buildLocaleAffinityBiCalculator(); + } + /** * Returns a pre-configured, ready-to-use instance of {@link ReferenceLocalesCalculator}. 
* @@ -117,6 +121,6 @@ public LocaleAffinityCalculator buildAffinityCalculatorForLanguageTags( * @see ReferenceLocalesCalculator */ public ReferenceLocalesCalculator buildRelatedReferenceLocalesCalculator() { - return ReferenceLocalesCalculatorBaseImpl.builder().build(); + return ReferenceLocalesCalculatorBaseImpl.builder().buildReferenceLocalesCalculator(); } } diff --git a/locales-common/src/main/java/com/spotify/i18n/locales/common/impl/LocaleAffinityCalculatorBaseImpl.java b/locales-common/src/main/java/com/spotify/i18n/locales/common/impl/LocaleAffinityCalculatorBaseImpl.java index 1f432a2..00b50dd 100644 --- a/locales-common/src/main/java/com/spotify/i18n/locales/common/impl/LocaleAffinityCalculatorBaseImpl.java +++ b/locales-common/src/main/java/com/spotify/i18n/locales/common/impl/LocaleAffinityCalculatorBaseImpl.java @@ -79,6 +79,10 @@ public abstract class LocaleAffinityCalculatorBaseImpl implements LocaleAffinity private static final int SCORE_THRESHOLD_HIGH = 30; private static final int SCORE_THRESHOLD_LOW = 0; + // Language codes for which we need some manual tweaks + private static final String LANGUAGE_CODE_CROATIAN = "hr"; + private static final String LANGUAGE_CODE_BOSNIAN = "bs"; + /** * Returns the set of {@link ULocale} against which affinity is being calculated. * @@ -167,6 +171,13 @@ private LocaleAffinity convertScoreToLocaleAffinity(final int score) { } private int getDistanceBetweenInputAndSupported(final LSR maxParsed, final LSR maxSupported) { + // Croatian should be matched with Bosnian. This is the case for Bosnian written in Latin + // script, but not Cyrillic, because the ICU implementation enforces script matching. We + // created a workaround to ensure that we return a MUTUALLY_INTELLIGIBLE affinity when + // encountering this locale. 
+ if (calculatingDistanceBetweenCroatianAndBosnian(maxParsed, maxSupported)) { + return 0; + } return LOCALE_DISTANCE_INSTANCE.getBestIndexAndDistance( maxParsed, new LSR[] {maxSupported}, @@ -176,6 +187,13 @@ private int getDistanceBetweenInputAndSupported(final LSR maxParsed, final LSR m LOCALE_DISTANCE_DIRECTION); } + private boolean calculatingDistanceBetweenCroatianAndBosnian(final LSR lsr1, final LSR lsr2) { + return (lsr1.language.equals(LANGUAGE_CODE_CROATIAN) + && lsr2.language.equals(LANGUAGE_CODE_BOSNIAN)) + || (lsr1.language.equals(LANGUAGE_CODE_BOSNIAN) + && lsr2.language.equals(LANGUAGE_CODE_CROATIAN)); + } + private static LSR getMaximizedLanguageScriptRegion(final ULocale locale) { return LIKELY_SUBTAGS_INSTANCE.makeMaximizedLsrFrom( locale, LIKELY_SUBTAGS_RETURNS_INPUT_IF_UNMATCH); diff --git a/locales-common/src/main/java/com/spotify/i18n/locales/common/impl/ReferenceLocalesCalculatorBaseImpl.java b/locales-common/src/main/java/com/spotify/i18n/locales/common/impl/ReferenceLocalesCalculatorBaseImpl.java index 4f8347e..241986c 100644 --- a/locales-common/src/main/java/com/spotify/i18n/locales/common/impl/ReferenceLocalesCalculatorBaseImpl.java +++ b/locales-common/src/main/java/com/spotify/i18n/locales/common/impl/ReferenceLocalesCalculatorBaseImpl.java @@ -20,12 +20,16 @@ package com.spotify.i18n.locales.common.impl; +import static com.spotify.i18n.locales.utils.hierarchy.LocalesHierarchyUtils.isSameLocale; + import com.google.auto.value.AutoValue; import com.ibm.icu.util.LocaleMatcher; import com.ibm.icu.util.ULocale; +import com.spotify.i18n.locales.common.LocaleAffinityBiCalculator; import com.spotify.i18n.locales.common.LocaleAffinityCalculator; import com.spotify.i18n.locales.common.ReferenceLocalesCalculator; import com.spotify.i18n.locales.common.model.LocaleAffinity; +import com.spotify.i18n.locales.common.model.LocaleAffinityResult; import com.spotify.i18n.locales.common.model.RelatedReferenceLocale; import 
com.spotify.i18n.locales.utils.available.AvailableLocalesUtils; import com.spotify.i18n.locales.utils.languagetag.LanguageTagUtils; @@ -48,7 +52,8 @@ * @author Eric Fjøsne */ @AutoValue -public abstract class ReferenceLocalesCalculatorBaseImpl implements ReferenceLocalesCalculator { +public abstract class ReferenceLocalesCalculatorBaseImpl + implements ReferenceLocalesCalculator, LocaleAffinityBiCalculator { /** Prepared {@link LocaleMatcher}, ready to find the best matching reference locale */ private static final LocaleMatcher REFERENCE_LOCALE_MATCHER = @@ -105,6 +110,23 @@ public Optional calculateBestMatchingReferenceLocale( return LanguageTagUtils.parse(languageTag).map(REFERENCE_LOCALE_MATCHER::getBestMatch); } + @Override + public LocaleAffinityResult calculate( + @Nullable final String languageTag1, @Nullable final String languageTag2) { + return LocaleAffinityResult.builder() + .affinity( + calculateBestMatchingReferenceLocale(languageTag2) + .map( + referenceLocale -> + calculateRelatedReferenceLocales(languageTag1).stream() + .filter(rrl -> isSameLocale(rrl.referenceLocale(), referenceLocale)) + .findFirst() + .map(RelatedReferenceLocale::affinity) + .orElse(LocaleAffinity.NONE)) + .orElse(LocaleAffinity.NONE)) + .build(); + } + /** * Returns a {@link Builder} instance that will allow you to manually create a {@link * ReferenceLocalesCalculatorBaseImpl} instance. @@ -123,7 +145,12 @@ public abstract static class Builder { abstract ReferenceLocalesCalculatorBaseImpl autoBuild(); /** Builds a {@link ReferenceLocalesCalculator} out of this builder. */ - public final ReferenceLocalesCalculator build() { + public final ReferenceLocalesCalculator buildReferenceLocalesCalculator() { + return autoBuild(); + } + + /** Builds a {@link LocaleAffinityBiCalculator} out of this builder. 
*/ + public final LocaleAffinityBiCalculator buildLocaleAffinityBiCalculator() { return autoBuild(); } } diff --git a/locales-common/src/test/java/com/spotify/i18n/locales/common/LocaleAffinityHelpersFactoryTest.java b/locales-common/src/test/java/com/spotify/i18n/locales/common/LocaleAffinityHelpersFactoryTest.java index 91af1b1..503227e 100644 --- a/locales-common/src/test/java/com/spotify/i18n/locales/common/LocaleAffinityHelpersFactoryTest.java +++ b/locales-common/src/test/java/com/spotify/i18n/locales/common/LocaleAffinityHelpersFactoryTest.java @@ -157,6 +157,13 @@ void whenBuildingRelatedReferenceLocalesCalculator_returnsExpectedCalculator() { instanceof ReferenceLocalesCalculator); } + @Test + void whenBuildingAffinityBiCalculator_returnsExpectedCalculator() { + assertTrue( + LocaleAffinityHelpersFactory.getDefaultInstance().buildAffinityBiCalculator() + instanceof LocaleAffinityBiCalculator); + } + @ParameterizedTest @MethodSource void diff --git a/locales-common/src/test/java/com/spotify/i18n/locales/common/impl/LocaleAffinityCalculatorBaseImplTest.java b/locales-common/src/test/java/com/spotify/i18n/locales/common/impl/LocaleAffinityCalculatorBaseImplTest.java index 42e0cd3..b931f2f 100644 --- a/locales-common/src/test/java/com/spotify/i18n/locales/common/impl/LocaleAffinityCalculatorBaseImplTest.java +++ b/locales-common/src/test/java/com/spotify/i18n/locales/common/impl/LocaleAffinityCalculatorBaseImplTest.java @@ -49,7 +49,7 @@ class LocaleAffinityCalculatorBaseImplTest { public static final LocaleAffinityCalculator CALCULATOR_AGAINST_TEST_SET_OF_LOCALES = LocaleAffinityCalculatorBaseImpl.builder() .againstLocales( - Set.of("ar", "bs", "es", "fr", "ja", "pt", "sr-Latn", "zh-Hant").stream() + Set.of("ar", "bs-Cyrl", "es", "fr", "ja", "pt", "sr-Latn", "zh-Hant").stream() .map(ULocale::forLanguageTag) .collect(Collectors.toSet())) .build(); @@ -107,6 +107,14 @@ public static Stream whenCalculating_returnsExpectedAffinity() { Arguments.of("ca-ES", LOW), 
Arguments.of("ca-AD", LOW), + // Bosnian should be matched for all scripts and regions, since we support Bosnian + Arguments.of("bs", SAME), + Arguments.of("bs-Latn", SAME), + Arguments.of("bs-Cyrl", SAME), + Arguments.of("bs-BA", SAME), + Arguments.of("bs-Latn-BA", SAME), + Arguments.of("bs-Cyrl-BA", SAME), + // No english should be matched Arguments.of("en", NONE), Arguments.of("en-GB", NONE), diff --git a/locales-common/src/test/java/com/spotify/i18n/locales/common/impl/ReferenceLocalesCalculatorBaseImplTest.java b/locales-common/src/test/java/com/spotify/i18n/locales/common/impl/ReferenceLocalesCalculatorBaseImplTest.java index bd12a58..fbe57c5 100644 --- a/locales-common/src/test/java/com/spotify/i18n/locales/common/impl/ReferenceLocalesCalculatorBaseImplTest.java +++ b/locales-common/src/test/java/com/spotify/i18n/locales/common/impl/ReferenceLocalesCalculatorBaseImplTest.java @@ -31,6 +31,7 @@ import com.ibm.icu.util.ULocale; import com.ibm.icu.util.ULocale.Builder; +import com.spotify.i18n.locales.common.LocaleAffinityBiCalculator; import com.spotify.i18n.locales.common.ReferenceLocalesCalculator; import com.spotify.i18n.locales.common.model.LocaleAffinity; import com.spotify.i18n.locales.common.model.RelatedReferenceLocale; @@ -39,6 +40,7 @@ import java.util.List; import java.util.Optional; import java.util.stream.Stream; +import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.Arguments; import org.junit.jupiter.params.provider.MethodSource; @@ -46,7 +48,10 @@ class ReferenceLocalesCalculatorBaseImplTest { public static final ReferenceLocalesCalculator REFERENCE_LOCALES_CALCULATOR = - ReferenceLocalesCalculatorBaseImpl.builder().build(); + ReferenceLocalesCalculatorBaseImpl.builder().buildReferenceLocalesCalculator(); + + public static final LocaleAffinityBiCalculator LOCALE_AFFINITY_BI_CALCULATOR = + ReferenceLocalesCalculatorBaseImpl.builder().buildLocaleAffinityBiCalculator(); 
public static Stream validateLocaleAffinityScoreRanges() { return AvailableLocalesUtils.getCldrLocales().stream().map(Arguments::of); @@ -129,34 +134,32 @@ private boolean areKnownMutuallyIntelligibleLocales(ULocale inputLS, ULocale ref String reference = referenceLS.toLanguageTag(); switch (input) { - // Bosnian and Croatian + // Bosnian and Croatian case "bs-Latn": - return reference.equals("hr-Latn"); - // Bosnian and Croatian case "bs-Cyrl": return reference.equals("hr-Latn"); - // Croatian and Bosnian + // Croatian and Bosnian case "hr-Latn": return reference.equals("bs-Latn") || reference.equals("bs-Cyrl"); - // German and Luxembourgish or Swiss German + // German and Luxembourgish or Swiss German case "de-Latn": return reference.equals("lb-Latn") || reference.equals("gsw-Latn"); - // Luxembourgish and German + // Luxembourgish and German case "lb-Latn": return reference.equals("de-Latn"); - // Swiss German and German + // Swiss German and German case "gsw-Latn": return reference.equals("de-Latn"); - // Bokmål and Norwegian + // Bokmål and Norwegian case "nb-Latn": return reference.equals("no-Latn"); - // Norwegian and Bokmål + // Norwegian and Bokmål case "no-Latn": return reference.equals("nb-Latn"); - // Serbian (Latin script) and Serbian (Cyrillic script) + // Serbian (Latin script) and Serbian (Cyrillic script) case "sr-Latn": return reference.equals("sr-Cyrl"); - // Serbian (Cyrillic script) and Serbian (Latin script) + // Serbian (Cyrillic script) and Serbian (Latin script) case "sr-Cyrl": return reference.equals("sr-Latn"); default: @@ -501,4 +504,20 @@ private static List serbian() { private static List swedish() { return List.of(rrl("sv", SAME), rrl("sv-AX", SAME), rrl("sv-FI", SAME)); } + + @Test + public void calculateBiAffinity() { + assertEquals( + MUTUALLY_INTELLIGIBLE, + LOCALE_AFFINITY_BI_CALCULATOR.calculate("bs-Latn", "hr-BA").affinity()); + assertEquals( + MUTUALLY_INTELLIGIBLE, + LOCALE_AFFINITY_BI_CALCULATOR.calculate("bs-Cyrl", 
"hr-BA").affinity()); + assertEquals( + MUTUALLY_INTELLIGIBLE, + LOCALE_AFFINITY_BI_CALCULATOR.calculate("bs", "hr-BA").affinity()); + assertEquals( + MUTUALLY_INTELLIGIBLE, + LOCALE_AFFINITY_BI_CALCULATOR.calculate("bs-Latn", "hr").affinity()); + } } diff --git a/locales-utils/src/main/java/com/spotify/i18n/locales/utils/language/LanguageUtils.java b/locales-utils/src/main/java/com/spotify/i18n/locales/utils/language/LanguageUtils.java index e8a72f6..9edb7bd 100644 --- a/locales-utils/src/main/java/com/spotify/i18n/locales/utils/language/LanguageUtils.java +++ b/locales-utils/src/main/java/com/spotify/i18n/locales/utils/language/LanguageUtils.java @@ -63,8 +63,9 @@ public static Optional getWrittenLanguageLocale(final String languageTa } private static ULocale getWrittenLanguageLocaleForLocale(final ULocale locale) { - // Croatian is Bosnia is matched with Bosnian (Latin script). This is likely a bug in icu4j. We - // created a workaround to ensure that we return Croatian when encountering this locale. + // The written language locale matcher matches Croatian in Bosnia with Bosnian (Latin script). + // This is likely a bug in icu4j. We created a workaround to ensure that we return Croatian when + // encountering this locale. 
if (isCroatianBosnia(locale)) { return CROATIAN; } else { From 5f35d20861728eb4111708ff0be896b8a511ec5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eric=20Fj=C3=B8sne?= Date: Wed, 18 Jun 2025 11:13:02 +0200 Subject: [PATCH 02/10] Docs: Refine javadoc & examples --- .../AffinityBasedJoinExampleMain.java | 4 ++++ .../common/LocaleAffinityBiCalculator.java | 2 +- .../common/LocaleAffinityHelpersFactory.java | 24 +++++++++++++++---- 3 files changed, 24 insertions(+), 6 deletions(-) diff --git a/examples/locales-affinity-examples/src/main/java/com/spotify/i18n/locales/affinity/examples/AffinityBasedJoinExampleMain.java b/examples/locales-affinity-examples/src/main/java/com/spotify/i18n/locales/affinity/examples/AffinityBasedJoinExampleMain.java index 1e2ad36..ab35d41 100644 --- a/examples/locales-affinity-examples/src/main/java/com/spotify/i18n/locales/affinity/examples/AffinityBasedJoinExampleMain.java +++ b/examples/locales-affinity-examples/src/main/java/com/spotify/i18n/locales/affinity/examples/AffinityBasedJoinExampleMain.java @@ -43,6 +43,7 @@ public class AffinityBasedJoinExampleMain { * *
    *
  • (bs-Cyrl-BA, bs-Latn) -> Join possible with SAME affinity. + *
  • (bs-Cyrl-BA, hr-MK) -> Join possible with MUTUALLY_INTELLIGIBLE affinity. *
  • (de, de-AT) -> Join possible with SAME affinity. *
  • (da-SE, nb-FI) -> Join possible with HIGH affinity. *
  • (en-GB, en-JP) -> Join possible with SAME affinity. @@ -51,6 +52,7 @@ public class AffinityBasedJoinExampleMain { *
  • (fr-SE, fr-BE-u-ca-gregorian) -> Join possible with SAME affinity. *
  • (fr-SE, fr-CA) -> Join possible with SAME affinity. *
  • (hr-BA, bs-Latn) -> Join possible with MUTUALLY_INTELLIGIBLE affinity. + *
  • (hr-BA, hr-MK) -> Join possible with SAME affinity. *
  • (ja-IT, ja@calendar=buddhist) -> Join possible with SAME affinity. *
  • (nl-BE, nl-ZA) -> Join possible with SAME affinity. *
  • (zh-Hans-US, zh-CN) -> Join possible with SAME affinity. @@ -74,6 +76,7 @@ public static void main(String[] args) { "zh-Hans-US", // Chinese (Simplified) (USA) "zh-HK" // Chinese (Hong-Kong) ); + final List languageTagsInTargetDataset = List.of( "bs-Latn", // Bosnian (Latin) @@ -83,6 +86,7 @@ public static void main(String[] args) { "en-SE", // English (Sweden) "fr-BE-u-ca-gregorian", // French (Belgium), with gregorian calendar extension "fr-CA", // French (Canada) + "hr-MK", // Croatian (North Macedonia) "ja@calendar=buddhist", // Japanese, with buddhist calendar extension "nb-FI", // Norwegian Bokmål (Finland) "nl-ZA", // Dutch (South Africa) diff --git a/locales-common/src/main/java/com/spotify/i18n/locales/common/LocaleAffinityBiCalculator.java b/locales-common/src/main/java/com/spotify/i18n/locales/common/LocaleAffinityBiCalculator.java index c9f2d06..1f9c6b1 100644 --- a/locales-common/src/main/java/com/spotify/i18n/locales/common/LocaleAffinityBiCalculator.java +++ b/locales-common/src/main/java/com/spotify/i18n/locales/common/LocaleAffinityBiCalculator.java @@ -24,7 +24,7 @@ import edu.umd.cs.findbugs.annotations.Nullable; /** - * Represents an engine that calculates a locale affinity for two given language tags. All + * Represents an engine that calculates the locale affinity for two given language tags. All * implementations of this interface must return a non-null {@link LocaleAffinityResult}, even when * the given language tags are null or empty. 
* diff --git a/locales-common/src/main/java/com/spotify/i18n/locales/common/LocaleAffinityHelpersFactory.java b/locales-common/src/main/java/com/spotify/i18n/locales/common/LocaleAffinityHelpersFactory.java index b791c26..093dd16 100644 --- a/locales-common/src/main/java/com/spotify/i18n/locales/common/LocaleAffinityHelpersFactory.java +++ b/locales-common/src/main/java/com/spotify/i18n/locales/common/LocaleAffinityHelpersFactory.java @@ -24,6 +24,7 @@ import com.ibm.icu.util.ULocale; import com.spotify.i18n.locales.common.impl.LocaleAffinityCalculatorBaseImpl; import com.spotify.i18n.locales.common.impl.ReferenceLocalesCalculatorBaseImpl; +import com.spotify.i18n.locales.common.model.LocaleAffinity; import com.spotify.i18n.locales.utils.acceptlanguage.AcceptLanguageUtils; import com.spotify.i18n.locales.utils.languagetag.LanguageTagUtils; import edu.umd.cs.findbugs.annotations.Nullable; @@ -36,13 +37,15 @@ * A factory for creating instances of locale affinity related helpers: * *
      - *
    • {@link LocaleAffinityCalculator}: A helper that calculates a locale affinity for a language - * tag, against a given set of locales. - *
    • {@link ReferenceLocalesCalculator}: A helper that enables reference locales based - * operations, most notably to join datasets by enabling match operations between an origin - * and a target locale, and enabling filtering on the affinity between these locales. + *
    • {@link LocaleAffinityCalculator}: A helper that calculates the locale affinity for a given + * language tag, against a configured set of locales. + *
    • {@link LocaleAffinityBiCalculator}: A helper that calculates the locale affinity for two + * given language tags. + *
    • {@link ReferenceLocalesCalculator}: A helper that enables reference locale-based + * operations. *
    * + * @see LocaleAffinity * @author Eric Fjøsne */ public class LocaleAffinityHelpersFactory { @@ -64,6 +67,7 @@ private LocaleAffinityHelpersFactory() {} * * @param acceptLanguage The Accept-Language value * @return Pre-configured locale affinity calculator + * @see LocaleAffinity * @see LocaleAffinityCalculator * @see Accept-Language @@ -82,6 +86,7 @@ public LocaleAffinityCalculator buildAffinityCalculatorForAcceptLanguage( * calculate affinity for a language tag, against all the given supplied locales. * * @return Pre-configured locale affinity calculator + * @see LocaleAffinity * @see LocaleAffinityCalculator * @see ULocale */ @@ -97,6 +102,7 @@ public LocaleAffinityCalculator buildAffinityCalculatorForLocales(final SetInvalid or improperly formatted language tags will be ignored. * * @return Pre-configured locale affinity calculator + * @see LocaleAffinity * @see LocaleAffinityCalculator * @see IETF BCP 47 language tag */ @@ -110,6 +116,14 @@ public LocaleAffinityCalculator buildAffinityCalculatorForLanguageTags( .collect(Collectors.toSet())); } + /** + * Returns a pre-configured, ready-to-use instance of {@link LocaleAffinityBiCalculator}, that can + * calculate the affinity between two given language tags. + * + * @return Pre-configured locale affinity bi-calculator + * @see LocaleAffinity + * @see LocaleAffinityBiCalculator + */ public LocaleAffinityBiCalculator buildAffinityBiCalculator() { return ReferenceLocalesCalculatorBaseImpl.builder().buildLocaleAffinityBiCalculator(); } From 4e18de4878d298d6b7b35612f6a43d7201b0184a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eric=20Fj=C3=B8sne?= Date: Wed, 18 Jun 2025 11:32:15 +0200 Subject: [PATCH 03/10] docs: Refresh README.md --- README.md | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 1f4e085..b3809de 100644 --- a/README.md +++ b/README.md @@ -62,7 +62,7 @@ localization quality assurance testers only. 
You can see all these concepts in action in [our HTTP server example implementation](./examples/locales-http-examples). -#### Calculate the affinity between two locales +##### Calculate the affinity between locales This feature enables you to easily and programmatically reason around affinity between locales, without having to know anything about how they relate to each other. @@ -77,18 +77,31 @@ We define the affinity between two locales using a `LocaleAffinity` enum value: should understand both if they understand one of them. - `SAME`: Locales identify the same language -We offer two separate logics, each dedicated to separate use-cases: +We offer separate affinity logics, each dedicated to separate use-cases: -- **Locale affinity calculation**: To be used when we need visibility on the affinity of a given - locale against a set of locales. -- **Reference locales calculation:** To be used when we need to join two datasets based on language - identifiers. It is indeed impossible to perform such a join operation out of the box, as language - identifiers can immensely differ even when they are syntactically valid and identify the very same - language. For Example: `zh-Hant`, `zh-HK`, `zh-MO`, `zh-Hant-TW`, `zh-Hant-FR`, `zh-US` all - identify Traditional Chinese, but `zh` and `zh-CN` identify Simplified Chinese. +##### Calculate the affinity of a given locale against a set of locales -You can see all these concepts in action -in [our locales affinity example implementations](./examples/locales-affinity-examples). +This should be used when we need visibility on the affinity of a given locale, against a set of +pre-configured locales. This can, for instance, be used to verify whether some content language is a +good match for a given user, based on the Accept-Language header value received in an incoming +request. 
+ +You can see this concept in action +in [our example implementation](./examples/locales-affinity-examples/src/main/java/com/spotify/i18n/locales/affinity/examples/AffinityCalculationExampleMain.java). + +#### Calculate the affinity between 2 given locales + +This should be used when we need visibility on the affinity between two given locales. This can, for +instance, be used to join two datasets based on language identifiers and how they relate to each +other in terms of affinity. + +It is indeed impossible to perform such a join operation out of the box, as language identifiers +can immensely differ even when they are syntactically valid and identify the very same language. For +example: `zh-Hant`, `zh-HK`, `zh-MO`, `zh-Hant-TW`, `zh-Hant-FR`, `zh-US` all +identify Traditional Chinese, but `zh` and `zh-CN` identify Simplified Chinese. + +You can see this concept in action +in [our example implementation](./examples/locales-affinity-examples/src/main/java/com/spotify/i18n/locales/affinity/examples/AffinityBasedJoinExampleMain.java). ### Utility helpers From 17dc2f63f0e72d042959940319d2a5b490240d98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eric=20Fj=C3=B8sne?= Date: Wed, 18 Jun 2025 11:32:15 +0200 Subject: [PATCH 04/10] docs: Refresh README.md --- README.md | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 1f4e085..62f736e 100644 --- a/README.md +++ b/README.md @@ -62,7 +62,7 @@ localization quality assurance testers only. You can see all these concepts in action in [our HTTP server example implementation](./examples/locales-http-examples). -#### Calculate the affinity between two locales +##### Calculate the affinity between locales This feature enables you to easily and programmatically reason around affinity between locales, without having to know anything about how they relate to each other. 
@@ -77,18 +77,31 @@ We define the affinity between two locales using a `LocaleAffinity` enum value: should understand both if they understand one of them. - `SAME`: Locales identify the same language -We offer two separate logics, each dedicated to separate use-cases: +We offer separate affinity logics, each dedicated to separate use-cases: -- **Locale affinity calculation**: To be used when we need visibility on the affinity of a given - locale against a set of locales. -- **Reference locales calculation:** To be used when we need to join two datasets based on language - identifiers. It is indeed impossible to perform such a join operation out of the box, as language - identifiers can immensely differ even when they are syntactically valid and identify the very same - language. For Example: `zh-Hant`, `zh-HK`, `zh-MO`, `zh-Hant-TW`, `zh-Hant-FR`, `zh-US` all - identify Traditional Chinese, but `zh` and `zh-CN` identify Simplified Chinese. +##### Calculate the affinity of a given locale against a set of locales -You can see all these concepts in action -in [our locales affinity example implementations](./examples/locales-affinity-examples). +This should be used when we need visibility on the affinity of a given locale, against a set of +pre-configured locales. This can, for instance, be used to verify whether some content language is a +good match for a given user, based on the Accept-Language header value received in an incoming +request. + +You can see this concept in action +in [our example implementation](./examples/locales-affinity-examples/src/main/java/com/spotify/i18n/locales/affinity/examples/AffinityCalculationExampleMain.java). + +#### Calculate the affinity between 2 given locales + +This should be used when we need visibility on the affinity between two given locales. This can, for +instance, be used to join two datasets based on language identifiers and how they related to each +other in terms of affinity. 
+ +It is indeed impossible to perform such a join operation out of the box, as language identifiers +can immensely differ even when they are syntactically valid and identify the very same language. For +example: `zh-Hant`, `zh-HK`, `zh-MO`, `zh-Hant-TW`, `zh-Hant-FR`, `zh-US` all +identify Traditional Chinese, but `zh` and `zh-CN` identify Simplified Chinese. + +You can see this concept in action +in [our example implementation](./examples/locales-affinity-examples/src/main/java/com/spotify/i18n/locales/affinity/examples/AffinityBasedJoinExampleMain.java). ### Utility helpers From 1bc6e8f3de8e84ca81e6786dec3d6527aebe262b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eric=20Fj=C3=B8sne?= Date: Wed, 18 Jun 2025 11:35:49 +0200 Subject: [PATCH 05/10] chore: Address comments --- .../affinity/examples/AffinityBasedJoinExampleMain.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/examples/locales-affinity-examples/src/main/java/com/spotify/i18n/locales/affinity/examples/AffinityBasedJoinExampleMain.java b/examples/locales-affinity-examples/src/main/java/com/spotify/i18n/locales/affinity/examples/AffinityBasedJoinExampleMain.java index ab35d41..1af63ab 100644 --- a/examples/locales-affinity-examples/src/main/java/com/spotify/i18n/locales/affinity/examples/AffinityBasedJoinExampleMain.java +++ b/examples/locales-affinity-examples/src/main/java/com/spotify/i18n/locales/affinity/examples/AffinityBasedJoinExampleMain.java @@ -94,11 +94,9 @@ public static void main(String[] args) { "zh-CN" // Chinese (Mainland China) ); - // Iterate through all possible combinations + // Iterate through all possible combinations, and calculate the affinity for each of them. for (String languageTagInOriginDataset : languageTagsInOriginDataset) { for (String languageTagInTargetDataset : languageTagsInTargetDataset) { - // Retrieve the optional related reference locale based on which a join operation can be - // performed, and display the outcome in the execution output. 
LocaleAffinityResult affinityResult = LOCALE_AFFINITY_BI_CALCULATOR.calculate( languageTagInOriginDataset, languageTagInTargetDataset); From f5e24f3c6b79ccdcf5d46f94ce655d0362fb0ba9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eric=20Fj=C3=B8sne?= Date: Wed, 18 Jun 2025 11:37:02 +0200 Subject: [PATCH 06/10] Update AffinityBasedJoinExampleMain.java --- .../examples/AffinityBasedJoinExampleMain.java | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/examples/locales-affinity-examples/src/main/java/com/spotify/i18n/locales/affinity/examples/AffinityBasedJoinExampleMain.java b/examples/locales-affinity-examples/src/main/java/com/spotify/i18n/locales/affinity/examples/AffinityBasedJoinExampleMain.java index 1af63ab..2f6933d 100644 --- a/examples/locales-affinity-examples/src/main/java/com/spotify/i18n/locales/affinity/examples/AffinityBasedJoinExampleMain.java +++ b/examples/locales-affinity-examples/src/main/java/com/spotify/i18n/locales/affinity/examples/AffinityBasedJoinExampleMain.java @@ -102,12 +102,10 @@ public static void main(String[] args) { languageTagInOriginDataset, languageTagInTargetDataset); switch (affinityResult.affinity()) { case NONE: - if (false) { - System.out.println( - String.format( - "(%s, %s) -> No join possible.", - languageTagInOriginDataset, languageTagInTargetDataset)); - } + System.out.println( + String.format( + "(%s, %s) -> No join possible.", + languageTagInOriginDataset, languageTagInTargetDataset)); break; default: System.out.println( From fb470b6a66b865b5f5ca85134dbbe96b8d50d88f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eric=20Fj=C3=B8sne?= Date: Wed, 18 Jun 2025 11:39:35 +0200 Subject: [PATCH 07/10] docs: Fix formulation in JavaDoc --- .../i18n/locales/common/LocaleAffinityBiCalculator.java | 4 ++-- .../i18n/locales/common/LocaleAffinityHelpersFactory.java | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git 
a/locales-common/src/main/java/com/spotify/i18n/locales/common/LocaleAffinityBiCalculator.java b/locales-common/src/main/java/com/spotify/i18n/locales/common/LocaleAffinityBiCalculator.java index 1f9c6b1..440ecb0 100644 --- a/locales-common/src/main/java/com/spotify/i18n/locales/common/LocaleAffinityBiCalculator.java +++ b/locales-common/src/main/java/com/spotify/i18n/locales/common/LocaleAffinityBiCalculator.java @@ -24,7 +24,7 @@ import edu.umd.cs.findbugs.annotations.Nullable; /** - * Represents an engine that calculates the locale affinity for two given language tags. All + * Represents an engine that calculates the locale affinity between two given language tags. All * implementations of this interface must return a non-null {@link LocaleAffinityResult}, even when * the given language tags are null or empty. * @@ -33,7 +33,7 @@ public interface LocaleAffinityBiCalculator { /** - * Returns the calculated {@link LocaleAffinityResult} for the given language tags + * Returns the calculated {@link LocaleAffinityResult} for the two given language tags * * @return the locale affinity result */ diff --git a/locales-common/src/main/java/com/spotify/i18n/locales/common/LocaleAffinityHelpersFactory.java b/locales-common/src/main/java/com/spotify/i18n/locales/common/LocaleAffinityHelpersFactory.java index 093dd16..829a551 100644 --- a/locales-common/src/main/java/com/spotify/i18n/locales/common/LocaleAffinityHelpersFactory.java +++ b/locales-common/src/main/java/com/spotify/i18n/locales/common/LocaleAffinityHelpersFactory.java @@ -39,8 +39,8 @@ *
      *
    • {@link LocaleAffinityCalculator}: A helper that calculates the locale affinity for a given * language tag, against a configured set of locales. - *
    • {@link LocaleAffinityBiCalculator}: A helper that calculates the locale affinity for two - * given language tags. + *
    • {@link LocaleAffinityBiCalculator}: A helper that calculates the locale affinity between + * two given language tags. *
    • {@link ReferenceLocalesCalculator}: A helper that enables reference locale-based * operations. *
    From 8b5e34dba7a42a1e018973b7272aed12242765bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eric=20Fj=C3=B8sne?= Date: Wed, 18 Jun 2025 11:44:56 +0200 Subject: [PATCH 08/10] test: Add tests for outlier values --- ...eferenceLocalesCalculatorBaseImplTest.java | 39 +++++++++++++++++-- 1 file changed, 35 insertions(+), 4 deletions(-) diff --git a/locales-common/src/test/java/com/spotify/i18n/locales/common/impl/ReferenceLocalesCalculatorBaseImplTest.java b/locales-common/src/test/java/com/spotify/i18n/locales/common/impl/ReferenceLocalesCalculatorBaseImplTest.java index fbe57c5..134123e 100644 --- a/locales-common/src/test/java/com/spotify/i18n/locales/common/impl/ReferenceLocalesCalculatorBaseImplTest.java +++ b/locales-common/src/test/java/com/spotify/i18n/locales/common/impl/ReferenceLocalesCalculatorBaseImplTest.java @@ -23,6 +23,7 @@ import static com.spotify.i18n.locales.common.model.LocaleAffinity.HIGH; import static com.spotify.i18n.locales.common.model.LocaleAffinity.LOW; import static com.spotify.i18n.locales.common.model.LocaleAffinity.MUTUALLY_INTELLIGIBLE; +import static com.spotify.i18n.locales.common.model.LocaleAffinity.NONE; import static com.spotify.i18n.locales.common.model.LocaleAffinity.SAME; import static com.spotify.i18n.locales.utils.hierarchy.LocalesHierarchyUtils.isSameLocale; import static org.hamcrest.MatcherAssert.assertThat; @@ -37,6 +38,7 @@ import com.spotify.i18n.locales.common.model.RelatedReferenceLocale; import com.spotify.i18n.locales.utils.available.AvailableLocalesUtils; import com.spotify.i18n.locales.utils.language.LanguageUtils; +import java.util.Collections; import java.util.List; import java.util.Optional; import java.util.stream.Stream; @@ -505,6 +507,37 @@ private static List swedish() { return List.of(rrl("sv", SAME), rrl("sv-AX", SAME), rrl("sv-FI", SAME)); } + @Test + public void whenCalculatingForOutlierValues_returnsExpected() { + assertEquals(NONE, LOCALE_AFFINITY_BI_CALCULATOR.calculate(null, null).affinity()); + 
assertEquals(NONE, LOCALE_AFFINITY_BI_CALCULATOR.calculate("", "").affinity()); + assertEquals(NONE, LOCALE_AFFINITY_BI_CALCULATOR.calculate(null, "").affinity()); + assertEquals(NONE, LOCALE_AFFINITY_BI_CALCULATOR.calculate("", null).affinity()); + assertEquals(NONE, LOCALE_AFFINITY_BI_CALCULATOR.calculate(" ", " ").affinity()); + } + + @Test + public void whenCalculatingBestMatchingReferenceLocaleForOutlierValues_returnsExpected() { + assertEquals( + Optional.empty(), REFERENCE_LOCALES_CALCULATOR.calculateBestMatchingReferenceLocale(null)); + assertEquals( + Optional.empty(), REFERENCE_LOCALES_CALCULATOR.calculateBestMatchingReferenceLocale("")); + assertEquals( + Optional.empty(), REFERENCE_LOCALES_CALCULATOR.calculateBestMatchingReferenceLocale(" ")); + } + + @Test + public void whenCalculatingRelatedReferenceLocalesForOutlierValues_returnsExpected() { + assertEquals( + Collections.emptyList(), + REFERENCE_LOCALES_CALCULATOR.calculateRelatedReferenceLocales(null)); + assertEquals( + Collections.emptyList(), REFERENCE_LOCALES_CALCULATOR.calculateRelatedReferenceLocales("")); + assertEquals( + Collections.emptyList(), + REFERENCE_LOCALES_CALCULATOR.calculateRelatedReferenceLocales(" ")); + } + @Test public void calculateBiAffinity() { assertEquals( @@ -514,10 +547,8 @@ public void calculateBiAffinity() { MUTUALLY_INTELLIGIBLE, LOCALE_AFFINITY_BI_CALCULATOR.calculate("bs-Cyrl", "hr-BA").affinity()); assertEquals( - MUTUALLY_INTELLIGIBLE, - LOCALE_AFFINITY_BI_CALCULATOR.calculate("bs", "hr-BA").affinity()); + MUTUALLY_INTELLIGIBLE, LOCALE_AFFINITY_BI_CALCULATOR.calculate("bs", "hr-BA").affinity()); assertEquals( - MUTUALLY_INTELLIGIBLE, - LOCALE_AFFINITY_BI_CALCULATOR.calculate("bs-Latn", "hr").affinity()); + MUTUALLY_INTELLIGIBLE, LOCALE_AFFINITY_BI_CALCULATOR.calculate("bs-Latn", "hr").affinity()); } } From da6b1be3e9417f0a6170cdddc00779ffa4d3a9f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eric=20Fj=C3=B8sne?= Date: Wed, 18 Jun 2025 11:52:44 +0200 Subject: [PATCH 
09/10] chore: Formatting --- ...ReferenceLocalesCalculatorBaseImplTest.java | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/locales-common/src/test/java/com/spotify/i18n/locales/common/impl/ReferenceLocalesCalculatorBaseImplTest.java b/locales-common/src/test/java/com/spotify/i18n/locales/common/impl/ReferenceLocalesCalculatorBaseImplTest.java index 134123e..c939584 100644 --- a/locales-common/src/test/java/com/spotify/i18n/locales/common/impl/ReferenceLocalesCalculatorBaseImplTest.java +++ b/locales-common/src/test/java/com/spotify/i18n/locales/common/impl/ReferenceLocalesCalculatorBaseImplTest.java @@ -136,32 +136,32 @@ private boolean areKnownMutuallyIntelligibleLocales(ULocale inputLS, ULocale ref String reference = referenceLS.toLanguageTag(); switch (input) { - // Bosnian and Croatian + // Bosnian and Croatian case "bs-Latn": case "bs-Cyrl": return reference.equals("hr-Latn"); - // Croatian and Bosnian + // Croatian and Bosnian case "hr-Latn": return reference.equals("bs-Latn") || reference.equals("bs-Cyrl"); - // German and Luxembourgish or Swiss German + // German and Luxembourgish or Swiss German case "de-Latn": return reference.equals("lb-Latn") || reference.equals("gsw-Latn"); - // Luxembourgish and German + // Luxembourgish and German case "lb-Latn": return reference.equals("de-Latn"); - // Swiss German and German + // Swiss German and German case "gsw-Latn": return reference.equals("de-Latn"); - // Bokmål and Norwegian + // Bokmål and Norwegian case "nb-Latn": return reference.equals("no-Latn"); - // Norwegian and Bokmål + // Norwegian and Bokmål case "no-Latn": return reference.equals("nb-Latn"); - // Serbian (Latin script) and Serbian (Cyrillic script) + // Serbian (Latin script) and Serbian (Cyrillic script) case "sr-Latn": return reference.equals("sr-Cyrl"); - // Serbian (Cyrillic script) and Serbian (Latin script) + // Serbian (Cyrillic script) and Serbian (Latin script) case "sr-Cyrl": return 
reference.equals("sr-Latn"); default: From 9e137f94871a01416e9cc0fbd7ef84edbfe387e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eric=20Fj=C3=B8sne?= Date: Wed, 18 Jun 2025 11:53:58 +0200 Subject: [PATCH 10/10] docs: Adjust heading levels for affinity feature --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 62f736e..05f8673 100644 --- a/README.md +++ b/README.md @@ -62,7 +62,7 @@ localization quality assurance testers only. You can see all these concepts in action in [our HTTP server example implementation](./examples/locales-http-examples). -##### Calculate the affinity between locales +#### Calculate the affinity between locales This feature enables you to easily and programmatically reason around affinity between locales, without having to know anything about how they relate to each other. @@ -89,7 +89,7 @@ request. You can see this concept in action in [our example implementation](./examples/locales-affinity-examples/src/main/java/com/spotify/i18n/locales/affinity/examples/AffinityCalculationExampleMain.java). -#### Calculate the affinity between 2 given locales +##### Calculate the affinity between 2 given locales This should be used when we need visibility on the affinity between two given locales. This can, for instance, be used to join two datasets based on language identifiers and how they related to each