Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,9 @@ codicefiscale.decode("CCCFBA85D03L219P")
# "province": "TO",
# "code": "L219",
# },
# "firstname_options": [
# "Fabio",
# ],
# "omocodes": [
# "CCCFBA85D03L219P",
# "CCCFBA85D03L21VE",
Expand All @@ -86,6 +89,9 @@ codicefiscale.decode("CCCFBA85D03L219P")
# }
```

> [!TIP]
> **Name suggestions**: The `firstname_options` field lists the first names that match the encoded firstname code. For Italian birthplaces, it contains a single name in approximately **60% of cases**, which identifies the first name with near certainty; in the remaining cases it contains several candidate names. For foreign birthplaces, the list is empty.

#### Check
```python
codicefiscale.is_valid("CCCFBA85D03L219P")
Expand Down
24 changes: 24 additions & 0 deletions src/codicefiscale/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,16 @@
from codicefiscale.codicefiscale import (
decode,
decode_firstname,
decode_raw,
encode,
encode_birthdate,
encode_birthplace,
encode_cin,
encode_firstname,
encode_lastname,
is_omocode,
is_valid,
)
from codicefiscale.metadata import (
__author__,
__copyright__,
Expand All @@ -14,4 +27,15 @@
"__license__",
"__title__",
"__version__",
"decode",
"decode_firstname",
"decode_raw",
"encode",
"encode_birthdate",
"encode_birthplace",
"encode_cin",
"encode_firstname",
"encode_lastname",
"is_omocode",
"is_valid",
]
78 changes: 65 additions & 13 deletions src/codicefiscale/codicefiscale.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from datetime import datetime, timedelta
from itertools import combinations
from re import Pattern
from typing import Any, Literal
from typing import Any, Literal, cast

from dateutil import parser as date_parser
from slugify import slugify
Expand Down Expand Up @@ -82,7 +82,15 @@
_OMOCODIA_SUBS_INDEXES_COMBINATIONS.append(list(combo))


_DATA: dict[str, dict[str, list[dict[str, Any]]]] = get_indexed_data()
_DATA: dict[str, Any] | None = None


def _get_data() -> dict[str, Any]:
    """
    Returns the indexed lookup data, building it on first use.

    The result of ``get_indexed_data()`` is stored in the module-level
    ``_DATA`` variable, so the index is built at most once per process
    instead of at import time.

    :returns: The indexed data dictionary
    :rtype: dict[str, Any]
    """
    global _DATA
    # fast path: the index has already been built by a previous call
    if _DATA is not None:
        return _DATA
    _DATA = get_indexed_data()
    return _DATA


CODICEFISCALE_RE: Pattern[str] = re.compile(
r"^"
Expand Down Expand Up @@ -144,17 +152,18 @@ def _get_date(
def _get_birthplace(
birthplace: str,
birthdate: datetime | str | None = None,
) -> dict[str, dict[str, Any]] | None:
) -> dict[str, Any] | None:
birthplace_unicode_slug = slugify(birthplace, allow_unicode=True)
birthplace_slug = slugify(birthplace)
birthplace_code = birthplace_slug.upper()
birthplaces_options = _DATA["municipalities"].get(
data = _get_data()
birthplaces_options = data["municipalities"].get(
birthplace_unicode_slug,
_DATA["municipalities"].get(
data["municipalities"].get(
birthplace_slug,
_DATA["countries"].get(
data["countries"].get(
birthplace_slug,
_DATA["codes"].get(
data["codes"].get(
birthplace_code,
),
),
Expand All @@ -165,23 +174,23 @@ def _get_birthplace(

birthdate_date = _get_date(birthdate)
if not birthdate_date:
return birthplaces_options[0].copy()
return cast(dict[str, Any], birthplaces_options[0].copy())

# search birthplace that has been created before / deleted after birthdate
for birthplace_option in birthplaces_options:
date_created = _get_date(birthplace_option["date_created"]) or datetime.min
date_deleted = _get_date(birthplace_option["date_deleted"]) or datetime.max
# print(birthdate_date, date_created, date_deleted)
if birthdate_date >= date_created and birthdate_date <= date_deleted:
return birthplace_option.copy()
return cast(dict[str, Any], birthplace_option.copy())

return _get_birthplace_fallback(birthplaces_options, birthdate_date)


def _get_birthplace_fallback(
birthplaces_options: list[dict[str, Any]],
birthdate_date: datetime,
) -> dict[str, dict[str, Any]] | None:
) -> dict[str, Any] | None:
# avoid wrong birthplace code error when birthdate falls in
# missing date-range in the data-source even if birthplace code is valid
birthplaces_options_count = len(birthplaces_options)
Expand Down Expand Up @@ -280,6 +289,43 @@ def encode_firstname(firstname: str) -> str:
return firstname_code


def decode_firstname(
firstname_code: str, gender: Literal["m", "M", "f", "F"] | None = None
) -> list[str] | None:
"""
Decodes firstname code to possible italian first names.

Returns a list of possible names that encode to the given code.
Only works for common italian names.

:param firstname_code: The 3-character firstname code
:type firstname_code: string
:param gender: Optional gender filter ('M' or 'F')
:type gender: string | None

:returns: List of possible first names, or None if not found
:rtype: list[str] | None
"""
firstname_code_upper = firstname_code.upper()
data = _get_data()
names_by_gender = cast(
dict[str, list[str]] | None, data["names"].get(firstname_code_upper)
)

if not names_by_gender:
return None

if gender:
gender_upper = gender.upper()
if gender_upper in ("M", "F"):
gender_names = names_by_gender.get(gender_upper, [])
return gender_names if gender_names else None
Copy link

Copilot AI Apr 15, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

decode_firstname returns the cached list from the global names index when a valid gender is provided. Because this is a mutable list shared across calls, a caller could accidentally mutate it (e.g., append/sort) and corrupt subsequent results, including decode() output. Return a copy (e.g., gender_names.copy()) to avoid leaking internal mutable state.

Suggested change
return gender_names if gender_names else None
return gender_names.copy() if gender_names else None

Copilot uses AI. Check for mistakes.
Comment on lines +318 to +322
Copy link

Copilot AI Apr 15, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If gender is provided but not one of 'M'/'F', the function currently falls back to returning the combined list for both genders. That silently ignores invalid input and is inconsistent with other APIs here (e.g., encode_birthdate raises on invalid gender). Consider raising a ValueError for invalid gender (or returning None) so callers don’t get misleading results.

Suggested change
if gender:
gender_upper = gender.upper()
if gender_upper in ("M", "F"):
gender_names = names_by_gender.get(gender_upper, [])
return gender_names if gender_names else None
if gender is not None:
gender_upper = gender.upper()
if gender_upper not in ("M", "F"):
raise ValueError("[codicefiscale] 'gender' argument must be 'M' or 'F'")
gender_names = names_by_gender.get(gender_upper, [])
return gender_names if gender_names else None

Copilot uses AI. Check for mistakes.

# return all names (both genders) if no gender specified
all_names = names_by_gender.get("M", []) + names_by_gender.get("F", [])
return sorted(set(all_names)) if all_names else None


def encode_birthdate(
birthdate: datetime | str | None,
gender: Literal["m", "M", "f", "F"],
Expand Down Expand Up @@ -448,7 +494,7 @@ def decode_raw(code: str) -> dict[str, str]:
return data


def decode(code: str) -> dict[str, Any]:
def decode(code: str) -> dict[str, Any]: # noqa: C901
"""
Decodes the italian fiscal code.
Comment on lines +497 to 499
Copy link

Copilot AI Apr 15, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Adding # noqa: C901 suppresses the configured McCabe complexity check for decode(). Since this is a core public API, consider refactoring into a few small private helpers (e.g., parsing birthdate/gender, resolving birthplace, computing firstname options) so linting can stay enabled and the function becomes easier to test/maintain.

Copilot uses AI. Check for mistakes.

Expand All @@ -466,11 +512,10 @@ def decode(code: str) -> dict[str, Any]:
birthdate_month = _MONTHS.index(raw["birthdate_month"]) + 1
birthdate_day = int(raw["birthdate_day"].translate(_OMOCODIA_DECODE_TRANS))

gender: Literal["M", "F"] = "M"
if birthdate_day > 40:
birthdate_day -= 40
gender = "F"
else:
gender = "M"

current_year = datetime.now().year
current_year_century_prefix = str(current_year)[0:-2]
Expand Down Expand Up @@ -517,12 +562,19 @@ def decode(code: str) -> dict[str, Any]:
f"expected {cin_check!r}, found {cin!r}"
)

# add possible first names if birthplace is in Italy (not foreign country)
firstname_options = None
is_foreign = birthplace and birthplace.get("province") == "EE"
if not is_foreign:
firstname_options = decode_firstname(raw["firstname"], gender)

Comment thread
fabiocaccamo marked this conversation as resolved.
data = {
"code": code,
"omocodes": _get_omocodes(code),
"gender": gender,
"birthdate": birthdate,
"birthplace": birthplace,
"firstname_options": firstname_options or [],
"raw": raw,
}

Expand Down
35 changes: 26 additions & 9 deletions src/codicefiscale/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import os
import sys
from datetime import datetime
from typing import Any

import fsutil
Expand Down Expand Up @@ -33,23 +32,31 @@ def get_countries_data() -> Any:
return deleted_countries + countries


def get_indexed_data() -> dict[
str, dict[str, list[dict[str, bool | datetime | str | list[str]]]]
]:
def get_names_data() -> Any:
    """
    Returns the data loaded by ``get_data`` for the ``names.json`` resource.

    NOTE(review): ``get_indexed_data`` iterates the result with ``.items()``
    as gender -> iterable of first names; confirm against the data file.
    """
    return get_data("names.json")


def get_indexed_data() -> dict[str, Any]:
from codicefiscale.codicefiscale import encode_firstname

municipalities = get_municipalities_data()
countries = get_countries_data()
data: dict[str, dict[str, list[dict[str, bool | datetime | str | list[str]]]]] = {
names = get_names_data()

data: dict[str, Any] = {
"municipalities": {},
"countries": {},
"codes": {},
"names": {},
}
Comment on lines +40 to 52
Copy link

Copilot AI Apr 15, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

get_indexed_data() previously had a fairly precise return type, but it has been widened to dict[str, Any]. With mypy running in strict mode for this repo, this reduces type-safety for all consumers of the indexed data. Consider introducing a dedicated type (e.g., a TypedDict or type aliases for the municipalities/countries/codes/names indices) so the new names index can be added without falling back to Any everywhere.

Copilot uses AI. Check for mistakes.

for municipality in municipalities:
code = municipality["code"]
province = municipality["province"].lower()
municipality_unicode_slug = slugify(municipality["name"], allow_unicode=True)
names = [municipality_unicode_slug] + municipality["name_slugs"]
for name in names:
municipality_names = [municipality_unicode_slug] + municipality["name_slugs"]
for name in municipality_names:
name_and_province = f"{name}-{province}"
data["municipalities"].setdefault(name, [])
data["municipalities"].setdefault(name_and_province, [])
Expand All @@ -60,11 +67,21 @@ def get_indexed_data() -> dict[

for country in countries:
code = country["code"]
names = country["name_slugs"]
for name in names:
country_names = country["name_slugs"]
for name in country_names:
data["countries"].setdefault(name, [])
data["countries"][name].append(country)
data["codes"].setdefault(code, [])
data["codes"][code].append(country)

for gender, gender_names in names.items():
for name in gender_names:
code = encode_firstname(name)
data["names"].setdefault(code, {"M": set(), "F": set()})
data["names"][code][gender].add(name)

for code in data["names"]:
data["names"][code]["M"] = sorted(data["names"][code]["M"])
data["names"][code]["F"] = sorted(data["names"][code]["F"])

return data
Loading
Loading