Skip to content

validate

pinky_core.validate

Input validation and standardization helpers.

No top-level external imports — each function lazy-loads its dependency so the module is importable without any optional package installed.

Optional dependencies (install the ones you need):

pip install pinky-core[validate]          # all validators
pip install email-validator                  # standardize_email
pip install phonenumbers                     # standardize_phone
pip install python-stdnum                    # validate_siret, validate_nir, validate_vat_eu

Each function returns a typed result model. Call .model_dump() to convert to a plain dict when a Snowflake UDF handler needs to return a VARIANT/OBJECT.

EmailResult

Bases: BaseModel

Result of standardize_email.

Source code in src/pinky_core/validate.py
32
33
34
35
36
37
38
39
40
41
42
43
class EmailResult(BaseModel):
    """Result of ``standardize_email``.

    ``standardize_email`` fills every field except ``errors`` when the address
    is accepted, and only ``is_valid_email`` / ``errors`` when it is rejected;
    all other fields then keep their ``None`` default.
    """

    # True only when email-validator accepted the address.
    is_valid_email: bool = False
    # Full address as returned by email-validator.
    email: str | None = None
    # The ``ascii_*`` fields mirror the validator attributes of the same name.
    # NOTE(review): presumably None when the address has no ASCII form —
    # confirm against the email-validator documentation.
    ascii_email: str | None = None
    # Part before the "@", as reported by the validator.
    local_part: str | None = None
    # Part after the "@", as reported by the validator.
    domain: str | None = None
    ascii_local_part: str | None = None
    ascii_domain: str | None = None
    # Mirrors the validator's ``smtputf8`` flag.
    smtputf8: bool | None = None
    # Failure messages taken from the caught exception; None on success.
    errors: list[str] | None = None

FrIdResult

Bases: BaseModel

Result of validate_siret — SIRET, SIREN, or TVA intra-community.

Source code in src/pinky_core/validate.py
59
60
61
62
63
64
65
66
67
68
class FrIdResult(BaseModel):
    """Result of ``validate_siret`` — SIRET, SIREN, or TVA intra-community.

    ``validate_siret`` itself only builds the ``UNDEFINED`` failure result;
    the per-type results come from private helpers that are not part of this
    class, so which identifier fields are set for each ``type`` should be
    checked against those helpers.
    """

    # Kind of identifier that was recognised from the input's length/prefix.
    type: str = "UNDEFINED"  # "SIRET" | "SIREN" | "TVA" | "UNDEFINED"
    # False by default; set to True only by a successful validation.
    is_valid: bool = False
    # NOTE(review): presumably the 14-digit SIRET when available — confirm.
    siret: str | None = None
    # NOTE(review): presumably the 9-digit SIREN — confirm.
    siren: str | None = None
    # NOTE(review): presumably the establishment number (SIRET minus SIREN) — confirm.
    nic: str | None = None
    # NOTE(review): presumably the FR intra-community VAT number — confirm.
    tva: str | None = None
    # Human-readable failure messages; None when nothing went wrong.
    errors: list[str] | None = None

IbanBicResult

Bases: BaseModel

Result of validate_iban_bic.

Source code in src/pinky_core/validate.py
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
class IbanBicResult(BaseModel):
    """Result of ``validate_iban_bic``.

    One model serves both identifier kinds: BIC results never set the
    IBAN-only fields (``formatted``, ``check_digits``, ``account_code``) and
    IBAN results never set the BIC-only fields (``location_code``,
    ``branch_code``). Failure results carry only ``type``, ``is_valid`` and
    ``errors``.
    """

    # Which kind of identifier the input was recognised as.
    type: str = "UNDEFINED"  # "IBAN" | "BIC" | "UNDEFINED"
    is_valid: bool = False
    # Upper-cased input with separators removed.
    compact: str | None = None
    # IBAN only: ``compact`` in space-separated groups of four characters.
    formatted: str | None = None
    # IBAN: first two characters. BIC: characters 5-6.
    country_code: str | None = None
    # IBAN only: characters 3-4.
    check_digits: str | None = None
    # BIC: first four characters. IBAN: five characters after the check
    # digits for "FR", four characters for every other country.
    bank_code: str | None = None
    # IBAN only: everything after ``bank_code``.
    account_code: str | None = None
    # BIC only: characters 7-8.
    location_code: str | None = None
    # BIC only: last three characters of an 11-character BIC, "XXX" for an
    # 8-character BIC.
    branch_code: str | None = None
    # Human-readable failure messages; None on success.
    errors: list[str] | None = None

NirResult

Bases: BaseModel

Result of validate_nir.

Source code in src/pinky_core/validate.py
71
72
73
74
75
76
77
78
79
80
81
class NirResult(BaseModel):
    """Result of ``validate_nir``.

    The decoded fields are plain substrings of the validated number and are
    kept as strings; they are only set when ``is_valid`` is True.
    """

    is_valid: bool = False
    # The number as rendered by ``stdnum.fr.nir.format``.
    nir: str | None = None
    # "M" when the first character is "1", otherwise "F".
    gender: str | None = None
    # Two characters following the gender digit (two-digit year).
    birth_year: str | None = None
    # Two characters following the year.
    birth_month: str | None = None
    # Two characters following the month.
    department: str | None = None
    # Three characters following the department.
    commune: str | None = None
    # String form of the exception raised during validation; None on success.
    errors: list[str] | None = None

PhoneResult

Bases: BaseModel

Result of standardize_phone for a single number.

Source code in src/pinky_core/validate.py
46
47
48
49
50
51
52
53
54
55
56
class PhoneResult(BaseModel):
    """Result of ``standardize_phone`` for a single number.

    ``standardize_phone`` builds the failure results itself (both flags False
    plus ``errors``); successful results are produced by a private formatting
    helper, so the exact content of the ``*_format`` fields should be checked
    against that helper.
    """

    # Both flags default to False and are set explicitly to False on failure.
    is_possible_number: bool = False
    is_valid_number: bool = False
    # NOTE(review): the four ``*_format`` fields presumably hold the
    # phonenumbers NATIONAL / INTERNATIONAL / E164 / RFC3966 renderings —
    # confirm against the formatting helper.
    national_format: str | None = None
    international_format: str | None = None
    e164_format: str | None = None
    rfc3966_format: str | None = None
    # Integer, unlike the string ``country_code`` argument of
    # ``standardize_phone``. NOTE(review): presumably the international
    # calling code (e.g. 33) — confirm.
    country_code: int | None = None
    # Human-readable failure messages; None on success.
    errors: list[str] | None = None

VatResult

Bases: BaseModel

Result of validate_vat_eu.

Source code in src/pinky_core/validate.py
84
85
86
87
88
89
90
91
92
class VatResult(BaseModel):
    """Result of ``validate_vat_eu``.

    Failure results carry only ``is_valid`` and ``errors``.
    """

    is_valid: bool = False
    # Value returned by ``stdnum.eu.vat.compact`` for the cleaned input.
    compact: str | None = None
    # NOTE(review): never populated by ``validate_vat_eu`` — always None there.
    formatted: str | None = None
    # First two characters of ``compact``.
    country_code: str | None = None
    # ``compact`` without its two-character country prefix.
    local_number: str | None = None
    # String form of the validation failure; None on success.
    errors: list[str] | None = None

standardize_email(email)

Validate and normalize an email address.

Uses email-validator to parse and normalize the address without checking deliverability (no DNS lookup).

Parameters:

Name Type Description Default
email str

Raw email string to validate.

required

Returns:

Type Description
EmailResult

EmailResult with is_valid_email=True and normalized fields on success, or is_valid_email=False and errors on failure.

Source code in src/pinky_core/validate.py
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
def standardize_email(email: str) -> EmailResult:
    """Validate and normalize an email address.

    Uses ``email-validator`` to parse and normalize the address without
    checking deliverability (no DNS lookup).

    Args:
        email: Raw email string to validate. Surrounding whitespace is
            stripped before validation.

    Returns:
        ``EmailResult`` with ``is_valid_email=True`` and normalized fields on
        success, or ``is_valid_email=False`` and ``errors`` on failure.
        Any exception (including a non-string ``email``) is reported through
        ``errors`` rather than raised.
    """
    from email_validator import validate_email

    try:
        v = validate_email(email.strip(), check_deliverability=False)

        # email-validator 2.x renamed ``email`` to ``normalized`` and turned
        # the old name into a deprecated alias that emits a warning.  With
        # warnings escalated to errors that warning would land in the broad
        # ``except`` below and flag every valid address as invalid, so prefer
        # the new attribute and only fall back to the old one on 1.x.
        normalized = getattr(v, "normalized", None)
        if normalized is None:
            normalized = v.email

        return EmailResult(
            is_valid_email=True,
            ascii_email=v.ascii_email,
            email=normalized,
            local_part=v.local_part,
            domain=v.domain,
            ascii_local_part=v.ascii_local_part,
            ascii_domain=v.ascii_domain,
            smtputf8=v.smtputf8,
        )
    except Exception as e:
        # Deliberately broad: this function is a "never raise" boundary.
        # Exception args are not guaranteed to be strings, while ``errors``
        # is typed ``list[str]`` — coerce so building the result cannot fail.
        return EmailResult(is_valid_email=False, errors=[str(arg) for arg in e.args])

standardize_phone(phone_number, country_code=None, default_country='FR')

Validate and normalize a phone number.

Detects the country from international prefixes when possible, then falls back to country_code, then to default_country.

Uses the phonenumbers library.

Parameters:

Name Type Description Default
phone_number str

Raw phone string (any format).

required
country_code str | None

ISO 3166-1 alpha-2 country from the source data. Pass None when the data has no country.

None
default_country str

Last-resort fallback when neither an international prefix nor country_code is available (default "FR").

'FR'

Returns:

Type Description
PhoneResult | list[PhoneResult]

A PhoneResult on success, a list[PhoneResult] when multiple numbers are found in the string, or a PhoneResult with is_valid_number=False and errors on failure.

Source code in src/pinky_core/validate.py
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
def standardize_phone(
    phone_number: str,
    country_code: str | None = None,
    default_country: str = "FR",
) -> PhoneResult | list[PhoneResult]:
    """Validate and normalize a phone number.

    Detects the country from international prefixes when possible, then
    falls back to ``country_code``, then to ``default_country``.

    Uses the ``phonenumbers`` library.

    Args:
        phone_number:    Raw phone string (any format).
        country_code:    ISO 3166-1 alpha-2 country from the source data.
                         Pass ``None`` when the data has no country.
                         Case and surrounding whitespace are ignored.
        default_country: Last-resort fallback when neither an international
                         prefix nor ``country_code`` is available (default ``"FR"``).
                         Case and surrounding whitespace are ignored.

    Returns:
        A ``PhoneResult`` on success, a ``list[PhoneResult]`` when multiple
        numbers are found in the string, or a ``PhoneResult`` with
        ``is_valid_number=False`` and ``errors`` on failure.
    """
    import phonenumbers

    if not phone_number or not phone_number.strip():
        return PhoneResult(
            is_possible_number=False,
            is_valid_number=False,
            errors=["empty phone_number"],
        )

    # International prefix is authoritative — use it directly
    detected = _detect_phone_country(phone_number)
    if detected:
        try:
            return _format_phone(phonenumbers.parse(phone_number, detected))
        except phonenumbers.NumberParseException:
            pass

    # phonenumbers only recognises upper-case region codes; a hint such as
    # "fr" or " FR " coming from raw source data would otherwise be rejected
    # and silently skipped.  Normalise the hints and drop duplicates so the
    # same region is not parsed twice when country_code == default_country.
    regions: list[str] = []
    for hint in (country_code, default_country):
        region = hint.strip().upper() if hint else ""
        if region and region not in regions:
            regions.append(region)

    # country_code from data may be wrong; try each candidate and keep the first
    # that produces a valid number (catches e.g. CH address with a FR local number)
    for region in regions:
        try:
            parsed = phonenumbers.parse(phone_number, region)
            if phonenumbers.is_valid_number(parsed):
                return _format_phone(parsed)
        except phonenumbers.NumberParseException:
            continue

    # Last resort: substring matcher on default_country
    fallback_region = default_country.strip().upper() if default_country else default_country
    matches = list(phonenumbers.PhoneNumberMatcher(phone_number, fallback_region))
    if matches:
        if len(matches) == 1:
            return _format_phone(matches[0].number)
        return [_format_phone(m.number) for m in matches]

    return PhoneResult(
        is_possible_number=False,
        is_valid_number=False,
        errors=["no valid phone number found"],
    )

validate_iban_bic(value)

Validate an IBAN or BIC/SWIFT code.

No external dependencies — uses pure-Python mod-97 checksum for IBAN.

Parameters:

Name Type Description Default
value str

Raw IBAN or BIC string (spaces and dashes allowed, will be stripped).

required

Returns:

Type Description
IbanBicResult

IbanBicResult with is_valid=True and decoded fields on success, or is_valid=False and errors on failure.

Source code in src/pinky_core/validate.py
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
def validate_iban_bic(value: str) -> IbanBicResult:
    """Validate an IBAN or BIC/SWIFT code.

    No external dependencies — uses pure-Python mod-97 checksum for IBAN.

    Args:
        value: Raw IBAN or BIC string. Whitespace of any kind and dashes are
            removed and the value is upper-cased before validation. ``None``
            is tolerated and treated like an empty string.

    Returns:
        ``IbanBicResult`` with ``is_valid=True`` and decoded fields on success,
        or ``is_valid=False`` and ``errors`` on failure.
    """
    # ``split()`` with no argument also removes tabs, newlines and
    # non-breaking spaces, which show up in values pasted from PDFs or web
    # pages; ``value or ""`` keeps a NULL input on the normal failure path
    # instead of raising AttributeError.
    value = "".join((value or "").split()).replace("-", "").upper()

    # BIC: 8 characters (head office) or 11 characters (with branch code).
    if _BIC_PATTERN.match(value) and len(value) in (8, 11):
        return IbanBicResult(
            type="BIC",
            is_valid=True,
            compact=value,
            country_code=value[4:6],
            bank_code=value[:4],
            location_code=value[6:8],
            branch_code=value[8:] if len(value) == 11 else "XXX",
        )

    # Anything at least 15 characters long that starts with two letters is
    # treated as an IBAN attempt and gets IBAN-specific error messages.
    if len(value) >= 15 and value[:2].isalpha():
        country_code = value[:2]
        expected_len = _IBAN_LENGTHS.get(country_code)
        if not expected_len:
            return IbanBicResult(
                type="IBAN",
                is_valid=False,
                errors=[f"Unknown country code: {country_code}"],
            )
        if len(value) != expected_len:
            return IbanBicResult(
                type="IBAN",
                is_valid=False,
                errors=[
                    f"Expected {expected_len} chars for {country_code}, got {len(value)}"
                ],
            )
        if not _check_iban_mod97(value):
            return IbanBicResult(
                type="IBAN", is_valid=False, errors=["Invalid IBAN checksum (mod 97)"]
            )
        formatted = " ".join(value[i : i + 4] for i in range(0, len(value), 4))
        # Only the French layout (5-character bank code) is special-cased;
        # every other country is split after 4 characters.
        # NOTE(review): that split is a simplification for countries whose
        # bank identifier is not 4 characters long — confirm it is intended.
        is_french = country_code == "FR"
        return IbanBicResult(
            type="IBAN",
            is_valid=True,
            compact=value,
            formatted=formatted,
            country_code=country_code,
            check_digits=value[2:4],
            bank_code=value[4:9] if is_french else value[4:8],
            account_code=value[9:] if is_french else value[8:],
        )

    return IbanBicResult(
        type="UNDEFINED", is_valid=False, errors=["Expected IBAN or BIC"]
    )

validate_nir(value)

Validate a French NIR (numéro de sécurité sociale).

Uses the python-stdnum library (stdnum.fr.nir).

Parameters:

Name Type Description Default
value str

Raw NIR string (spaces allowed, will be stripped).

required

Returns:

Type Description
NirResult

NirResult with is_valid=True and decoded fields on success, or is_valid=False and errors on failure.

Source code in src/pinky_core/validate.py
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
def validate_nir(value: str) -> NirResult:
    """Validate a French NIR (numéro de sécurité sociale).

    Uses the ``python-stdnum`` library (``stdnum.fr.nir``).

    Args:
        value: Raw NIR string (spaces allowed, will be stripped). ``None`` is
            tolerated and reported as an invalid number.

    Returns:
        ``NirResult`` with ``is_valid=True`` and decoded fields on success,
        or ``is_valid=False`` and ``errors`` on failure.
    """
    from stdnum.fr import nir as _nir

    # ``value or ""`` keeps a NULL input on the normal failure path instead of
    # raising AttributeError before the ``try`` block is reached.
    value = (value or "").strip().replace(" ", "")
    try:
        # ``validate`` returns the library's own compact form, which may
        # differ from ``value`` (stdnum also drops other separators and
        # upper-cases).  The positional slices below are only meaningful on
        # that compact form, so decode from it rather than from the raw input.
        number = _nir.validate(value)
        return NirResult(
            is_valid=True,
            nir=_nir.format(number),
            # NOTE(review): every first digit other than "1" is reported as
            # "F" — confirm this is intended for non-1/2 leading digits.
            gender="M" if number[0] == "1" else "F",
            birth_year=number[1:3],
            birth_month=number[3:5],
            department=number[5:7],
            commune=number[7:10],
        )
    except Exception as e:
        return NirResult(is_valid=False, errors=[str(e)])

validate_siret(value)

Validate a French SIRET, SIREN, or TVA (intra-community) number.

Dispatches by length/prefix:

- 14 digits → SIRET
- 9 digits → SIREN
- FR + 11 chars → TVA intra-community

Uses the python-stdnum library (stdnum.fr).

Parameters:

Name Type Description Default
value str

Raw identifier string (spaces allowed, will be stripped).

required

Returns:

Type Description
FrIdResult

FrIdResult with is_valid=True and type-specific fields on success, or is_valid=False and errors on failure.

Source code in src/pinky_core/validate.py
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
def validate_siret(value: str) -> FrIdResult:
    """Validate a French SIRET, SIREN, or TVA (intra-community) number.

    Dispatches by length/prefix:
    - 14 digits → SIRET
    - 9 digits → SIREN
    - ``FR`` + 11 chars → TVA intra-community

    Uses the ``python-stdnum`` library (``stdnum.fr``).

    Args:
        value: Raw identifier string. Whitespace of any kind is removed and
            the value is upper-cased before dispatching, so ``"fr 12 ..."``
            is recognised as a TVA number. ``None`` is tolerated and treated
            like an empty string.

    Returns:
        ``FrIdResult`` with ``is_valid=True`` and type-specific fields on success,
        or ``is_valid=False`` and ``errors`` on failure.
    """
    from stdnum.fr import siren as _siren
    from stdnum.fr import siret as _siret
    from stdnum.fr import tva as _tva

    # ``split()`` with no argument also removes tabs and non-breaking spaces;
    # upper-casing makes the "FR" prefix test case-insensitive;
    # ``value or ""`` sends a NULL input to the UNDEFINED result below instead
    # of raising AttributeError.
    value = "".join((value or "").split()).upper()
    length = len(value)

    if length == 14:
        return _do_validate_siret(value, _siret)
    if length == 9:
        return _do_validate_siren(value, _siren)
    if length == 13 and value.startswith("FR"):
        return _do_validate_tva(value, _tva)
    return FrIdResult(
        type="UNDEFINED",
        is_valid=False,
        errors=["Expected SIRET (14 digits), SIREN (9 digits) or TVA (FR + 11 chars)"],
    )

validate_vat_eu(value)

Validate a European VAT number.

Supports all EU member states via the python-stdnum library (stdnum.eu.vat). The country prefix (first 2 chars) determines the country-specific validation rules.

Parameters:

Name Type Description Default
value str

Raw VAT string — spaces, dots and dashes are stripped before validation (e.g. "FR 12 345 678 901" or "DE123456789").

required

Returns:

Type Description
VatResult

VatResult with is_valid=True and normalised fields on success, or is_valid=False and errors on failure.

Example::

validate_vat_eu("FR12345678901")
# VatResult(is_valid=True, country_code="FR", local_number="12345678901", …)  # pragma: allowlist secret
Source code in src/pinky_core/validate.py
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
def validate_vat_eu(value: str) -> VatResult:
    """Validate a European VAT number.

    Supports all EU member states via the ``python-stdnum`` library
    (``stdnum.eu.vat``).  The country prefix (first 2 chars) determines
    the country-specific validation rules.

    Args:
        value: Raw VAT string — spaces, dots and dashes are stripped before
               validation (e.g. ``"FR 12 345 678 901"`` or ``"DE123456789"``).  # pragma: allowlist secret
               ``None`` is tolerated and treated like an empty string.

    Returns:
        ``VatResult`` with ``is_valid=True`` and normalised fields on success,
        or ``is_valid=False`` and ``errors`` on failure.

    Example::

        validate_vat_eu("FR12345678901")
        # VatResult(is_valid=True, country_code="FR", local_number="12345678901", …)  # pragma: allowlist secret
    """
    from stdnum.eu import vat as _vat

    # The cleaning step runs before any error handling, so a NULL input would
    # raise AttributeError instead of producing the documented failure
    # result; coerce it to "" and let the prefix check below reject it.
    cleaned = re.sub(r"[\s.\-]", "", (value or "").strip()).upper()

    # Cheap shape check so obviously malformed input gets a readable message
    # instead of whatever the library raises.
    if len(cleaned) < 4 or not cleaned[:2].isalpha():
        return VatResult(
            is_valid=False,
            errors=["Expected 2-letter country prefix followed by VAT number"],
        )

    try:
        _vat.validate(cleaned)
        compact = _vat.compact(cleaned)
        return VatResult(
            is_valid=True,
            compact=compact,
            country_code=compact[:2],
            local_number=compact[2:],
        )
    except Exception as e:
        return VatResult(is_valid=False, errors=[str(e)])