"""Utilitary functions to sort, format, or extract information."""
import warnings
from typing import Iterator, overload
from datetime import datetime, time
from enum import Enum
from .base import FakeLine, TrueLine
[docs]
def get_all_sub_lines(line: TrueLine) -> Iterator[TrueLine]:
    """/!\\ DEPRECATED /!\\ use the method :py:meth:`Line.get_all_sub_lines` instead.

    Recursively iterate on :py:class:`.TrueLine` of higher level.

    Every line located under the given line is yielded. The order of the
    gedcom file is kept: sub-lines come before sibling lines.

    Emits a :py:class:`DeprecationWarning` and delegates to the method.
    """
    notice = "Use the Line.get_all_sub_lines method instead of function in fastgedcom.helpers"
    # stacklevel=2 makes the warning point at the caller of this helper.
    warnings.warn(notice, category=DeprecationWarning, stacklevel=2)
    sub_lines = line.get_all_sub_lines()
    return sub_lines
[docs]
def get_source(line: TrueLine | FakeLine) -> str:
    """/!\\ DEPRECATED /!\\ use the method :py:meth:`Line.get_source` instead.

    Return the gedcom text equivalent for the line and its sub-lines.

    Emits a :py:class:`DeprecationWarning` and delegates to the method.
    """
    notice = "Use the Line.get_source method instead of function in fastgedcom.helpers"
    # stacklevel=2 makes the warning point at the caller of this helper.
    warnings.warn(notice, category=DeprecationWarning, stacklevel=2)
    gedcom_text = line.get_source()
    return gedcom_text
[docs]
class DateType(Enum):
    """Date modifiers allowed by the Gedcom specifications.

    They can appear in payload of DATE lines. Each member value is a
    template showing where the date (or dates) sit relative to the
    modifier keyword.
    """

    # NOTE(review): the nine single-date members below had lost their
    # assignment lines (only their docstrings were left), although
    # get_date_type refers to them. Their values are rebuilt on the template
    # pattern of BET_AND / FROM_TO and on the keyword prefixes tested by
    # get_date_type -- confirm against the upstream file.

    BC = "{date} BC"
    """Date before Christ. Old version from Gedcom5."""

    BCE = "{date} BCE"
    """Date before common era. New version from Gedcom7."""

    ABT = "ABT {date}"
    """About date."""

    EST = "EST {date}"
    """Estimated date."""

    CAL = "CAL {date}"
    """Calculated date."""

    BEF = "BEF {date}"
    """Before date."""

    AFT = "AFT {date}"
    """After date."""

    TO = "TO {date}"
    """To date. Not prefixed by :py:attr:`FROM`."""

    FROM = "FROM {date}"
    """From date. Not followed by :py:attr:`TO`."""

    BET_AND = "BET {date1} AND {date2}"
    """Between date1 and date2."""

    FROM_TO = "FROM {date1} TO {date2}"
    """From date1 to date2."""
[docs]
def get_date_type(date: str) -> DateType | None:
    """Return the modifier used by DATE line payloads.

    If no modifier is recognized, return None.

    Ignore :py:attr:`BC` and :py:attr:`BCE` and return None,
    because these modifiers can be combined with the others.

    The two-date modifiers are only reported when their second keyword
    appears as a whole, space-delimited word after the first one
    (``" AND "`` / ``" TO "``), which is the same form that
    :py:func:`.to_datetime_range` splits on. A payload starting with
    ``"FROM "`` without such a ``" TO "`` is reported as :py:attr:`FROM`.
    """
    # Single-date modifiers: a three-letter keyword followed by a space.
    single_date_modifiers = {
        'ABT ': DateType.ABT,
        'CAL ': DateType.CAL,
        'EST ': DateType.EST,
        'BEF ': DateType.BEF,
        'AFT ': DateType.AFT,
    }
    modifier = single_date_modifiers.get(date[:4])
    if modifier is not None:
        return modifier
    # Look for the second keyword only after the prefix, so that the
    # prefix's own trailing space cannot be part of the match.
    if date.startswith('BET ') and ' AND ' in date[4:]:
        return DateType.BET_AND
    if date.startswith('FROM '):
        if ' TO ' in date[5:]:
            return DateType.FROM_TO
        return DateType.FROM
    if date.startswith('TO '):
        return DateType.TO
    return None
[docs]
def remove_trailing_zeros(date: str) -> str:
    """Remove the useless zeros prefixing numbers.

    Despite its name, this function strips *leading* zeros: a ``0`` is
    dropped when it starts the string or follows a whitespace or a ``-``,
    and is itself followed by another digit. For example
    ``"05 JAN 0800"`` becomes ``"5 JAN 800"``.

    A zero that is not followed by a digit is the number zero itself, not
    padding, so it is kept (``"0 JAN"`` stays ``"0 JAN"``).
    """
    kept: list[str] = []
    last_index = len(date) - 1
    for index, char in enumerate(date):
        is_padding_zero = (
            char == '0'
            # The final character is never removed.
            and index < last_index
            # Only a zero followed by a digit is padding.
            and date[index + 1].isdecimal()
            # The zero must start a number: nothing kept before it yet,
            # or the last kept character is a separator.
            and (not kept or kept[-1].isspace() or kept[-1] == '-')
        )
        if not is_padding_zero:
            kept.append(char)
    return ''.join(kept)
@overload
def extract_int_year(date: str, default: None = None) -> float | None: ...
@overload
def extract_int_year(date: str, default: float) -> float: ...
def extract_int_year(date: str, default: float | None = None) -> float | None:
    """Format the payload of DATE lines.

    Return the year of the date as an integer. On failure, return the default.

    A :py:attr:`BCE` date returns a negative number. For :py:attr:`BET_AND` and
    :py:attr:`FROM_TO` date types, this function returns the median number of
    the range, hence the float type. When only one bound of the range can be
    read, that bound is returned; when neither can, the default is returned.
    """
    year = extract_year(date)
    if ' -- ' in year:
        # A range: read each bound separately, then take the middle.
        str_year1, str_year2 = year.split(' -- ', 1)
        year1 = extract_int_year(str_year1)
        year2 = extract_int_year(str_year2)
        if year1 is None and year2 is None:
            return default
        if year1 is None:
            return year2
        if year2 is None:
            return year1
        return (year1 + year2) / 2
    # Keep only the digits and the minus sign.
    year_without_context = ''.join(c for c in year if c.isdecimal() or c == '-')
    try:
        return int(year_without_context)
    except ValueError:
        # Nothing left, or not a well-formed integer (e.g. a lone "-").
        return default
[docs]
def to_datetime(date: str, default: datetime | None = None) -> datetime:
"""Convert the payload of DATE lines to datetime object.
If default is provided, return default on failure.
Otherwise, raise ValueError on failure.
If no day or month is specified, the first day and month are used.
The returned date is more precise than :py:func:`.extract_int_year`, but
works less often. Infact, this method only works for positive dates
(i.e. not :py:attr:`BC`) and :py:attr:`ABT`, :py:attr:`CAL`, :py:attr:`EST`
date types. For :py:attr:`BET_AND` or :py:attr:`TO_FROM` date types, use the
:py:func:`.to_datetime_range` function. The :py:attr:`BEF` and :py:attr:`AFT`
date types are not supported."""
if date[:4] in ("ABT ", "CAL ", "EST "):
date = date[4:]
year = extract_int_year(date)
if year and 0 < year < 1000:
four_digits_year = f"{year:04}"
date = date.replace(str(year), four_digits_year)
err = ValueError(f"Fail to parse {date} as a date")
for fmt in ("%d %b %Y", "%d %b %Y", "%b %Y", "%Y"):
try:
return datetime.strptime(date, fmt)
except ValueError as e:
err = e
if default is not None:
return default
raise err
[docs]
def to_datetime_range(
date: str,
default: datetime | None = None,
) -> tuple[datetime, datetime]:
"""Convert the payload of DATE lines to datetime objects.
If default is provided, return default on failure.
Otherwise, raise ValueError on failure.
A case of failure is if the date types is not :py:attr:`BET_AND`,
or :py:attr:`FROM_TO`.
Call :py:func:`.to_datetime` on the first and second date."""
if date.startswith("BET ") and date.count(" AND ") == 1:
part1, part2 = date[4:].split(" AND ")
elif date.startswith("FROM ") and date.count(" TO ") == 1:
part1, part2 = date[5:].split(" TO ")
elif default is not None:
return default, default
else:
raise ValueError(f"Fail to parse {date} as a date range")
return to_datetime(part1, default), to_datetime(part2, default)
[docs]
def add_time(date: datetime, time_: str) -> datetime:
    """Parse the payload of TIME lines.

    When the payload is a valid ISO time, return a datetime made of the
    day of ``date`` and the parsed time. When it is not, return ``date``
    unchanged.

    Note: datetime is immutable, thus the presence of a returned value.
    """
    try:
        parsed_time = time.fromisoformat(time_)
    except ValueError:
        # Unreadable time: hand the datetime back as it was.
        return date
    else:
        day = date.date()
        return datetime.combine(day, parsed_time)
[docs]
def line_to_datetime(
    date: TrueLine | FakeLine,
    default: datetime | None = None,
) -> datetime:
    """Convert DATE lines to datetime object using the payload and the TIME sub-line.

    If default is provided, return default on date parsing failure.
    Otherwise, raise ValueError on failure.

    When the date is a range (:py:attr:`BET_AND` or :py:attr:`FROM_TO`),
    return the median date of the range.
    """
    range_types = (DateType.BET_AND, DateType.FROM_TO)
    if get_date_type(date.payload) not in range_types:
        moment = to_datetime(date.payload, default)
    else:
        start, end = to_datetime_range(date.payload, default)
        # Middle of the range: the start plus half of the span.
        moment = start + (end - start) / 2
    # `date >= "TIME"` is handed to add_time as the TIME payload.
    return add_time(moment, date >= "TIME")