"""Classes and types for the data structure used to represent a gedcom."""
from typing import Iterator, Literal, TypeAlias
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
[docs]
SubmRef: TypeAlias = str
"""The cross-reference identifier of type '@SUB1@' or '@U1@' for a submitter
of the document."""
[docs]
SubnRef: TypeAlias = str
"""Deprecated. The cross-reference identifier of type '@SUB2@' for a submission."""
[docs]
IndiRef: TypeAlias = str
"""The cross-reference identifier of type '@I1@' for an individual."""
[docs]
FamRef: TypeAlias = str
"""The cross-reference identifier of type '@F1@' for a family."""
[docs]
SNoteRef: TypeAlias = str
"""The cross-reference identifier of type '@N1@' for a shared note."""
[docs]
SourRef: TypeAlias = str
"""The cross-reference identifier of type '@S1@' for a source document."""
[docs]
RepoRef: TypeAlias = str
"""The cross-reference identifier of type '@R1@' for a repository (an archive)."""
[docs]
ObjeRef: TypeAlias = str
"""The cross-reference identifier of type '@O1@' for an object (e.g. an image)."""
[docs]
XRef: TypeAlias = SubmRef | SubnRef | IndiRef | FamRef | SNoteRef | SourRef | RepoRef | ObjeRef
"""The cross-reference identifier indicates a record to which payloads may point."""
[docs]
VoidRef: TypeAlias = Literal['@VOID@']
"""A pointer used for unknown value where payload can't be let empty.
e.g.: In a family record, the line '2 CHIL @VOID@' indicates that the parents
had a child whom we know nothing. The line is used to keep the children birth order."""
[docs]
Pointer: TypeAlias = XRef | VoidRef
"""Generic pointer that is used in the payload to reference an existing record
or a non-existing one."""
class Line(ABC):
    """Abstract base class for gedcom lines.

    Implementations are :py:class:`.TrueLine` and :py:class:`.FakeLine`,
    see these classes for more information.
    """

    @abstractmethod
    def __bool__(self) -> bool:
        """True if it is a :py:class:`.TrueLine`,
        False if it is a :py:class:`.FakeLine`."""

    @property
    @abstractmethod
    def payload(self) -> str:
        """See the description of :py:class:`.TrueLine` class."""

    @property
    @abstractmethod
    def payload_with_cont(self) -> str:
        """Return the multi-line payload as a single string.

        Multi-line payloads are split into several :py:class:`Line` as written
        in the original gedcom file. The corresponding sub-lines have the
        tags CONC and CONT. They are gathered into a single string by
        concatenating the payload of each line. A newline is added before
        the payload of each sub-line with the CONT tag."""

    @property
    @abstractmethod
    def sub_lines(self) -> list['TrueLine']:
        """See the description of :py:class:`.TrueLine` class."""

    @abstractmethod
    def get_sub_lines(self, tag: str) -> list['TrueLine']:
        """Return all sub-lines having the given :any:`tag`.
        Return an empty list if no line matches."""

    def __rshift__(self, tag: str) -> list['TrueLine']:
        """Alias for :py:meth:`get_sub_lines` to shorten the syntax
        by using the >> operator."""
        return self.get_sub_lines(tag)

    @abstractmethod
    def get_sub_line(self, tag: str) -> 'TrueLine | FakeLine':
        """Return the first sub-line having the given :any:`tag`.
        Return a :py:class:`.FakeLine` if no line matches."""

    def __gt__(self, tag: str) -> 'TrueLine | FakeLine':
        """Alias for :py:meth:`get_sub_line` to shorten the syntax
        by using the > operator."""
        return self.get_sub_line(tag)

    @abstractmethod
    def get_sub_line_payload(self, tag: str) -> str:
        """Return the payload of the first sub-line having the given
        :any:`tag`. Return an empty string if no line matches."""

    def __ge__(self, tag: str) -> str:
        """Alias for :py:meth:`get_sub_line_payload` to shorten the syntax
        by using the >= operator."""
        return self.get_sub_line_payload(tag)

    def get_all_sub_lines(self) -> Iterator['TrueLine']:
        """Recursively iterate on sub-lines.

        All lines under the given line are returned. The order is preserved
        as in the gedcom file, sub-sub-lines come before sibling lines."""
        # Depth-first pre-order walk with an explicit stack. Children are
        # pushed in reverse so that popping from the end yields them in
        # document order, without the O(n) cost of popping from the front.
        pending = self.sub_lines[::-1]
        while pending:
            line = pending.pop()
            yield line
            pending.extend(reversed(line.sub_lines))

    def get_source(self) -> str:
        """Return the gedcom text equivalent for the line and its sub-lines.

        Each line is terminated by a newline. A falsy line (see
        :py:meth:`__bool__`) produces an empty string."""
        if not self:
            return ""
        rendered = [str(self)]
        rendered.extend(str(sub_line) for sub_line in self.get_all_sub_lines())
        return "\n".join(rendered) + "\n"
class FakeLine(Line):
    """Dummy line for syntactic sugar.

    It allows the chaining of method calls. See these `examples
    <https://github.com/GatienBouyer/fastgedcom/tree/main/examples>`_
    for the usage of chaining.

    The class behaves like a :py:class:`.TrueLine`
    (it has the same methods), but the payload is empty.

    To differentiate a :py:class:`.FakeLine` from a
    :py:class:`.TrueLine`, a simple boolean test is enough.
    """

    # Plain class attributes stand in for the abstract properties of Line:
    # a fake line has no content and no children.
    payload = ""  # pyright: ignore[reportGeneralTypeIssues]
    payload_with_cont = ""  # pyright: ignore[reportGeneralTypeIssues]
    sub_lines = []  # pyright: ignore[reportGeneralTypeIssues]

    def __bool__(self) -> Literal[False]:
        """Return False."""
        return False

    def get_sub_lines(self, tag: str) -> list['TrueLine']:
        """Return an empty list: a fake line has no sub-lines."""
        return []

    def __rshift__(self, tag: str) -> list['TrueLine']:
        """Alias for :py:meth:`get_sub_lines` (the >> operator)."""
        return self.get_sub_lines(tag)

    def get_sub_line(self, tag: str) -> 'TrueLine | FakeLine':
        """Return the module-level ``fake_line`` so that calls can be chained."""
        return fake_line

    def __gt__(self, tag: str) -> 'TrueLine | FakeLine':
        """Alias for :py:meth:`get_sub_line` (the > operator)."""
        return self.get_sub_line(tag)

    def get_sub_line_payload(self, tag: str) -> str:
        """Return an empty string: a fake line has no sub-lines."""
        return ""

    def __ge__(self, tag: str) -> str:
        """Alias for :py:meth:`get_sub_line_payload` (the >= operator)."""
        return self.get_sub_line_payload(tag)

    def __repr__(self) -> str:
        """Return the string representation of the class."""
        return f"<{self.__class__.__qualname__}>"

    def __eq__(self, value: object) -> bool:
        """Return True if ``value`` is also a :py:class:`.FakeLine`."""
        return isinstance(value, FakeLine)
@dataclass(slots=True)
class TrueLine(Line):
    """Represent a line of a gedcom document.

    Contain the :py:attr:`sub-lines` of the gedcom structure to form a recursive
    representation of the gedcom file.

    This class uses the simplified format, instead of the normalized
    ``Level [Xref] Tag [LineVal]`` format.
    The format of a gedcom line: ``Level Tag Payload``.

    In the simplified format, the :py:attr:`tag` is either the normalized Tag or the optional
    Xref. Hence, the :py:attr:`payload` is the LineVal - when the Xref is not
    present - or the normalized Tag plus the LineVal (generally an empty
    string) - when the Xref is present. The Payload can be an empty string. As
    for the :py:attr:`level`, it matches the definition of the gedcom standard.
    """

    level: int
    """The line level defined by the gedcom standard."""

    tag: str
    """The cross-reference identifier for level 0 line (also called record identifier),
    or the tag defining the information and the structure of the data."""

    payload: str
    """The payload of the structure, also called content or value.
    Warning: Multi-line payloads are split into several :py:class:`Line` as
    written in the original gedcom file. The corresponding sub-lines are with
    the tags CONC and CONT. Use the :py:attr:`payload_with_cont` property to get the
    complete multi-line payloads."""

    sub_lines: list['TrueLine'] = field(default_factory=list)
    """List of the sub-lines, i.e. the next-level lines that are part
    of this structure."""

    def __bool__(self) -> Literal[True]:
        """Return True."""
        return True

    def get_sub_lines(self, tag: str) -> list['TrueLine']:
        """Return all direct sub-lines whose tag equals ``tag``, in order."""
        return [sub_line for sub_line in self.sub_lines if sub_line.tag == tag]

    def __rshift__(self, tag: str) -> list['TrueLine']:
        """Alias for :py:meth:`get_sub_lines` (the >> operator)."""
        return self.get_sub_lines(tag)

    def get_sub_line(self, tag: str) -> 'TrueLine | FakeLine':
        """Return the first direct sub-line whose tag equals ``tag``,
        or the module-level ``fake_line`` if none matches."""
        for sub_line in self.sub_lines:
            if sub_line.tag == tag:
                return sub_line
        return fake_line

    def __gt__(self, tag: str) -> 'TrueLine | FakeLine':
        """Alias for :py:meth:`get_sub_line` (the > operator)."""
        return self.get_sub_line(tag)

    def get_sub_line_payload(self, tag: str) -> str:
        """Return the payload of the first direct sub-line whose tag equals
        ``tag``, or an empty string if none matches."""
        for sub_line in self.sub_lines:
            if sub_line.tag == tag:
                return sub_line.payload
        return ""

    def __ge__(self, tag: str) -> str:
        """Alias for :py:meth:`get_sub_line_payload` (the >= operator)."""
        return self.get_sub_line_payload(tag)

    def __str__(self) -> str:
        """Return the gedcom representation of the line (sub-lines excluded)."""
        if not self.payload:
            # No trailing space when the payload is empty.
            return f"{self.level} {self.tag}"
        return f"{self.level} {self.tag} {self.payload}"

    def __repr__(self) -> str:
        """Return the string representation of the class."""
        return (
            f"<{self.__class__.__qualname__} {self.level} {self.tag} "
            f"{self.payload} -> {len(self.sub_lines)}>"
        )

    @property
    def payload_with_cont(self) -> str:
        """Return the payload joined with its CONT and CONC sub-lines.

        The payload of a CONT sub-line is appended after a newline, the
        payload of a CONC sub-line is appended directly. Other sub-lines
        are ignored."""
        parts = [self.payload]
        for sub_line in self.sub_lines:
            if sub_line.tag == "CONT":
                parts.append('\n' + sub_line.payload)
            elif sub_line.tag == "CONC":
                parts.append(sub_line.payload)
        return "".join(parts)
Record: TypeAlias = TrueLine
"""A level 0 line referenced by an XRef in the document."""
class Document:
    """Store all the information of the gedcom document.

    All records (level 0 lines) are directly accessible via the
    :py:attr:`records` dictionary and the other lines are
    accessible via :py:attr:`.TrueLine.sub_lines`."""

    records: dict[XRef, Record]
    """Dictionary of records, accessible via :py:meth:`get_records` or
    :py:meth:`__getitem__`. Access it directly to raise KeyError instead
    of getting a :py:class:`.FakeLine`. Useful when you are pretty sure that
    the Record exists in the document."""

    def __init__(self) -> None:
        """Create an empty document (no records)."""
        self.records = {}

    def __iter__(self) -> Iterator[Record]:
        """Iterate on the lines of level 0:
        the records, the header, and the TRLR line."""
        return iter(self.records.values())

    def __contains__(self, identifier: XRef) -> bool:
        """Return True if the identifier refers to an existing record."""
        return identifier in self.records

    def get_records(self, record_type: str) -> Iterator[Record]:
        """Return an iterator over records of that ``record_type``.

        The type is the payload of level 0 lines: INDI, FAM, etc."""
        for record in self.records.values():
            if record.payload == record_type:
                yield record

    __rshift__ = get_records
    """Alias for :py:meth:`get_records` to shorten the syntax
    by using the >> operator."""

    def get_record(self, identifier: XRef | Literal["HEAD"]) -> Record | FakeLine:
        """Return the record under that ``identifier``, or the module-level
        ``fake_line`` if there is no such record."""
        return self.records.get(identifier, fake_line)

    __getitem__ = get_record
    """Alias for :py:meth:`get_record` to shorten the syntax
    by using the [] operator."""

    def __eq__(self, __value: object) -> bool:
        """Return True if the other object is a :py:class:`.Document`
        whose :py:attr:`records` compare equal to this one's."""
        if not isinstance(__value, Document):
            return False
        return self.records == __value.records

    def get_source(self) -> str:
        """Return the gedcom text equivalent for the :py:class:`.Document` as a string.

        Useful to save a modified :py:class:`.Document` into a file."""
        return "".join(record.get_source() for record in self.records.values())
fake_line = FakeLine()
""":py:class:`.FakeLine` instance returned by functions.
Used to avoid having multiple unnecessary instances of :py:class:`.FakeLine`."""