Skip to content

AbstractRepository

AbstractRepository

Bases: ABC

An abstract class representing a repository. It defines the methods that must be implemented by any repository class.

Attributes:

Name Type Description
repositoryID str

An identifier for the repository that is used to reference it internally. This is not exposed to the outside or published in a FAIR-DO. (abstract)

Source code in src/nmr_FAIR_DOs/repositories/AbstractRepository.py
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
class AbstractRepository(ABC):
    """
    An abstract class representing a repository.
    It defines the methods that must be implemented by any repository class.

    Attributes:
        repositoryID (str): An identifier for the repository that is used to reference it internally. This is not exposed to the outside or published in a FAIR-DO. (abstract)
    """

    @property
    @abstractmethod
    def repositoryID(self) -> str:
        """
        Returns an internal identifier for the repository.
        This is not exposed to the outside or published in a FAIR-DO.

        Returns:
            str: The internal id of the repository
        """
        # NOTE(review): NotImplemented is conventionally reserved for binary operator methods;
        # it is kept here so that subclasses calling super() observe unchanged behaviour.
        return NotImplemented

    @abstractmethod
    async def getAllAvailableResources(self) -> list[dict] | None:
        """
        Returns a list of all resources available in the repository.

        The default implementation (reachable via ``super()``) delegates to
        ``getResourcesForTimeFrame`` with the widest time frame ``datetime`` can express.

        Returns:
            list[dict]: A list of all resources available in the repository
            None: If no resources are available in the repository
        """
        # By default, return all resources available in the repository for the entire time frame from the beginning of time to the end of time
        return await self.getResourcesForTimeFrame(datetime.min, datetime.max)

    @abstractmethod
    async def getResourcesForTimeFrame(
        self, start: datetime, end: datetime
    ) -> list[dict]:
        """
        Returns a list of all resources available in the repository within the specified time frame.

        Args:
            start (datetime): The start of the time frame
            end (datetime): The end of the time frame

        Returns:
            list[dict]: A list of all resources available in the repository within the specified time frame
        """
        return NotImplemented

    @abstractmethod
    async def extractPIDRecordFromResource(
        self,
        resource: dict,
        add_relationship: Callable[
            [str, list[PIDRecordEntry], Callable[[str], None] | None], str
        ],
    ) -> PIDRecord | None:
        """
        Extracts a PID record from a resource of the repository.
        This method expects an `add_relationship` function that is used to create relationships between FAIR DOs.
        For more information on the `add_relationship` function, see ``lib.py``.
        It expects the following arguments in the following order: (str, list[PIDRecordEntry], Callable[[str], None] | None) and returns the actual PID of the target FAIR-DO.
        The first argument is the (presumed) PID of the target record.
        The second argument is a list of entries to add to the target record.
        Optionally, the third argument is a function that is executed on success of adding the entries to the target record.
        It is meant to be used to create the back-reference relationship from the target record to the source record.

        Args:
            resource (dict): The resource to extract the PID record from
            add_relationship (function): The function to add entries to a PIDRecord. This function expects the following arguments in the following order: (str, list[PIDRecordEntry], Callable[[str], None] | None) and returns a str.

        Returns:
            PIDRecord: The PID record extracted from the resource
            None: If the PID record cannot be extracted from the resource
        """
        return NotImplemented

    @abstractmethod
    def getRepositoryFDO(self) -> PIDRecord:
        """
        Define the PID record for the repository.
        This record will be referenced by all extracted PID records from the repository in the "hasPrimarySource" relationship.

        Returns:
            PIDRecord: The PID record for the repository
        """
        return NotImplemented

    async def extractAll(
        self,
        urls: list[str] | None,
        addEntries: Callable[
            [str, list[PIDRecordEntry], Callable[[str], None] | None], str
        ],
    ) -> tuple[list[PIDRecord], list[dict]] | list[PIDRecord]:
        """
        Extracts PID records from all resources available in the repository.

        If ``urls`` is a non-empty list, the resources are fetched from those URLs.
        Otherwise (``None``, not a list, or empty) the resources are obtained from
        ``getAllAvailableResources``.

        Args:
            urls (list[str] | None): A list of URLs for the resources to extract. If ``None`` or empty, all available resources will be fetched from the repository.
            addEntries (function): The function to add entries to a PIDRecord. This function expects the following arguments in the following order: (str, list[PIDRecordEntry], Callable[[str], None] | None) and returns a str. The first argument is the (presumed) PID of the target record, the second argument is a list of entries to add to the target record, and the optional third argument is a function that is executed on success. It returns the PID of the target record.

        Returns:
            tuple[list[PIDRecord], list[dict]]: A tuple containing a list of extracted PID records and a list of errors encountered during extraction (only when at least one error occurred)
            list[PIDRecord]: A list of extracted PID records (when no error occurred; empty if no resources could be obtained)
        """
        if isinstance(urls, list) and len(urls) > 0:
            resources = await fetch_multiple(urls)
        else:
            try:
                resources = await self.getAllAvailableResources()
            except Exception as e:
                logger.error(
                    f"Error getting resources from repository {self.repositoryID}: {str(e)}"
                )
                return []

        # The guard must inspect the fetched resources, not `urls`: `urls` is
        # legitimately None/empty whenever the repository-wide fallback was used.
        if resources is None or len(resources) == 0:
            logger.warning(f"No resources available for repository {self.repositoryID}")
            return []

        pid_records: list[PIDRecord] = []
        errors: list[dict] = []

        for resource in resources:
            try:
                pid_record = await self.extractPIDRecordFromResource(
                    resource,
                    addEntries,
                )
                if pid_record is not None:
                    pid_records.append(pid_record)
            except Exception as e:
                # A single failing resource must not abort the whole run; record it and continue.
                logger.error(f"Error extracting PID record from {resource}: {str(e)}")
                errors.append(
                    {
                        "url": resource,
                        "error": str(e),
                        "timestamp": datetime.now().isoformat(),
                    }
                )

        if errors:
            return pid_records, errors
        return pid_records

repositoryID abstractmethod property

repositoryID: str

Returns an internal identifier for the repository. This is not exposed to the outside or published in a FAIR-DO.

Returns:

Name Type Description
str str

The internal id of the repository

getAllAvailableResources abstractmethod async

getAllAvailableResources() -> list[dict] | None

Returns a list of all resources available in the repository.

Returns:

Name Type Description
list[dict] | None

list[dict]: A list of all resources available in the repository

None list[dict] | None

If no resources are available in the repository

Source code in src/nmr_FAIR_DOs/repositories/AbstractRepository.py
53
54
55
56
57
58
59
60
61
62
63
@abstractmethod
async def getAllAvailableResources(self) -> list[dict] | None:
    """
    Returns a list of all resources available in the repository.

    Returns:
        list[dict]: A list of all resources available in the repository
        None: If no resources are available in the repository
    """
    # By default, return all resources available in the repository for the entire time frame from the beginning of time to the end of time
    return await self.getResourcesForTimeFrame(datetime.min, datetime.max)

getResourcesForTimeFrame abstractmethod async

getResourcesForTimeFrame(
    start: datetime, end: datetime
) -> list[dict]

Returns a list of all resources available in the repository within the specified time frame.

Parameters:

Name Type Description Default
start datetime

The start of the time frame

required
end datetime

The end of the time frame

required

Returns:

Type Description
list[dict]

list[dict]: A list of all resources available in the repository within the specified time frame

Source code in src/nmr_FAIR_DOs/repositories/AbstractRepository.py
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
@abstractmethod
async def getResourcesForTimeFrame(
    self, start: datetime, end: datetime
) -> list[dict]:
    """
    List the repository's resources that fall inside a given time window.

    Concrete repositories must override this method; the body here is only a placeholder.

    Args:
        start (datetime): Lower bound of the time window
        end (datetime): Upper bound of the time window

    Returns:
        list[dict]: The resources available in the repository within the given window
    """
    # Placeholder result of the abstract stub; subclasses supply the real lookup.
    return NotImplemented

extractPIDRecordFromResource abstractmethod async

extractPIDRecordFromResource(
    resource: dict,
    add_relationship: Callable[
        [
            str,
            list[PIDRecordEntry],
            Callable[[str], None] | None,
        ],
        str,
    ],
) -> PIDRecord | None

Extracts a PID record from a resource of the repository. This method expects an add_relationship function that is used to create relationships between FAIR DOs. For more information on the add_relationship function, see in lib.py. It expects the following arguments in the following order: (str, list[PIDRecordEntry], Callable[[str], None] | None) and returns the actual PID of the target FAIR-DO. The first argument is the (presumed) PID of the target record. The second argument is a list of entries to add to the target record. Optionally, the third argument is a function that is executed on success of adding the entries to the target record. It is meant to be used to create the back-reference relationship from the target record to the source record.

Parameters:

Name Type Description Default
resource dict

The resource to extract the PID record from

required
add_relationship function

The function to add entries to a PIDRecord. This function expects the following arguments in the following order: (str, list[PIDRecordEntry], Callable[[str], None] | None) and returns a str.

required

Returns:

Name Type Description
PIDRecord PIDRecord | None

The PID record extracted from the resource

None PIDRecord | None

If the PID record cannot be extracted from the resource

Source code in src/nmr_FAIR_DOs/repositories/AbstractRepository.py
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
@abstractmethod
async def extractPIDRecordFromResource(
    self,
    resource: dict,
    add_relationship: Callable[
        [str, list[PIDRecordEntry], Callable[[str], None] | None], str
    ],
) -> PIDRecord | None:
    """
    Build a PID record out of a single repository resource.

    Implementations receive an `add_relationship` callback (see ``lib.py``) for linking FAIR DOs with each other.
    The callback takes, in this order:

    1. the (presumed) PID of the target record (str),
    2. the entries to add to the target record (list[PIDRecordEntry]),
    3. optionally, a function that runs once the entries were added successfully; it is meant for creating the back-reference from the target record to the source record (Callable[[str], None] | None).

    The callback returns the actual PID of the target FAIR-DO.

    Args:
        resource (dict): The resource to extract the PID record from
        add_relationship (function): Callback that adds entries to a PIDRecord, with the signature (str, list[PIDRecordEntry], Callable[[str], None] | None) -> str.

    Returns:
        PIDRecord: The PID record extracted from the resource
        None: If the PID record cannot be extracted from the resource
    """
    # Placeholder result of the abstract stub; subclasses supply the real extraction.
    return NotImplemented

getRepositoryFDO abstractmethod

getRepositoryFDO() -> PIDRecord

Define the PID record for the repository. This record will be referenced by all extracted PID records from the repository in the "hasPrimarySource" relationship.

Returns:

Name Type Description
PIDRecord PIDRecord

The PID record for the repository

Source code in src/nmr_FAIR_DOs/repositories/AbstractRepository.py
109
110
111
112
113
114
115
116
117
118
@abstractmethod
def getRepositoryFDO(self) -> PIDRecord:
    """
    Provide the PID record that describes the repository itself.

    Every PID record extracted from the repository refers to this record through its "hasPrimarySource" relationship.

    Returns:
        PIDRecord: The PID record for the repository
    """
    # Placeholder result of the abstract stub; subclasses supply the real record.
    return NotImplemented

extractAll async

extractAll(
    urls: list[str],
    addEntries: Callable[
        [
            str,
            list[PIDRecordEntry],
            Callable[[str], None] | None,
        ],
        str,
    ],
) -> tuple[list[PIDRecord], list[dict]] | list[PIDRecord]

Extracts PID records from all resources available in the repository.

Parameters:

Name Type Description Default
urls list[str]

A list of URLs for all resources available in the repository. (Optional) If not provided, all available URLs will be fetched from the repository.

required
addEntries function

The function to add entries to a PIDRecord. This function expects the following arguments in the following order: (str, list[PIDRecordEntry], Callable[[str], None] | None) and returns a str. The first argument is the (presumed) PID of the target record, the second argument is a list of entries to add to the target record, and the optional third argument is a function that is executed on success of adding the entries to the target record. It returns the PID of the target record.

required

Returns:

Type Description
tuple[list[PIDRecord], list[dict]] | list[PIDRecord]

tuple[list[PIDRecord], list[dict[str, str]]]: A tuple containing a list of extracted PID records and a list of errors encountered during extraction

tuple[list[PIDRecord], list[dict]] | list[PIDRecord]

list[PIDRecord]: A list of extracted PID records

Source code in src/nmr_FAIR_DOs/repositories/AbstractRepository.py
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
async def extractAll(
    self,
    urls: list[str] | None,
    addEntries: Callable[
        [str, list[PIDRecordEntry], Callable[[str], None] | None], str
    ],
) -> tuple[list[PIDRecord], list[dict]] | list[PIDRecord]:
    """
    Extracts PID records from all resources available in the repository.

    If ``urls`` is a non-empty list, the resources are fetched from those URLs.
    Otherwise (``None``, not a list, or empty) the resources are obtained from
    ``getAllAvailableResources``.

    Args:
        urls (list[str] | None): A list of URLs for the resources to extract. If ``None`` or empty, all available resources will be fetched from the repository.
        addEntries (function): The function to add entries to a PIDRecord. This function expects the following arguments in the following order: (str, list[PIDRecordEntry], Callable[[str], None] | None) and returns a str. The first argument is the (presumed) PID of the target record, the second argument is a list of entries to add to the target record, and the optional third argument is a function that is executed on success. It returns the PID of the target record.

    Returns:
        tuple[list[PIDRecord], list[dict]]: A tuple containing a list of extracted PID records and a list of errors encountered during extraction (only when at least one error occurred)
        list[PIDRecord]: A list of extracted PID records (when no error occurred; empty if no resources could be obtained)
    """
    if isinstance(urls, list) and len(urls) > 0:
        resources = await fetch_multiple(urls)
    else:
        try:
            resources = await self.getAllAvailableResources()
        except Exception as e:
            logger.error(
                f"Error getting resources from repository {self.repositoryID}: {str(e)}"
            )
            return []

    # The guard must inspect the fetched resources, not `urls`: `urls` is
    # legitimately None/empty whenever the repository-wide fallback was used.
    if resources is None or len(resources) == 0:
        logger.warning(f"No resources available for repository {self.repositoryID}")
        return []

    pid_records: list[PIDRecord] = []
    errors: list[dict] = []

    for resource in resources:
        try:
            pid_record = await self.extractPIDRecordFromResource(
                resource,
                addEntries,
            )
            if pid_record is not None:
                pid_records.append(pid_record)
        except Exception as e:
            # A single failing resource must not abort the whole run; record it and continue.
            logger.error(f"Error extracting PID record from {resource}: {str(e)}")
            errors.append(
                {
                    "url": resource,
                    "error": str(e),
                    "timestamp": datetime.now().isoformat(),
                }
            )

    if errors:
        return pid_records, errors
    return pid_records