Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
105 changes: 60 additions & 45 deletions data/mapping-specifications.json
Original file line number Diff line number Diff line change
Expand Up @@ -343,19 +343,18 @@
"metadata_completeness_score": 0.2727
},
{
"id": "https://w3id.org/sssom/mappings/67df215d-fffd-4d37-b42f-268fa6cea80c",
"license": "https://w3id.org/sssom/license/unspecified",
"type": "sssom",
"id": "http://w3id.org/sssom/commons/monarch/mappings/ncit-hp.sssom.tsv",
"content_url": "http://w3id.org/sssom/commons/monarch/mappings/ncit-hp.sssom.tsv",
"type": "sssom",
"status": "fetch_error",
"registries": [
{
"id": "https://w3id.org/sssom/commons/monarch",
"name": "Monarch Mapping Commons",
"url": "https://github.com/monarch-initiative/monarch-mapping-commons"
}
],
"status": "ok",
"metadata_completeness_score": 0.2727
"metadata_completeness_score": 0.0
},
{
"id": "https://w3id.org/sssom/mappings/467f578f-3319-4fb4-92fb-bddaf41b0c1f",
Expand Down Expand Up @@ -557,20 +556,18 @@
"metadata_completeness_score": 0.3636
},
{
"id": "https://doi.org/10.5281/zenodo.6949696",
"version": "V1.1",
"license": "https://creativecommons.org/publicdomain/zero/1.0/",
"type": "sssom",
"id": "https://w3id.org/cpont/mappings/omop2obo_drug_exposure.sssom.tsv",
"content_url": "https://w3id.org/cpont/mappings/omop2obo_drug_exposure.sssom.tsv",
"type": "sssom",
"status": "fetch_error",
"registries": [
{
"id": "https://w3id.org/cpont/mappings",
"name": "C-Path Mapping Commons",
"url": "https://gitlab.c-path.org/c-pathontology/mapping-commons"
}
],
"status": "ok",
"metadata_completeness_score": 0.3636
"metadata_completeness_score": 0.0
},
{
"id": "https://doi.org/10.5281/zenodo.6949858",
Expand Down Expand Up @@ -618,7 +615,8 @@
"registries": [
{
"id": "https://github.com/biopragmatics/mapping-registry",
"name": "Biopragmatics Mapping Registry"
"name": "Biopragmatics Mapping Registry",
"url": "https://github.com/biopragmatics"
}
],
"status": "ok",
Expand All @@ -633,7 +631,8 @@
"registries": [
{
"id": "https://github.com/biopragmatics/mapping-registry",
"name": "Biopragmatics Mapping Registry"
"name": "Biopragmatics Mapping Registry",
"url": "https://github.com/biopragmatics"
}
],
"status": "ok",
Expand All @@ -643,11 +642,12 @@
"id": "https://zenodo.org/records/15504009/files/mappings.sssom.tsv.gz",
"content_url": "https://zenodo.org/records/15504009/files/mappings.sssom.tsv.gz",
"type": "sssom",
"status": "nonstandard_format",
"status": "no_metadata",
"registries": [
{
"id": "https://github.com/biopragmatics/mapping-registry",
"name": "Biopragmatics Mapping Registry"
"name": "Biopragmatics Mapping Registry",
"url": "https://github.com/biopragmatics"
}
],
"metadata_completeness_score": 0.0
Expand All @@ -656,11 +656,12 @@
"id": "https://zenodo.org/records/15826794/files/priority.sssom.tsv.gz",
"content_url": "https://zenodo.org/records/15826794/files/priority.sssom.tsv.gz",
"type": "sssom",
"status": "nonstandard_format",
"status": "no_metadata",
"registries": [
{
"id": "https://github.com/biopragmatics/mapping-registry",
"name": "Biopragmatics Mapping Registry"
"name": "Biopragmatics Mapping Registry",
"url": "https://github.com/biopragmatics"
}
],
"metadata_completeness_score": 0.0
Expand All @@ -669,11 +670,12 @@
"id": "https://zenodo.org/records/15826794/files/processed.sssom.tsv.gz",
"content_url": "https://zenodo.org/records/15826794/files/processed.sssom.tsv.gz",
"type": "sssom",
"status": "nonstandard_format",
"status": "no_metadata",
"registries": [
{
"id": "https://github.com/biopragmatics/mapping-registry",
"name": "Biopragmatics Mapping Registry"
"name": "Biopragmatics Mapping Registry",
"url": "https://github.com/biopragmatics"
}
],
"metadata_completeness_score": 0.0
Expand All @@ -682,11 +684,12 @@
"id": "https://zenodo.org/records/15826794/files/raw.sssom.tsv.gz",
"content_url": "https://zenodo.org/records/15826794/files/raw.sssom.tsv.gz",
"type": "sssom",
"status": "nonstandard_format",
"status": "no_metadata",
"registries": [
{
"id": "https://github.com/biopragmatics/mapping-registry",
"name": "Biopragmatics Mapping Registry"
"name": "Biopragmatics Mapping Registry",
"url": "https://github.com/biopragmatics"
}
],
"metadata_completeness_score": 0.0
Expand All @@ -695,11 +698,12 @@
"id": "https://zenodo.org/records/15826779/files/priority.sssom.tsv.gz",
"content_url": "https://zenodo.org/records/15826779/files/priority.sssom.tsv.gz",
"type": "sssom",
"status": "nonstandard_format",
"status": "no_metadata",
"registries": [
{
"id": "https://github.com/biopragmatics/mapping-registry",
"name": "Biopragmatics Mapping Registry"
"name": "Biopragmatics Mapping Registry",
"url": "https://github.com/biopragmatics"
}
],
"metadata_completeness_score": 0.0
Expand All @@ -708,11 +712,12 @@
"id": "https://zenodo.org/records/15826779/files/processed.sssom.tsv.gz",
"content_url": "https://zenodo.org/records/15826779/files/processed.sssom.tsv.gz",
"type": "sssom",
"status": "nonstandard_format",
"status": "no_metadata",
"registries": [
{
"id": "https://github.com/biopragmatics/mapping-registry",
"name": "Biopragmatics Mapping Registry"
"name": "Biopragmatics Mapping Registry",
"url": "https://github.com/biopragmatics"
}
],
"metadata_completeness_score": 0.0
Expand All @@ -721,11 +726,12 @@
"id": "https://zenodo.org/records/15826779/files/raw.sssom.tsv.gz",
"content_url": "https://zenodo.org/records/15826779/files/raw.sssom.tsv.gz",
"type": "sssom",
"status": "nonstandard_format",
"status": "no_metadata",
"registries": [
{
"id": "https://github.com/biopragmatics/mapping-registry",
"name": "Biopragmatics Mapping Registry"
"name": "Biopragmatics Mapping Registry",
"url": "https://github.com/biopragmatics"
}
],
"metadata_completeness_score": 0.0
Expand All @@ -734,11 +740,12 @@
"id": "https://zenodo.org/records/15826768/files/priority.sssom.tsv.gz",
"content_url": "https://zenodo.org/records/15826768/files/priority.sssom.tsv.gz",
"type": "sssom",
"status": "nonstandard_format",
"status": "no_metadata",
"registries": [
{
"id": "https://github.com/biopragmatics/mapping-registry",
"name": "Biopragmatics Mapping Registry"
"name": "Biopragmatics Mapping Registry",
"url": "https://github.com/biopragmatics"
}
],
"metadata_completeness_score": 0.0
Expand All @@ -747,11 +754,12 @@
"id": "https://zenodo.org/records/15826768/files/processed.sssom.tsv.gz",
"content_url": "https://zenodo.org/records/15826768/files/processed.sssom.tsv.gz",
"type": "sssom",
"status": "nonstandard_format",
"status": "no_metadata",
"registries": [
{
"id": "https://github.com/biopragmatics/mapping-registry",
"name": "Biopragmatics Mapping Registry"
"name": "Biopragmatics Mapping Registry",
"url": "https://github.com/biopragmatics"
}
],
"metadata_completeness_score": 0.0
Expand All @@ -760,11 +768,12 @@
"id": "https://zenodo.org/records/15826768/files/raw.sssom.tsv.gz",
"content_url": "https://zenodo.org/records/15826768/files/raw.sssom.tsv.gz",
"type": "sssom",
"status": "nonstandard_format",
"status": "no_metadata",
"registries": [
{
"id": "https://github.com/biopragmatics/mapping-registry",
"name": "Biopragmatics Mapping Registry"
"name": "Biopragmatics Mapping Registry",
"url": "https://github.com/biopragmatics"
}
],
"metadata_completeness_score": 0.0
Expand All @@ -773,11 +782,12 @@
"id": "https://zenodo.org/records/15826754/files/priority.sssom.tsv.gz",
"content_url": "https://zenodo.org/records/15826754/files/priority.sssom.tsv.gz",
"type": "sssom",
"status": "nonstandard_format",
"status": "no_metadata",
"registries": [
{
"id": "https://github.com/biopragmatics/mapping-registry",
"name": "Biopragmatics Mapping Registry"
"name": "Biopragmatics Mapping Registry",
"url": "https://github.com/biopragmatics"
}
],
"metadata_completeness_score": 0.0
Expand All @@ -786,11 +796,12 @@
"id": "https://zenodo.org/records/15826754/files/processed.sssom.tsv.gz",
"content_url": "https://zenodo.org/records/15826754/files/processed.sssom.tsv.gz",
"type": "sssom",
"status": "nonstandard_format",
"status": "no_metadata",
"registries": [
{
"id": "https://github.com/biopragmatics/mapping-registry",
"name": "Biopragmatics Mapping Registry"
"name": "Biopragmatics Mapping Registry",
"url": "https://github.com/biopragmatics"
}
],
"metadata_completeness_score": 0.0
Expand All @@ -799,11 +810,12 @@
"id": "https://zenodo.org/records/15826754/files/raw.sssom.tsv.gz",
"content_url": "https://zenodo.org/records/15826754/files/raw.sssom.tsv.gz",
"type": "sssom",
"status": "nonstandard_format",
"status": "no_metadata",
"registries": [
{
"id": "https://github.com/biopragmatics/mapping-registry",
"name": "Biopragmatics Mapping Registry"
"name": "Biopragmatics Mapping Registry",
"url": "https://github.com/biopragmatics"
}
],
"metadata_completeness_score": 0.0
Expand All @@ -812,11 +824,12 @@
"id": "https://zenodo.org/records/15826693/files/priority.sssom.tsv.gz",
"content_url": "https://zenodo.org/records/15826693/files/priority.sssom.tsv.gz",
"type": "sssom",
"status": "nonstandard_format",
"status": "no_metadata",
"registries": [
{
"id": "https://github.com/biopragmatics/mapping-registry",
"name": "Biopragmatics Mapping Registry"
"name": "Biopragmatics Mapping Registry",
"url": "https://github.com/biopragmatics"
}
],
"metadata_completeness_score": 0.0
Expand All @@ -825,11 +838,12 @@
"id": "https://zenodo.org/records/15826693/files/processed.sssom.tsv.gz",
"content_url": "https://zenodo.org/records/15826693/files/processed.sssom.tsv.gz",
"type": "sssom",
"status": "nonstandard_format",
"status": "no_metadata",
"registries": [
{
"id": "https://github.com/biopragmatics/mapping-registry",
"name": "Biopragmatics Mapping Registry"
"name": "Biopragmatics Mapping Registry",
"url": "https://github.com/biopragmatics"
}
],
"metadata_completeness_score": 0.0
Expand All @@ -838,11 +852,12 @@
"id": "https://zenodo.org/records/15826693/files/raw.sssom.tsv.gz",
"content_url": "https://zenodo.org/records/15826693/files/raw.sssom.tsv.gz",
"type": "sssom",
"status": "nonstandard_format",
"status": "no_metadata",
"registries": [
{
"id": "https://github.com/biopragmatics/mapping-registry",
"name": "Biopragmatics Mapping Registry"
"name": "Biopragmatics Mapping Registry",
"url": "https://github.com/biopragmatics"
}
],
"metadata_completeness_score": 0.0
Expand Down
22 changes: 18 additions & 4 deletions scripts/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
import yaml
import requests
import json
import gzip
import io
import os
import logging
import traceback
Expand Down Expand Up @@ -33,10 +35,18 @@ def _registry_info(registry_data: dict) -> dict:
}


def _classify_error(mapping_set_uri: str, error: Exception) -> str:
def _iter_gz_lines(response: requests.Response):
"""Yield decompressed text lines from a gzipped HTTP response, streaming."""
decompressor = gzip.GzipFile(fileobj=response.raw)
reader = io.TextIOWrapper(decompressor, encoding="utf-8")
for line in reader:
yield line.rstrip("\n")


def _classify_error(error: Exception) -> str:
"""Classify a mapping set processing error into a status string."""
error_msg = str(error).lower()
if "bytes-like object" in error_msg or mapping_set_uri.endswith(".gz"):
if "bytes-like object" in error_msg:
return "nonstandard_format"
if "no #-commented header" in error_msg:
return "no_metadata"
Expand Down Expand Up @@ -67,7 +77,11 @@ def _process_sssom_mapping_set(mapping_set_id: str, mapping_set_uri: str) -> dic
raise RuntimeError(f"HTTP {response.status_code} fetching {mapping_set_uri}")

try:
meta = parse_sssom_tsv(response.iter_lines(decode_unicode=True))
if mapping_set_uri.endswith(".gz"):
lines = _iter_gz_lines(response)
else:
lines = response.iter_lines(decode_unicode=True)
meta = parse_sssom_tsv(lines)
finally:
response.close()

Expand Down Expand Up @@ -184,7 +198,7 @@ def prepare_mapping_registry(registry_file, output_file, log_file):
mapping_set_id, registry_id, registry_title,
)
except Exception as e:
status = _classify_error(mapping_set_uri, e)
status = _classify_error(e)
stub = _stub_spec(mapping_set_id, status)
stub["registries"] = [info]
specifications.append(stub)
Expand Down
Loading