-
Notifications
You must be signed in to change notification settings - Fork 35
Expand file tree
/
Copy pathbase.py
More file actions
79 lines (61 loc) · 2.33 KB
/
base.py
File metadata and controls
79 lines (61 loc) · 2.33 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
from abc import ABC, abstractmethod
from pathlib import Path
from typing import List, Union
from healthchain.interop.config_manager import InteropConfigManager
class BaseParser(ABC):
    """
    Abstract base class for parsers that convert healthcare data formats.

    Subclasses implement :meth:`from_string`. The other ``from_*`` helpers
    obtain text from bytes, a single file, or a directory of files and
    delegate the actual parsing to :meth:`from_string`.
    """

    def __init__(self, config: InteropConfigManager):
        """Store the configuration object on the instance.

        Args:
            config: Configuration manager, kept as ``self.config``
        """
        self.config = config

    @abstractmethod
    def from_string(self, data: str) -> dict:
        """
        Parse input data and convert it to a structured format.

        This method should be implemented by subclasses to handle specific formats.

        Args:
            data: The input data as a string

        Returns:
            A dictionary containing the parsed data structure
        """

    def from_bytes(self, data: bytes, encoding: str = "utf-8") -> dict:
        """Parse input data from bytes.

        Args:
            data: The input data as bytes
            encoding: Character encoding to use when decoding bytes

        Returns:
            A dictionary containing the parsed data structure

        Raises:
            LookupError: If ``encoding`` is not a known codec
            UnicodeDecodeError: If the bytes are not valid in ``encoding``
        """
        return self.from_string(data.decode(encoding))

    def from_file(
        self, file_path: Union[str, Path], encoding: str = "utf-8"
    ) -> dict:
        """Parse input data from a file.

        Args:
            file_path: Path to the file to parse
            encoding: Character encoding used to read the file
                (default: "utf-8", the previously hard-coded value)

        Returns:
            A dictionary containing the parsed data structure

        Raises:
            FileNotFoundError: If the file does not exist
            LookupError: If ``encoding`` is not a known codec
            UnicodeDecodeError: If the file content is not valid in ``encoding``
        """
        text = Path(file_path).read_text(encoding=encoding)
        return self.from_string(text)

    def from_directory(
        self, directory_path: Union[str, Path], pattern: str = "*.xml"
    ) -> List[dict]:
        """Parse all matching files in a directory.

        Entries that match ``pattern`` but are not regular files (for example
        sub-directories) are skipped. Files are processed in sorted path order.

        Args:
            directory_path: Path to the directory containing files to parse
            pattern: Glob pattern to match files (default: "*.xml")

        Returns:
            A list of dictionaries, one per parsed file

        Raises:
            NotADirectoryError: If the path is not a directory
        """
        path = Path(directory_path)
        if not path.is_dir():
            raise NotADirectoryError(f"Not a directory: {path}")

        # Sorting makes the result order deterministic regardless of the order
        # in which glob yields entries.
        # from_file is deliberately called with the path only, so subclasses
        # that override it with a single-argument signature keep working.
        return [
            self.from_file(file_path)
            for file_path in sorted(path.glob(pattern))
            if file_path.is_file()
        ]