Skip to content

Commit 2d9a828

Browse files
authored
Modernization (#68)
* remove Python 3.8, add Python 3.14 * overhaul method documentation, use Google-style, mention which exceptions are raised * remove Python 3.8, add Python 3.14 * migrate from setup.py to pyproject.toml, keep a simple wrapper * replace os.path with pathlib, afaik works better on Windows * add test coverage report
1 parent d143d57 commit 2d9a828

File tree

8 files changed

+402
-186
lines changed

8 files changed

+402
-186
lines changed

.github/workflows/build.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,12 @@ jobs:
1313
fail-fast: false
1414
matrix:
1515
py:
16+
- "3.14"
1617
- "3.13"
1718
- "3.12"
1819
- "3.11"
1920
- "3.10"
2021
- "3.9"
21-
- "3.8"
2222
os:
2323
- ubuntu-latest
2424
steps:

opencage/batch.py

Lines changed: 87 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,13 @@
2020

2121

2222
class OpenCageBatchGeocoder():
23+
"""Batch geocoder that processes CSV files using the OpenCage API.
2324
24-
""" Called from command_line.py
25-
init() receives the parsed command line parameters
26-
geocode() receive an input and output CSV reader/writer and loops over the data
25+
Reads rows from a CSV input, geocodes each address using async workers,
26+
and writes results to a CSV output.
27+
28+
Args:
29+
options: Parsed command-line options from argparse.
2730
"""
2831

2932
def __init__(self, options):
@@ -33,9 +36,16 @@ def __init__(self, options):
3336
self.write_counter = 1
3437

3538
def __call__(self, *args, **kwargs):
39+
"""Run the batch geocoder synchronously via asyncio.run."""
3640
asyncio.run(self.geocode(*args, **kwargs))
3741

3842
async def geocode(self, csv_input, csv_output):
43+
"""Process a CSV input, geocode each row, and write results.
44+
45+
Args:
46+
csv_input: CSV reader for input rows.
47+
csv_output: CSV writer for output rows.
48+
"""
3949
if not self.options.dry_run:
4050
test = await self.test_request()
4151
if test['error']:
@@ -81,6 +91,12 @@ async def geocode(self, csv_input, csv_output):
8191
progress_bar.close()
8292

8393
async def test_request(self):
94+
"""Send a test geocoding request to verify the API key.
95+
96+
Returns:
97+
Dict with 'error' (None or exception) and 'free' (bool indicating
98+
whether a free trial account is being used).
99+
"""
84100
try:
85101
async with OpenCageGeocode(
86102
self.options.api_key,
@@ -99,6 +115,15 @@ async def test_request(self):
99115
return {'error': exc}
100116

101117
async def read_input(self, csv_input, queue):
118+
"""Read all rows from CSV input and add them to the work queue.
119+
120+
Args:
121+
csv_input: CSV reader for input rows.
122+
queue: Async queue to populate with parsed input items.
123+
124+
Returns:
125+
True if any warnings were encountered while reading, False otherwise.
126+
"""
102127
any_warnings = False
103128
for index, row in enumerate(csv_input):
104129
line_number = index + 1
@@ -119,6 +144,16 @@ async def read_input(self, csv_input, queue):
119144
return any_warnings
120145

121146
async def read_one_line(self, row, row_id):
147+
"""Parse a single CSV row into a work item for geocoding.
148+
149+
Args:
150+
row: List of column values from the CSV reader.
151+
row_id: 1-based line number of the row in the input.
152+
153+
Returns:
154+
Dict with keys 'row_id', 'address', 'original_columns',
155+
and 'warnings'.
156+
"""
122157
warnings = False
123158

124159
if self.options.input_columns:
@@ -159,6 +194,13 @@ async def read_one_line(self, row, row_id):
159194
return {'row_id': row_id, 'address': ','.join(address), 'original_columns': row, 'warnings': warnings}
160195

161196
async def worker(self, csv_output, queue, progress):
197+
"""Consume items from the queue and geocode each one.
198+
199+
Args:
200+
csv_output: CSV writer for output rows.
201+
queue: Async queue of work items to process.
202+
progress: tqdm progress bar, or False if disabled.
203+
"""
162204
while True:
163205
item = await queue.get()
164206

@@ -173,6 +215,14 @@ async def worker(self, csv_output, queue, progress):
173215
queue.task_done()
174216

175217
async def geocode_one_address(self, csv_output, row_id, address, original_columns):
218+
"""Geocode a single address and write the result to the output.
219+
220+
Args:
221+
csv_output: CSV writer for output rows.
222+
row_id: 1-based line number of the row in the input.
223+
address: Address string (or lat,lng for reverse geocoding).
224+
original_columns: Original CSV row columns to preserve in output.
225+
"""
176226
def on_backoff(details):
177227
if not self.options.quiet:
178228
sys.stderr.write("Backing off {wait:0.1f} seconds afters {tries} tries "
@@ -242,6 +292,18 @@ async def write_one_geocoding_result(
242292
geocoding_result,
243293
raw_response,
244294
original_columns):
295+
"""Write a single geocoding result row to the CSV output.
296+
297+
Appends the requested output columns to the original CSV columns.
298+
Rows are written in order unless the --unordered option is set.
299+
300+
Args:
301+
csv_output: CSV writer for output rows.
302+
row_id: 1-based line number of the row in the input.
303+
geocoding_result: First result dict from the API, or None.
304+
raw_response: Full API response dict.
305+
original_columns: Original CSV row columns to preserve in output.
306+
"""
245307
row = original_columns
246308

247309
for column in self.options.add_columns:
@@ -280,10 +342,32 @@ async def write_one_geocoding_result(
280342
self.write_counter = self.write_counter + 1
281343

282344
def log(self, message):
345+
"""Write a message to stderr unless quiet mode is enabled.
346+
347+
Args:
348+
message: Message string to display.
349+
"""
283350
if not self.options.quiet:
284351
sys.stderr.write(f"{message}\n")
285352

286353
def deep_get_result_value(self, data, keys, default=None):
354+
"""Retrieve a nested value from a dict using a list of keys.
355+
356+
Args:
357+
data: Dict to traverse.
358+
keys: List of keys to follow in sequence.
359+
default: Value to return if any key is missing.
360+
361+
Returns:
362+
The nested value, or default if the path doesn't exist.
363+
364+
Example:
365+
>>> data = {'status': {'code': 200, 'message': 'OK'}}
366+
>>> self.deep_get_result_value(data, ['status', 'message'])
367+
'OK'
368+
>>> self.deep_get_result_value(data, ['missing', 'key'], '')
369+
''
370+
"""
287371
for key in keys:
288372
if isinstance(data, dict):
289373
data = data.get(key, default)

opencage/command_line.py

Lines changed: 69 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import argparse
22
import sys
3-
import os
43
import io
4+
from pathlib import Path
55
import re
66
import csv
77

@@ -10,9 +10,12 @@
1010

1111

1212
def main(args=sys.argv[1:]):
13-
options = parse_args(args)
13+
"""Entry point for the OpenCage CLI.
1414
15-
assert sys.version_info >= (3, 8), "Script requires Python 3.8 or newer"
15+
Args:
16+
args: Command-line arguments (defaults to sys.argv[1:]).
17+
"""
18+
options = parse_args(args)
1619

1720
geocoder = OpenCageBatchGeocoder(options)
1821

@@ -25,11 +28,19 @@ def main(args=sys.argv[1:]):
2528

2629

2730
def parse_args(args):
31+
"""Parse and validate command-line arguments.
32+
33+
Args:
34+
args: List of command-line argument strings.
35+
36+
Returns:
37+
Parsed argparse.Namespace with all options set.
38+
"""
2839
if len(args) == 0:
2940
print("To display help use 'opencage -h', 'opencage forward -h' or 'opencage reverse -h'", file=sys.stderr)
3041
sys.exit(1)
3142

32-
parser = argparse.ArgumentParser(description=f'Opencage CLI {__version__}')
43+
parser = argparse.ArgumentParser(description=f'OpenCage CLI {__version__}')
3344
parser.add_argument('--version', action='version', version=f'%(prog)s {__version__}')
3445

3546
subparsers = parser.add_subparsers(dest='command')
@@ -61,9 +72,9 @@ def parse_args(args):
6172

6273
options = parser.parse_args(args)
6374

64-
if os.path.exists(options.output) and not options.dry_run:
75+
if Path(options.output).exists() and not options.dry_run:
6576
if options.overwrite:
66-
os.remove(options.output)
77+
Path(options.output).unlink()
6778
else:
6879
print(
6980
f"Error: The output file '{options.output}' already exists. You can add --overwrite to your command.",
@@ -78,6 +89,14 @@ def parse_args(args):
7889

7990

8091
def add_optional_arguments(parser):
92+
"""Add optional arguments shared by forward and reverse subcommands.
93+
94+
Args:
95+
parser: argparse subparser to add arguments to.
96+
97+
Returns:
98+
The parser with arguments added.
99+
"""
81100
parser.add_argument(
82101
"--headers",
83102
action="store_true",
@@ -129,6 +148,21 @@ def add_optional_arguments(parser):
129148

130149

131150
def api_key_type(apikey):
151+
"""Validate an OpenCage API key format.
152+
153+
Expects a 32-character lowercase hex string, optionally prefixed
154+
with ``oc_gc_`` (e.g. ``oc_gc_1a2b3c4d5e6f7a8b9c0d1e2f3a4b5c6d``
155+
or ``1a2b3c4d5e6f7a8b9c0d1e2f3a4b5c6d``).
156+
157+
Args:
158+
apikey: API key string to validate.
159+
160+
Returns:
161+
The validated API key string.
162+
163+
Raises:
164+
argparse.ArgumentTypeError: If the key doesn't match the expected format.
165+
"""
132166
pattern = re.compile(r"^(oc_gc_)?[0-9a-f]{32}$")
133167

134168
if not pattern.match(apikey):
@@ -138,6 +172,16 @@ def api_key_type(apikey):
138172

139173

140174
def ranged_type(value_type, min_value, max_value):
175+
"""Create an argparse type function that enforces a value range.
176+
177+
Args:
178+
value_type: Type to convert the argument to (e.g. int, float).
179+
min_value: Minimum allowed value (inclusive).
180+
max_value: Maximum allowed value (inclusive).
181+
182+
Returns:
183+
A type-checking function suitable for argparse's type parameter.
184+
"""
141185
def range_checker(arg: str):
142186
try:
143187
f = value_type(arg)
@@ -152,6 +196,14 @@ def range_checker(arg: str):
152196

153197

154198
def comma_separated_type(value_type):
199+
"""Create an argparse type function that parses comma-separated values.
200+
201+
Args:
202+
value_type: Type to convert each element to (e.g. int, str).
203+
204+
Returns:
205+
A type-checking function suitable for argparse's type parameter.
206+
"""
155207
def comma_separated(arg: str):
156208
if not arg:
157209
return []
@@ -162,6 +214,17 @@ def comma_separated(arg: str):
162214

163215

164216
def comma_separated_dict_type(arg):
217+
"""Parse a comma-separated list of key=value pairs into a dict.
218+
219+
Args:
220+
arg: String like "key1=val1,key2=val2".
221+
222+
Returns:
223+
Dict of parsed key-value pairs, or empty dict if arg is empty.
224+
225+
Raises:
226+
argparse.ArgumentTypeError: If the string is not valid key=value format.
227+
"""
165228
if not arg:
166229
return {}
167230

0 commit comments

Comments
 (0)