Skip to content

Commit 1e0117c

Browse files
authored
Batch example: Guess if input is coordinate pair, if so then do reverse (#51)
1 parent 4792173 commit 1e0117c

2 files changed

Lines changed: 108 additions & 39 deletions

File tree

Changes.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
unreleased
2+
Batch example: Guess whether the input is a coordinate pair; if so, do reverse geocoding
3+
Batch example: Give example of input file format
4+
15
v2.3.0 Tue 04 Jul 2023
26
Batch example: Raise exception when API key fails (quota, missing API key)
37
Batch example: Raise exception when input file contains an empty line. Better

examples/batch.py

Lines changed: 104 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -3,54 +3,105 @@
33
# Background tutorial on async programming with Python
44
# https://realpython.com/async-io-python/
55

6-
# Requires Python 3.7 or newer. Tested with 3.8 and 3.9.
6+
# Requires Python 3.7 or newer. Tested with 3.8/3.9/3.10/3.11.
77

88
# Installation:
99
# pip3 install --upgrade opencage asyncio aiohttp backoff tqdm
1010

11+
# Example (forward) input file:
12+
# 1,"Via Allende 8, Cascina, Toscana, Italia"
13+
# 2,"Via Coppi, 17, Formigine, Emilia-Romagna, Italia"
14+
# 3,"Via Dei Salici 20, Gallarate, Lombardia, Italia"
15+
# 4,"Via Vittorio Veneto N7, San Giuliano Terme, Toscana, Italia"
16+
# 5,"Via Tiro A Segno 8, Gallarate, Lombardia, Italia"
17+
18+
# Example (reverse) input file:
19+
# 1,"43.6783472,10.5533173"
20+
# 2,"44.5655041,10.8412106"
21+
# 3,"45.6823942,8.7919808"
22+
# 4,"43.7804922,10.402925"
23+
# 5,"45.6506236,8.8037173"
24+
# or
25+
# 1,43.6783472,10.5533173
26+
# 2,44.5655041,10.8412106
27+
# 3,45.6823942,8.7919808
28+
# 4,43.7804922,10.402925
29+
# 5,45.6506236,8.8037173
30+
31+
import os
1132
import sys
1233
import csv
34+
import re
1335
import asyncio
1436
import traceback
1537
import backoff
1638
from tqdm import tqdm
1739
from opencage.geocoder import OpenCageGeocode, AioHttpError
1840

41+
42+
43+
44+
45+
1946
API_KEY = ''
2047
FILENAME_INPUT_CSV = 'file_to_geocode.csv'
2148
FILENAME_OUTPUT_CSV = 'file_geocoded.csv'
49+
FORWARD_OR_REVERSE = 'guess' # 'forward' (address -> coordinates) or 'reverse' (coordinates -> address)
50+
# With 'guess' the script checks whether the input is a pair of numbers
51+
# (e.g. '40.78,-73.97') and, if so, assumes reverse geocoding
2252

23-
MAX_ITEMS = 100 # Howy man lines to read from the input file. Set to 0 for unlimited
53+
MAX_ITEMS = 100 # How many lines to read from the input file. Set to 0 for unlimited
2454
NUM_WORKERS = 3 # For 10 requests per second try 2-5
2555
REQUEST_TIMEOUT_SECONDS = 5 # For individual HTTP requests. Default is 1
2656
RETRY_MAX_TRIES = 10 # How many times to retry if an HTTP request times out
2757
RETRY_MAX_TIME = 60 # Limit in seconds for retries
2858
SHOW_PROGRESS = True # Show progress bar
2959

60+
61+
62+
63+
64+
65+
66+
67+
68+
if os.path.exists(FILENAME_OUTPUT_CSV):
69+
sys.stderr.write(f"The output file '{FILENAME_OUTPUT_CSV}' already exists.\n")
70+
sys.exit(1)
71+
3072
csv_writer = csv.writer(open(FILENAME_OUTPUT_CSV, 'w', encoding='utf8', newline=''))
3173

3274
PROGRESS_BAR = SHOW_PROGRESS and tqdm(total=0, position=0, desc="Addresses geocoded", dynamic_ncols=True)
3375

34-
async def write_one_geocoding_result(geocoding_results, address, address_id):
35-
if geocoding_results is not None and len(geocoding_results):
36-
first_result = geocoding_results[0]
76+
# '40.78,-73.97' => True
77+
# '3rd Ave, New York' => False
78+
# Two optionally negative decimal numbers separated by a single comma,
# with no whitespace anywhere. Compiled once because the check runs for
# every input row.
_COORDINATE_PAIR_RE = re.compile(r'-?\d+(?:\.\d+)?,-?\d+(?:\.\d+)?')


def guess_text_is_coordinate_pair(text):
    """Return True if *text* looks like a 'lat,lng' coordinate pair.

    The whole string must consist of two numbers (optional leading minus,
    optional fractional part) joined by one comma, e.g. '40.78,-73.97'.
    Anything else, including a pair with a space after the comma or a
    trailing newline, is treated as a free-form address and yields False.

    Only the textual shape is checked; the numbers are not validated
    against latitude/longitude ranges.
    """
    # fullmatch instead of search with '^...$': '$' also matches just
    # before a trailing newline, which would let '40.78,-73.97\n' through.
    return _COORDINATE_PAIR_RE.fullmatch(text) is not None
83+
84+
async def write_one_geocoding_result(geocoding_result, address, address_id):
85+
# print(geocoding_result, file=sys.stderr)
86+
if geocoding_result is not None:
3787
row = [
3888
address_id,
39-
first_result['geometry']['lat'],
40-
first_result['geometry']['lng'],
89+
geocoding_result['geometry']['lat'],
90+
geocoding_result['geometry']['lng'],
4191
# Any of the components might be empty:
42-
first_result['components'].get('_type', ''),
43-
first_result['components'].get('country', ''),
44-
first_result['components'].get('county', ''),
45-
first_result['components'].get('city', ''),
46-
first_result['components'].get('postcode', ''),
47-
first_result['components'].get('road', ''),
48-
first_result['components'].get('house_number', ''),
49-
first_result['confidence'],
50-
first_result['formatted']
92+
geocoding_result['components'].get('_type', ''),
93+
geocoding_result['components'].get('country', ''),
94+
geocoding_result['components'].get('county', ''),
95+
geocoding_result['components'].get('city', ''),
96+
geocoding_result['components'].get('postcode', ''),
97+
geocoding_result['components'].get('road', ''),
98+
geocoding_result['components'].get('house_number', ''),
99+
geocoding_result['confidence'],
100+
geocoding_result['formatted']
51101
]
52102

53103
else:
104+
sys.stderr.write(f"not found, writing empty result: {address}\n")
54105
row = [
55106
address_id,
56107
0, # not to be confused with https://en.wikipedia.org/wiki/Null_Island
@@ -65,7 +116,6 @@ async def write_one_geocoding_result(geocoding_results, address, address_id):
65116
-1, # confidence values are 1-10 (lowest to highest), use -1 for unknown
66117
''
67118
]
68-
sys.stderr.write(f"not found, writing empty result: {address}\n")
69119
csv_writer.writerow(row)
70120

71121

@@ -84,24 +134,32 @@ def backoff_hdlr(details):
84134
on_backoff=backoff_hdlr)
85135
async def geocode_one_address(address, address_id):
86136
async with OpenCageGeocode(API_KEY) as geocoder:
87-
# address -> coordinates
88-
# note: you may also want to set other optional parameters like
89-
# countrycode, language, etc
90-
# see the full list: https://opencagedata.com/api#forward-opt
137+
global FORWARD_OR_REVERSE
91138
try:
92-
geocoding_results = await geocoder.geocode_async(address, no_annotations=1)
139+
if FORWARD_OR_REVERSE == 'reverse' or \
140+
(FORWARD_OR_REVERSE == 'guess' and guess_text_is_coordinate_pair(address)):
141+
# Reverse:
142+
# coordinates -> address, e.g. '40.78,-73.97' => '101, West 91st Street, New York'
143+
lon_lat = address.split(',')
144+
geocoding_results = await geocoder.reverse_geocode_async(
145+
lon_lat[0], lon_lat[1], no_annotations=1)
146+
else:
147+
# Forward:
148+
# address -> coordinates
149+
# note: you may also want to set other optional parameters like
150+
# countrycode, language, etc
151+
# see the full list: https://opencagedata.com/api#forward-opt
152+
geocoding_results = await geocoder.geocode_async(address, no_annotations=1)
93153
except Exception as exc:
94-
geocoding_results = None
95154
traceback.print_exception(exc, file=sys.stderr)
96155

97-
# coordinates -> address, e.g. '40.78,-73.97' => 101, West 91st Street, New York
98-
# lon_lat = address.split(',')
99-
# geocoding_result = await geocoder.reverse_geocode_async(lon_lat[0], lon_lat[1], no_annotations=1)
100-
# returns a single result so we convert it to a list
101-
# geocoding_results = [geocoding_result]
102-
103156
try:
104-
await write_one_geocoding_result(geocoding_results, address, address_id)
157+
if geocoding_results is not None and len(geocoding_results):
158+
geocoding_result = geocoding_results[0]
159+
else:
160+
geocoding_result = None
161+
162+
await write_one_geocoding_result(geocoding_result, address, address_id)
105163
except Exception as exc:
106164
traceback.print_exception(exc, file=sys.stderr)
107165

@@ -127,7 +185,7 @@ async def run_worker(worker_name, queue):
127185

128186
async def main():
129187
global PROGRESS_BAR
130-
assert sys.version_info >= (3, 7), "Script requires Python 3.7+."
188+
assert sys.version_info >= (3, 7), "Script requires Python 3.7 or newer"
131189

132190
## 1. Read CSV into a Queue
133191
## Each work_item is an address and id. The id will be part of the output,
@@ -138,16 +196,23 @@ async def main():
138196
##
139197
queue = asyncio.Queue(maxsize=MAX_ITEMS)
140198

141-
csv_reader = csv.reader(open(FILENAME_INPUT_CSV, 'r'), strict=True, skipinitialspace=True)
199+
with open(FILENAME_INPUT_CSV, 'r', encoding='utf-8') as infile:
200+
csv_reader = csv.reader(infile, strict=True, skipinitialspace=True)
201+
202+
for row in csv_reader:
203+
if len(row) == 0:
204+
raise Exception(f"Empty line in input file at line number {csv_reader.line_num}, aborting")
142205

143-
for row in csv_reader:
144-
if len(row) == 0:
145-
raise Exception(f"Empty line in input file at line number {csv_reader.line_num}, aborting")
206+
if FORWARD_OR_REVERSE == 'reverse' or \
207+
(FORWARD_OR_REVERSE == 'guess' and len(row) > 2 and \
208+
guess_text_is_coordinate_pair(f"{row[1]},{row[2]}")):
209+
work_item = {'id': row[0], 'address': f"{row[1]},{row[2]}"}
210+
else:
211+
work_item = {'id': row[0], 'address': row[1]}
146212

147-
work_item = {'id': row[0], 'address': row[1]}
148-
await queue.put(work_item)
149-
if queue.full():
150-
break
213+
await queue.put(work_item)
214+
if queue.full():
215+
break
151216

152217
sys.stderr.write(f"{queue.qsize()} work_items in queue\n")
153218

0 commit comments

Comments
 (0)