Skip to content

Commit 7dbd557

Browse files
authored
Add progress bar to python batch example (#46)
1 parent b57110b commit 7dbd557

1 file changed

Lines changed: 19 additions & 2 deletions

File tree

examples/batch.py

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,15 @@
66
# Requires Python 3.7 or newer. Tested with 3.8 and 3.9.
77

88
# Installation:
9-
# pip3 install opencage asyncio aiohttp backoff
9+
# pip3 install opencage asyncio aiohttp backoff tqdm
1010

1111
import sys, random, time
1212
import csv
1313
import backoff
1414
import asyncio
1515
import traceback
1616
from opencage.geocoder import OpenCageGeocode, AioHttpError
17+
from tqdm import tqdm
1718

1819
api_key = ''
1920
infile = 'file_to_geocode.csv'
@@ -24,9 +25,12 @@
2425
timeout = 5 # For individual HTTP requests. In seconds, default is 1
2526
retry_max_tries = 10 # How often to retry if a HTTP request times out
2627
retry_max_time = 60 # Limit in seconds for retries
28+
show_progress = True # Show progress bar
2729

2830
csv_writer = csv.writer(open(outfile, 'w', newline=''))
2931

32+
progress_bar = show_progress and tqdm(total=0, position=0, desc="Addresses geocoded", dynamic_ncols=True)
33+
3034
async def write_one_geocoding_result(geocoding_results, address, address_id):
3135
if geocoding_results != None and len(geocoding_results):
3236
first_result = geocoding_results[0]
@@ -100,22 +104,29 @@ async def geocode_one_address(address, address_id):
100104

101105

102106
async def run_worker(worker_name, queue):
107+
global progress_bar
103108
sys.stderr.write("Worker %s starts...\n" % worker_name)
109+
104110
while True:
105111
work_item = await queue.get()
106112
address_id = work_item['id']
107113
address = work_item['address']
108114
await geocode_one_address(address, address_id)
115+
116+
if show_progress:
117+
progress_bar.update(1)
118+
109119
queue.task_done()
110120

111121

112122

113123

114124
async def main():
125+
global progress_bar
115126
assert sys.version_info >= (3, 7), "Script requires Python 3.7+."
116127

117128
## 1. Read CSV into a Queue
118-
## Each work_item is an address an id. The id will be part of the output,
129+
## Each work_item is an address and an id. The id will be part of the output,
119130
## easy to add more settings. Named 'work_item' to avoid the words
120131
## 'address' or 'task' which are used elsewhere
121132
##
@@ -133,6 +144,9 @@ async def main():
133144

134145
sys.stderr.write("%d work_items in queue\n" % queue.qsize())
135146

147+
if show_progress:
148+
progress_bar.total = queue.qsize()
149+
progress_bar.refresh()
136150

137151
## 2. Create task workers. That is coroutines; each task takes work_items
138152
## from the queue until it's empty. Tasks run in parallel
@@ -158,6 +172,9 @@ async def main():
158172
for task in tasks:
159173
task.cancel()
160174

175+
if show_progress:
176+
progress_bar.close()
177+
161178
sys.stderr.write("All done.\n")
162179

163180

0 commit comments

Comments
 (0)