66# Requires Python 3.7 or newer. Tested with 3.8 and 3.9.
77
88# Installation:
9- # pip3 install opencage asyncio aiohttp backoff
9+ # pip3 install opencage asyncio aiohttp backoff tqdm
1010
1111import sys , random , time
1212import csv
1313import backoff
1414import asyncio
1515import traceback
1616from opencage .geocoder import OpenCageGeocode , AioHttpError
17+ from tqdm import tqdm
1718
1819api_key = ''
1920infile = 'file_to_geocode.csv'
2425timeout = 5 # For individual HTTP requests. In seconds, default is 1
2526retry_max_tries = 10 # How often to retry if a HTTP request times out
2627retry_max_time = 60 # Limit in seconds for retries
28+ show_progress = True # Show progress bar
2729
2830csv_writer = csv .writer (open (outfile , 'w' , newline = '' ))
2931
32+ progress_bar = show_progress and tqdm (total = 0 , position = 0 , desc = "Addresses geocoded" , dynamic_ncols = True )
33+
3034async def write_one_geocoding_result (geocoding_results , address , address_id ):
3135 if geocoding_results != None and len (geocoding_results ):
3236 first_result = geocoding_results [0 ]
@@ -100,22 +104,29 @@ async def geocode_one_address(address, address_id):
100104
101105
async def run_worker(worker_name, queue):
    """Worker coroutine: geocode addresses from the queue until cancelled.

    Pulls work_items (dicts with an 'id' and an 'address' key) off the
    shared asyncio queue, geocodes each one via geocode_one_address, and
    advances the shared progress bar when enabled. Loops forever; the
    caller cancels the task once queue.join() has returned.

    :param worker_name: label used in the startup log line
    :param queue: asyncio.Queue of work_item dicts
    """
    sys.stderr.write("Worker %s starts...\n" % worker_name)

    while True:
        work_item = await queue.get()
        try:
            await geocode_one_address(work_item['address'], work_item['id'])

            if show_progress:
                # progress_bar is only read here (.update mutates the tqdm
                # object in place, it does not rebind the name), so no
                # 'global' declaration is needed.
                progress_bar.update(1)
        finally:
            # Always mark the item done, even if geocoding raised —
            # otherwise queue.join() in main() would wait forever.
            queue.task_done()
110120
111121
112122
113123
114124async def main ():
125+ global progress_bar
115126 assert sys .version_info >= (3 , 7 ), "Script requires Python 3.7+."
116127
117128 ## 1. Read CSV into a Queue
118- ## Each work_item is an address an id. The id will be part of the output,
129+ ## Each work_item is an address and id. The id will be part of the output,
119130 ## easy to add more settings. Named 'work_item' to avoid the words
120131 ## 'address' or 'task' which are used elsewhere
121132 ##
@@ -133,6 +144,9 @@ async def main():
133144
134145 sys .stderr .write ("%d work_items in queue\n " % queue .qsize ())
135146
147+ if show_progress :
148+ progress_bar .total = queue .qsize ()
149+ progress_bar .refresh ()
136150
 ## 2. Create task workers. These are coroutines; each task takes work_items
 ## from the queue until it's empty. Tasks run in parallel
@@ -158,6 +172,9 @@ async def main():
158172 for task in tasks :
159173 task .cancel ()
160174
175+ if show_progress :
176+ progress_bar .close ()
177+
161178 sys .stderr .write ("All done.\n " )
162179
163180
0 commit comments