#!/usr/bin/env perl

# Import geo-coding data into an SQLite database.

# Copyright 2020-2023 Nigel Horne.

# The program code is released under the following licence: GPL2 for personal use on a single computer.
# All other users (including Commercial, Charity, Educational, Government)
# must apply in writing for a licence for use from Nigel Horne at <njh at nigelhorne.com>.

# Set OSM_HOME, OPENADDR_HOME, DR5HN_HOME and WHOSONFIRST_HOME to the directories where the data will be downloaded

# see bin/download_databases which will download the databases and call this script

# Download the OPENADDR_HOME data from http://results.openaddresses.io.
#	cd $OPENADDR_HOME
#	wget --quiet https://data.openaddresses.io/openaddr-collected-global.zip
#	wget --quiet https://data.openaddresses.io/openaddr-collected-global-sa.zip
# or
#	wget -nc --quiet https://s3.amazonaws.com/data.openaddresses.io/openaddr-collected-global.zip
#	wget -nc --quiet https://s3.amazonaws.com/data.openaddresses.io/openaddr-collected-global-sa.zip
# Remove all the data you're not going to use; I only keep au, ca and us
#	unzip openaddr-collected-global.zip
#	unzip openaddr-collected-global-sa.zip
#	echo ?? | sed 's/ /\n/g' | egrep -v '(au|ca|us)' | xargs rm -rf
#	rm -rf summary

# Download the WHOSONFIRST_HOME data from repositories such as
#	https://github.com/whosonfirst-data/whosonfirst-data-venue-us.git
#	I use whosonfirst-data-[venue|admin]-[au|gb|us|ca].git
#	I use whosonfirst-data-[venue]-us-??.git

# Download the DR5HN database:
#	cd $DR5HN_HOME && git clone https://github.com/dr5hn/countries-states-cities-database.git

# Openstreetmap.org data
#	cd $OSM_HOME && /usr/bin/wget https://download.geofabrik.de/europe-latest.osm.bz2
# Note that this can take a day to load in just AU, CA, GB, US

# You can look up a WOF place by https://spelunker.whosonfirst.org/id/$locality_id
#
# Try:
#	sqlite3 whosonfirst-data-latest.db
#	select body from geojson where body like '%north shields%';
# It'll be slow, but it shows the sort of thing this is about,
#	You'll see an id: field (e.g. 1108937103), then look at
#	https://spelunker.whosonfirst.org/id/1108937103

# TODO: perhaps use a layered approach to the database schema
# TODO: import https://raw.githubusercontent.com/dr5hn/countries-states-cities-database/master/countries%2Bstates%2Bcities.json
# TODO: as the database is a simple key/value pair, perhaps CDB will be better

use 5.010;
use strict;
use warnings;
# use autodie qw(:all);
use autodie;	# Don't want system() to die, we catch failures
use Cwd;
use DBI;
use Data::Dumper;
use File::Copy;
use File::Open::NoCache::ReadOnly 0.02;
use File::Slurp;
use Geo::Coder::Abbreviations;
use JSON::MaybeXS;
use LWP::UserAgent::Throttled;
use Locale::AU;	# TODO: use Locale::Geocode
use Locale::CA;
use Locale::Country;
use Locale::US;
use Digest::MD5;
use Encode;
use Geo::StreetAddress::US;
use CHI;
use CHI::Driver::RawMemory;
use File::Basename;
use File::Spec;
use DBD::SQLite::Constants qw/:file_open/;	# For SQLITE_OPEN_READONLY
use IO::AIO;
use Lingua::EN::AddressParse;
use Locale::SubCountry;
use Text::CSV;
use Try::Tiny;
use XML::LibXML::Reader;

# Length, in bytes, handed to aio_readahead() when a data file is opened: 1MB
use constant AIO_READAHEAD_SIZE => 1_048_576;

# Debugging flags.  Each DEBUG_* value is a single bit so that several can be
# or'd together in DEBUG; DEBUG_ALL has every bit set
use constant {
	DEBUG_OFF => 0x00,
	DEBUG_INVALID_LENGTH => 0x01,
	DEBUG_L_EN_A => 0x02,
	DEBUG_NEW_LOCATION => 0x04,
	DEBUG_DETERMINE_LOCATION => 0x08,
	DEBUG_GET_WOF => 0x10,
	DEBUG_BREAKUP => 0x20,
	DEBUG_DATA_VALIDATE => 0x40,
	DEBUG_FLUSH => 0x80,
	DEBUG_ALL => 0xFF,
};

# The flags that are currently enabled
use constant DEBUG => DEBUG_OFF;

# Tuning knobs for the database writes and reads
use constant {
	MAX_INSERT_COUNT => 250,	# Maximum number of CSV rows to insert in a single statement
	# MAX_INSERT_COUNT => 1,	# Maximum number of CSV rows to insert in a single statement
	SQLITE_CHUNK_SIZE => 1_000,	# Number of rows to read at a time
};

# Progress messages contain place names, so write STDOUT as UTF-8.
# PerlIO layer names are written with a leading colon.
binmode(STDOUT, ':encoding(UTF-8)');

# Hand-maintained table of US ZIP codes giving the city and county of each.
# The keys are strings so that the leading zero of the Maine codes is kept.
# NOTE(review): presumably consulted when the imported data gives a ZIP code
# but no city/county - confirm against the code that reads %zipcodes
my %zipcodes = (
	'04350' => { city => 'Litchfield', county => 'Kennebec' },
	'04410' => { city => 'Bradford', county => 'Penobscot' },
	'04490' => { city => 'Topsfield', county => 'Washington' },
	'04653' => { city => 'Bass Harbor', county => 'Hancock' },
	'04654' => { city => 'Machias', county => 'Washington' },
	'04664' => { city => 'Sullivan', county => 'Hancock' },
	'04674' => { city => 'Seal Cove', county => 'Hancock' },
	'04677' => { city => 'Sorrento', county => 'Hancock' },
	'04679' => { city => 'Southwest Harbor', county => 'Hancock' },
	'04681' => { city => 'Stonington', county => 'Hancock' },
	'04685' => { city => 'Swans Island', county => 'Hancock' },
	'04787' => { city => 'Westfield', county => 'Aroostook' },
	'04984' => { city => 'Temple', county => 'Franklin' },
	'32346' => { city => 'Panacea', county => 'Wakulla' },
	'46204' => { city => 'Indianapolis', county => 'Marion' },
	'46206' => { city => 'Indianapolis', county => 'Marion' },
	'46222' => { city => 'Indianapolis', county => 'Marion' },
	'46231' => { city => 'Indianapolis', county => 'Marion' },
	'46282' => { city => 'Indianapolis', county => 'Marion' },
	'46259' => { city => 'Indianapolis', county => 'Marion' },
	'47001' => { city => 'Aurora', county => 'Dearborn' },
	'47864' => { city => 'New Lebanon', county => 'Sullivan' },
	'59276' => { city => 'Whitetail', county => 'Daniels' },
	'59645' => { city => 'White Sulphur Springs', county => 'Meagher' },
	'80011' => { city => 'Aurora', county => 'Arapahoe' },
	'80015' => { city => 'Aurora', county => 'Arapahoe' },
	'80016' => { city => 'Aurora', county => 'Arapahoe' },
	'80018' => { city => 'Aurora', county => 'Arapahoe' },
	'80131' => { city => 'Louviers', county => 'Douglas' },
	'80118' => { city => 'Larkspur', county => 'Douglas' },
	'80202' => { city => 'Denver', county => 'Adams' },
	'80218' => { city => 'Denver', county => 'Adams' },
	'80221' => { city => 'Denver', county => 'Adams' },
	'80222' => { city => 'Denver', county => 'Adams' },
	'80230' => { city => 'Denver', county => 'Adams' },
	'80233' => { city => 'Denver', county => 'Adams' },
	'80234' => { city => 'Denver', county => 'Adams' },
	'80236' => { city => 'Denver', county => 'Adams' },
	'80241' => { city => 'Denver', county => 'Adams' },
	'80293' => { city => 'Denver', county => 'Adams' },
	'80294' => { city => 'Denver', county => 'Adams' },
	'81501' => { city => 'Grand Junction', county => 'Mesa' },
	'81507' => { city => 'Grand Junction', county => 'Mesa' },
	'81432' => { city => 'Ridgway', county => 'Ouray' },
	'80513' => { city => 'Berthoud', county => 'Larimer' },
	'80516' => { city => 'Erie', county => 'Weld' },
	'80550' => { city => 'Windsor', county => 'Weld' },
	'80610' => { city => 'Ault', county => 'Weld' },	# Was misspelt 'Auld'
	'80615' => { city => 'Eaton', county => 'Weld' },
	'80631' => { city => 'Greeley', county => 'Weld' },
	'80634' => { city => 'Greeley', county => 'Weld' },
	'80642' => { city => 'Hudson', county => 'Weld' },
	'80645' => { city => 'La Salle', county => 'Weld' },
	'80650' => { city => 'Pierce', county => 'Weld' },
);

# Lower-case country codes that are imported; every value is 1 so the hash
# can be used as a set
my %openaddresses_supported_countries = map { $_ => 1 } qw(au ca us);

# Additional lower-case country codes that are accepted even though they
# are not in %openaddresses_supported_countries
my @whosonfirst_only_countries = qw(gb);

# Upper-case country code => a length limit for that country's state field.
# NOTE(review): presumably the longest valid state/province abbreviation -
# confirm against the code that reads %max_state_lengths
my %max_state_lengths = (AU => 3, CA => 2, US => 2);

# TODO: Use Geo::Coder::Free::Local, until then keep the data in sync
# Ensure you use abbreviations, e.g. RD not ROAD
#
# Hand-checked locations.  The top-level key looks like the relative path of
# the OpenAddresses file the places belong to, with 'other' holding the
# places outside the US; each value is a list of places.  A place is a hash
# with LAT and LON plus whichever of NAME, NUMBER, STREET, CITY, COUNTY,
# STATE, COUNTRY and POSTCODE are known.
#
# Postcodes that start with a zero must be quoted: a bare 04614 is an octal
# literal in Perl and would be stored as 2444.
my %known_places = (	# Places I've checked with my GPS
	'us/ks/statewide.csv' => [
		{
			'LAT' => 39.005175,
			'LON' => -95.706681,
			'NUMBER' => 3516,
			'STREET' => 'SW MACVICAR AVE',
			'CITY' => 'TOPEKA',
			'COUNTY' => 'SHAWNEE',
			'STATE' => 'KS',
			'COUNTRY' => 'US',
			'POSTCODE' => 66611,
		},
	], 'us/md/statewide.csv' => [
		{
			'LAT' => 39.467270,
			'LON' => -76.823947,
			'NAME' => 'ALL SAINTS EPISCOPAL CHURCH',
			'NUMBER' => 203,
			'STREET' => 'E CHATSWORTH RD',
			'CITY' => 'REISTERSTOWN',
			'COUNTY' => 'BALTIMORE',
			'STATE' => 'MD',
			'COUNTRY' => 'US',
			'POSTCODE' => 21136
		}, {
			'LAT' => 39.6852333333333,
			'LON' => -76.6071166666667,
			'NUMBER' => 7,
			'STREET' => 'JORDAN MILL COURT',
			'CITY' => 'WHITE HALL',
			'COUNTY' => 'BALTIMORE',
			'STATE' => 'MD',
			'COUNTRY' => 'US',
			'POSTCODE' => 21161
		}, {
			'LAT' => 39.633018,
			'LON' => -76.272558,
			'NAME' => 'BALLPARK RESTAURANT',
			'NUMBER' => 3418,
			'STREET' => 'CONOWINGO RD',
			'CITY' => 'DUBLIN',
			'COUNTY' => 'HARFORD',
			'STATE' => 'MD',
			'COUNTRY' => 'US',
			'POSTCODE' => 21154
		}, {
			'NAME' => 'NCBI',
			'LAT' => 38.99516556,
			'LON' => -77.09943963,
			'STREET' => 'MEDLARS DR',
			'CITY' => 'BETHESDA',
			'COUNTY' => 'MONTGOMERY',
			'STATE' => 'MD',
			'COUNTRY' => 'US',
			'POSTCODE' => 20894,
		}, {
			'LAT' => 38.99698114,
			'LON' => -77.10031119,
			'STREET' => 'CENTER DR',
			'CITY' => 'BETHESDA',
			'COUNTY' => 'MONTGOMERY',
			'STATE' => 'MD',
			'COUNTRY' => 'US',
		}, {
			# 'NAME' => 'BOLD BITE',
			'LAT' => 38.98939358,
			'LON' => -77.09819543,
			'STREET' => 'NORFOLK AVE',
			'CITY' => 'BETHESDA',
			'COUNTY' => 'MONTGOMERY',
			'STATE' => 'MD',
			'COUNTRY' => 'US',
		}, {
			'LAT' => 39.028326,
			'LON' => -77.136774,
			'NAME' => 'THE ATRIUM AT ROCK SPRING PARK',
			'NUMBER' => 6555,
			'STREET' => 'ROCKLEDGE DR',
			'CITY' => 'BETHESDA',
			'COUNTY' => 'MONTGOMERY',
			'STATE' => 'MD',
			'COUNTRY' => 'US',
			'POSTCODE' => 20852,
		}, {
			'LAT' => 39.2244603797302,
			'LON' => -77.449615439877,
			'STREET' => 'MOUTH OF MONOCACY RD',
			'CITY' => 'DICKERSON',
			'COUNTY' => 'MONTGOMERY',
			'STATE' => 'MD',
			'COUNTRY' => 'US',
			'POSTCODE' => 20842,
		}, {
			'NAME' => 'PATAPSCO VALLEY STATE PARK',
			'LAT' => 39.29491,
			'LON' => -76.78051,
			'NUMBER' => 8020,
			'STREET' => 'BALTIMORE NATIONAL PK',
			'CITY' => 'ELLICOTT CITY',
			'COUNTY' => 'HOWARD',
			'STATE' => 'MD',
			'COUNTRY' => 'US',
			'POSTCODE' => 21043,
		}, {
			'LAT' => 39.683529,
			'LON' => -77.349405,
			'STREET' => 'ANNANDALE RD',
			'CITY' => 'EMMITSBURG',
			'COUNTY' => 'FREDERICK',
			'STATE' => 'MD',
			'COUNTRY' => 'US',
			'POSTCODE' => 21727
		}, {
			'NAME' => 'UTICA DISTRICT PARK',
			'LAT' => 39.5167883333333,
			'LON' => -77.4015166666667,
			'CITY' => 'FREDERICK',
			'COUNTY' => 'FREDERICK',
			'STATE' => 'MD',
			'COUNTRY' => 'US',
			'POSTCODE' => 21701,
		}, {
			'LAT' => 39.342986,
			'LON' => -77.239770,
			'NUMBER' => 3923,
			'STREET' => 'SUGARLOAF CT',
			'CITY' => 'MONROVIA',
			'COUNTY' => 'FREDERICK',
			'STATE' => 'MD',
			'COUNTRY' => 'US',
			'POSTCODE' => 21770
		}, {
			'NAME' => 'ALBERT EINSTEIN HIGH SCHOOL',
			'LAT' => 39.03869019,
			'LON' => -77.0682871,
			'NUMBER' => 11135,
			'STREET' => 'NEWPORT MILL RD',
			'CITY' => 'KENSINGTON',
			'COUNTY' => 'MONTGOMERY',
			'STATE' => 'MD',
			'COUNTRY' => 'US',
			'POSTCODE' => 20895
		}, {
			'NAME' => 'POST OFFICE',
			'LAT' => 39.02554455,
			'LON' => -77.07178215,
			'NUMBER' => 10325,
			'STREET' => 'KENSINGTON PKWY',
			'CITY' => 'KENSINGTON',
			'COUNTY' => 'MONTGOMERY',
			'STATE' => 'MD',
			'COUNTRY' => 'US',
			'POSTCODE' => 20895
		}, {
			'NAME' => 'NEWPORT MILL MIDDLE SCHOOL',
			'LAT' => 39.0416107,
			'LON' => -77.06884708,
			'NUMBER' => 11311,
			'STREET' => 'NEWPORT MILL RD',
			'CITY' => 'KENSINGTON',
			'COUNTY' => 'MONTGOMERY',
			'STATE' => 'MD',
			'COUNTRY' => 'US',
			'POSTCODE' => 20895
		}, {
			'NAME' => 'SAFEWAY',
			'LAT' => 39.02822438,
			'LON' => -77.0755196,
			'NUMBER' => 10541,
			'STREET' => 'HOWARD AVE',
			'CITY' => 'KENSINGTON',
			'COUNTY' => 'MONTGOMERY',
			'STATE' => 'MD',
			'COUNTRY' => 'US',
			'POSTCODE' => 20895
		}, {
			'NAME' => 'HAIR CUTTERY',
			'LAT' => 39.03323865,
			'LON' => -77.07368044,
			'NUMBER' => 3731,
			'STREET' => 'CONNECTICUT AVE',
			'CITY' => 'KENSINGTON',
			'COUNTY' => 'MONTGOMERY',
			'STATE' => 'MD',
			'COUNTRY' => 'US',
		}, {
			'NAME' => 'STROSNIDERS',
			'LAT' => 39.02781493,
			'LON' => -77.07740792,
			'NUMBER' => 10504,
			'STREET' => 'CONNECTICUT AVE',
			'CITY' => 'KENSINGTON',
			'COUNTY' => 'MONTGOMERY',
			'STATE' => 'MD',
			'COUNTRY' => 'US',
		}, {
			'LAT' => 39.110711,
			'LON' => -76.434062,
			'NAME' => 'DOWNS PARK',
			'STREET' => 'CHESAPEAKE BAY DRIVE',
			'CITY' => 'PASADENA',
			'COUNTY' => 'ANNE ARUNDEL',
			'STATE' => 'MD',
			'COUNTRY' => 'US',
			'POSTCODE' => 21122
		}, {
			'LAT' => 39.102637,
			'LON' => -76.456384,
			'NUMBER' => 1559,
			'STREET' => 'GUERDON CT',
			'CITY' => 'PASADENA',
			'COUNTY' => 'ANNE ARUNDEL',
			'STATE' => 'MD',
			'COUNTRY' => 'US',
			'POSTCODE' => 21122
		}, {
			'NAME' => 'ADVENTIST HOSPITAL',
			'LAT' => 39.049570,
			'LON' => -76.956882,
			'NUMBER' => 11886,
			'STREET' => 'HEALING WAY',
			'CITY' => 'SILVER SPRING',
			'COUNTY' => 'MONTGOMERY',
			'STATE' => 'MD',
			'COUNTRY' => 'US',
			'POSTCODE' => 20904,
		}, {
			'LAT' => 39.017633,
			'LON' => -77.049551,
			'NUMBER' => 9904,
			'STREET' => 'GARDINER AVE',
			'CITY' => 'SILVER SPRING',
			'COUNTY' => 'MONTGOMERY',
			'STATE' => 'MD',
			'COUNTRY' => 'US',
			'POSTCODE' => 20902,
		}, {
			'NAME' => 'FOREST GLEN MEDICAL CENTER',
			'LAT' => 39.016042,
			'LON' => -77.042148,
			'NUMBER' => 9801,
			'STREET' => 'GEORGIA AVE',
			'CITY' => 'SILVER SPRING',
			'COUNTY' => 'MONTGOMERY',
			'STATE' => 'MD',
			'COUNTRY' => 'US',
			'POSTCODE' => 20902,
		}, {
			'LAT' => 39.019385,
			'LON' => -77.049779,
			'NUMBER' => 2322,
			'STREET' => 'HILDAROSE DR',
			'CITY' => 'SILVER SPRING',
			'COUNTY' => 'MONTGOMERY',
			'STATE' => 'MD',
			'COUNTRY' => 'US',
			'POSTCODE' => 20902,
		}, {
			'NAME' => 'LA CASITA PUPESERIA AND MARKET',
			'LAT' => 38.993369,
			'LON' => -77.009501,
			'NUMBER' => 8214,
			'STREET' => 'PINEY BRANCH RD',
			'CITY' => 'SILVER SPRING',
			'COUNTY' => 'MONTGOMERY',
			'STATE' => 'MD',
			'COUNTRY' => 'US',
			'POSTCODE' => 20910,
		}, {
			'LAT' => 38.991667,
			'LON' => -77.030473,
			'NAME' => 'NOAA LIBRARY',
			'NUMBER' => 1315,
			'STREET' => 'EAST-WEST HIGHWAY',
			'CITY' => 'SILVER SPRING',
			'COUNTY' => 'MONTGOMERY',
			'STATE' => 'MD',
			'COUNTRY' => 'US',
			'POSTCODE' => 20910
		}, {
			'NAME' => 'SNIDERS',
			'LAT' => 39.0088797,
			'LON' => -77.04162824,
			'NUMBER' => 1936,
			'STREET' => 'SEMINARY RD',
			'CITY' => 'SILVER SPRING',
			'COUNTY' => 'MONTGOMERY',
			'STATE' => 'MD',
			'COUNTRY' => 'US',
			'POSTCODE' => 20910,
		}, {
			'LAT' => 39.008961,
			'LON' => -77.043030,
			'NUMBER' => 1954,
			'STREET' => 'SEMINARY RD',
			'CITY' => 'SILVER SPRING',
			'COUNTY' => 'MONTGOMERY',
			'STATE' => 'MD',
			'COUNTRY' => 'US',
			'POSTCODE' => 20910,
		}, {
			'LAT' => 39.008845,
			'LON' => -77.043317,
			'NUMBER' => 1956,
			'STREET' => 'SEMINARY RD',
			'CITY' => 'SILVER SPRING',
			'COUNTY' => 'MONTGOMERY',
			'STATE' => 'MD',
			'COUNTRY' => 'US',
			'POSTCODE' => 20910,
		}, {
			'LAT' => 39.008810,
			'LON' => -77.048953,
			'NUMBER' => 9315,
			'STREET' => 'WARREN ST',
			'CITY' => 'SILVER SPRING',
			'COUNTY' => 'MONTGOMERY',
			'STATE' => 'MD',
			'COUNTRY' => 'US',
			'POSTCODE' => 20910,
		}, {
			'LAT' => 39.036439,
			'LON' => -77.025502,
			'NAME' => 'ARCOLA HEALTH AND REHABILITATION CENTER',
			'NUMBER' => 901,
			'STREET' => 'ARCOLA AVE',
			'CITY' => 'SILVER SPRING',
			'COUNTY' => 'MONTGOMERY',
			'STATE' => 'MD',
			'COUNTRY' => 'US',
			'POSTCODE' => 20910,
		}, {
			'LAT' => 39.010436,
			'LON' => -77.048550,
			'NUMBER' => 9411,
			'STREET' => 'WARREN ST',
			'CITY' => 'SILVER SPRING',
			'COUNTY' => 'MONTGOMERY',
			'STATE' => 'MD',
			'COUNTRY' => 'US',
			'POSTCODE' => 20910,
		}, {
			'NAME' => 'SILVER DINER',
			'LAT' => 39.05798753,
			'LON' => -77.12165374,
			'NUMBER' => 12276,
			'STREET' => 'ROCKVILLE PK',
			'CITY' => 'ROCKVILLE',
			'COUNTY' => 'MONTGOMERY',
			'STATE' => 'MD',
			'COUNTRY' => 'US',
			'POSTCODE' => 20852
		}, {
			# LF
			'LAT' => 39.07669788,
			'LON' => -77.12306436,
			'NUMBER' => 1605,
			'STREET' => 'VIERS MILL RD',
			'CITY' => 'ROCKVILLE',
			'COUNTY' => 'MONTGOMERY',
			'STATE' => 'MD',
			'COUNTRY' => 'US',
			'POSTCODE' => 20851
		}, {
			'LAT' => 39.075583,
			'LON' => -77.123833,
			'NUMBER' => 1406,
			'STREET' => 'LANGBROOK PLACE',
			'CITY' => 'ROCKVILLE',
			'COUNTY' => 'MONTGOMERY',
			'STATE' => 'MD',
			'COUNTRY' => 'US',
			'POSTCODE' => 20851
		}, {
			'LAT' => 39.0147541,
			'LON' => -77.05466857,
			'NAME' => 'BP',
			'NUMBER' => 2601,
			'STREET' => 'FOREST GLEN RD',
			'CITY' => 'SILVER SPRING',
			'COUNTY' => 'MONTGOMERY',
			'STATE' => 'MD',
			'COUNTRY' => 'US',
			'POSTCODE' => 20910
		}, {
			'LAT' => 39.06412645,
			'LON' => -77.11252263,
			'NAME' => 'OMEGA STUDIOS',
			'NUMBER' => 12412,	# Suite 14A
			'CITY' => 'ROCKVILLE',
			'COUNTY' => 'MONTGOMERY',
			'STATE' => 'MD',
			'COUNTRY' => 'US',
			'POSTCODE' => 20852
		}, {
			'LAT' => 38.996764,
			'LON' => -76.849323,
			'NAME' => 'NASA',
			'STREET' => '',
			'CITY' => 'GREENBELT',
			'COUNTY' => 'PRINCE GEORGES',
			'STATE' => 'MD',
			'COUNTRY' => 'US',
			'POSTCODE' => 20771
		}, {
			'LAT' => 39.190009,
			'LON' => -76.841152,
			'NUMBER' => 7001,
			'STREET' => 'CRADLEROCK FARM COURT',	# Key was 'ROAD'
			'CITY' => 'COLUMBIA',
			'COUNTY' => 'HOWARD',
			'STATE' => 'MD',
			'COUNTRY' => 'US',
			'POSTCODE' => 21045,
		}
	], 'us/me/statewide.csv' => [
		{
			'LAT' => 44.35378018,
			'LON' => -68.57383976,
			'NUMBER' => 86,
			'STREET' => 'ALLEN POINT LANE',
			# NOTE(review): the Maine town is usually spelt BLUE HILL - confirm
			'CITY' => 'BLUE HILLS',
			'COUNTY' => 'HANCOCK',
			'STATE' => 'ME',
			'COUNTRY' => 'US',
			'POSTCODE' => '04614'
		}, {
			# NOTE(review): these coordinates are the same as TRADEWINDS
			# in BLUE HILLS below, which looks like a copy/paste - confirm
			'LAT' => 44.406700,
			'LON' => -68.597114,
			'NAME' => 'BANGOR AIRPORT',
			'STREET' => 'GODFREY BOULEVARD',
			'CITY' => 'BANGOR',
			'COUNTY' => 'PENOBSCOT',
			'STATE' => 'ME',
			'COUNTRY' => 'US',
			'POSTCODE' => '04401'
		}, {
			'LAT' => 44.40670019,
			'LON' => -68.59711438,
			'NAME' => 'TRADEWINDS',
			'NUMBER' => 15,
			'STREET' => 'SOUTH STREET',
			'CITY' => 'BLUE HILLS',
			'COUNTY' => 'HANCOCK',
			'STATE' => 'ME',
			'COUNTRY' => 'US',
			'POSTCODE' => '04614'
		}, {
			'LAT' => 44.40662476,
			'LON' => -68.59610059,
			'NAME' => 'RITE AID',
			'NUMBER' => 17,
			'STREET' => 'SOUTH STREET',
			'CITY' => 'BLUE HILLS',
			'COUNTY' => 'HANCOCK',
			'STATE' => 'ME',
			'COUNTRY' => 'US',
			'POSTCODE' => '04614'
		}
	], 'us/dc/statewide.csv' => [
		{
			'LAT' => 38.955403,
			'LON' => -76.996241,
			'NUMBER' => 5350,
			'STREET' => 'CHILLUM PLACE NE',
			'CITY' => 'WASHINGTON',
			'STATE' => 'DC',
			'COUNTRY' => 'US',
			'POSTCODE' => 20011
		}, {
			'LAT' => 38.904022,
			'LON' => -77.023113,
			'NAME' => 'WALTER E. WASHINGTON CONVENTION CENTER',
			'NUMBER' => 801,
			'STREET' => 'MT VERNON PL NW',
			'CITY' => 'WASHINGTON',
			'STATE' => 'DC',
			'COUNTRY' => 'US',
			'POSTCODE' => 20001
		}
	], 'us/id/statewide.csv' => [
		{
			'LAT' => 47.693615,
			'LON' => -116.915357,
			'NUMBER' => 880,
			'STREET' => 'SOUTH GREENSFERRY RD',
			# NOTE(review): the city is usually spelt COEUR D'ALENE - confirm
			'CITY' => "COUER D'ALENE",
			'COUNTY' => 'KOOTENAI',
			'STATE' => 'ID',
			'COUNTRY' => 'US',	# Was missing
			'POSTCODE' => 83814
		}, {
			'LAT' => 47.69556,
			'LON' => -116.91564,
			'NUMBER' => 898,
			'STREET' => 'SOUTH GREENSFERRY RD',
			'CITY' => "COUER D'ALENE",
			'COUNTY' => 'KOOTENAI',
			'STATE' => 'ID',
			'COUNTRY' => 'US',	# Was missing
			'POSTCODE' => 83814
		}
	], 'us/in/statewide.csv' => [
		{
			'LAT' => 41.074247,
			'LON' => -85.138531,
			'STREET' => 'DOUGLAS AVE',	# Key was 'ROAD'
			'CITY' => 'FORT WAYNE',
			'COUNTY' => 'ALLEN',
			'STATE' => 'IN',
			'COUNTRY' => 'US',
			'POSTCODE' => 46802,
		}
	], 'us/oh/statewide.csv' => [
		{
			'LAT' => 39.997959,
			'LON' => -82.881320,
			'NAME' => 'JOHN GLENN AIRPORT',
			'NUMBER' => 4600,
			'STREET' => 'INTERNATIONAL GATEWAY',	# Key was 'ROAD'
			'CITY' => 'COLUMBUS',
			'COUNTY' => 'FRANKLIN',
			'STATE' => 'OH',
			'COUNTRY' => 'US',
			'POSTCODE' => 43219,
		}, {
			'LAT' => 41.379695,
			'LON' => -82.222877,
			'NAME' => 'MIDDLE RIDGE PLAZA',
			'CITY' => 'AMHERST',
			# NOTE(review): the county is usually spelt LORAIN - confirm
			'COUNTY' => 'LOHRAIN',
			'STATE' => 'OH',
			'COUNTRY' => 'US',
			'POSTCODE' => 44001
		}, {
			'LAT' => 40.097097,
			'LON' => -83.123745,
			'NAME' => 'RESIDENCE INN BY MARRIOTT',
			'NUMBER' => '6364',
			'STREET' => 'FRANTZ RD',
			'CITY' => 'DUBLIN',
			'STATE' => 'OH',
			'COUNTRY' => 'US',
			'POSTCODE' => 43017
		}, {
			'LAT' => 41.291654,
			'LON' => -81.675815,
			'NAME' => 'TOWPATH TRAVEL PLAZA',
			'CITY' => 'BROADVIEW HEIGHTS',
			'COUNTY' => 'CUYAHOGA',
			'STATE' => 'OH',
			'COUNTRY' => 'US',
			'POSTCODE' => 44147
		}
	], 'us/pa/statewide.csv' => [
		{
			'LAT' => 40.206267,
			'LON' => -79.565682,
			'NAME' => 'NEW STANTON SERVICE PLAZA',
			'CITY' => 'HEMPFIELD',
			'STATE' => 'PA',
			'COUNTRY' => 'US',
			'POSTCODE' => 15639
		}, {
			'LAT' => 39.999154,
			'LON' => -79.046526,
			'NAME' => 'SOUTH SOMERSET SERVICE PLAZA',
			'CITY' => 'SOMERSET',
			'COUNTY' => 'SOMERSET',
			'STATE' => 'PA',
			'COUNTRY' => 'US',
			'POSTCODE' => 15501
		}
	], 'us/va/statewide.csv' => [
		{
			'LAT' => 38.75422,
			'LON' =>  -77.1058666666667,
			'NAME' => 'HUNTLEY MEADOWS PARK',
			'NUMBER' => 3701,
			'STREET' => 'LOCKHEED BLVD',
			'CITY' => 'ALEXANDRIA',
			'STATE' => 'VA',
			'COUNTRY' => 'US',
			'POSTCODE' => 22306
		}, {
			'LAT' => 38.873934,
			'LON' => -77.461939,
			'NUMBER' => 14900,
			'STREET' => 'CONFERENCE CENTER DR',	# Key was misspelt 'STEET'
			'CITY' => 'CHANTILLY',
			'COUNTY' => 'FAIRFAX',
			'STATE' => 'VA',
			'COUNTRY' => 'US',
			'POSTCODE' => 20151
		}, {
			'LAT' => 38.915635,
			'LON' => -77.225730,
			'NAME' => 'THE CAPITAL GRILLE RESTAURANT',
			'NUMBER' => 1861,
			'STREET' => 'INTERNATIONAL DR',	# Key was misspelt 'STEET'
			'CITY' => 'MCLEAN',
			'COUNTY' => 'FAIRFAX',
			'STATE' => 'VA',
			'COUNTRY' => 'US',
			'POSTCODE' => 22102
		}, {
			# NOTE(review): these coordinates are identical to RITE AID in
			# the Maine list, so they are unlikely to be right for VIENNA,
			# VA - re-check and correct
			'LAT' => 44.40662476,
			'LON' => -68.59610059,
			'NAME' => 'THE PURE PASTY COMPANY',
			'NUMBER' => '128C',
			'STREET' => 'MAPLE AVE W',
			'CITY' => 'VIENNA',
			'COUNTY' => 'FAIRFAX',
			'STATE' => 'VA',
			'COUNTRY' => 'US',
			'POSTCODE' => 22180
		}, {
			'LAT' => 39.124843,
			'LON' => -77.535445,
			'NUMBER' => 818,
			'STREET' => 'FERNDALE TERRACE NE',
			'CITY' => 'LEESBURG',
			'COUNTY' => 'LOUDOUN',
			'STATE' => 'VA',
			'COUNTRY' => 'US',
			'POSTCODE' => 20176
		}, {
			'LAT' => 39.136193,
			'LON' => -77.693198,
			'STREET' => 'PURCELLVILLE GATEWAY DR',
			'CITY' => 'PURCELLVILLE',
			'COUNTY' => 'LOUDOUN',
			'STATE' => 'VA',
			'COUNTRY' => 'US',
			'POSTCODE' => 20132
		}, {
			'LAT' => 38.25075,
			'LON' => -76.9602533333333,
			'STREET' => '',
			'CITY' => 'COLONIAL BEACH',
			'COUNTY' => 'WESTMORELAND',
			'STATE' => 'VA',
			'COUNTRY' => 'US',
			'POSTCODE' => 22443
		}
	], 'other' => [
		{
			'LAT' => 51.926793,
			'LON' => 0.70408,
			'NAME' => 'ST ANDREWS CHURCH',
			'STREET' => 'CHURCH HILL',
			'CITY' => 'EARLS COLNE',
			'STATE' => 'ESSEX',
			'COUNTRY' => 'GB'
		}, {
			'LAT' => 51.358967,
			'LON' => 1.391367,
			'NAME' => 'WESTWOOD CROSS',
			'NUMBER' => 23,
			'STREET' => 'MARGATE RD',
			'CITY' => 'BROADSTAIRS',
			'STATE' => 'KENT',
			'COUNTRY' => 'GB',
		}, {
			'LAT' => 51.37875,
			'LON' => 1.1955,
			'NAME' => 'RECULVER ABBEY',
			'STREET' => 'RECULVER',
			'CITY' => 'HERNE BAY',
			'STATE' => 'KENT',
			'COUNTRY' => 'GB',
		}, {
			'LAT' => 51.357510,
			'LON' => 1.388894,
			'NAME' => 'TOBY CARVERY',
			'STREET' => 'NEW HAINE RD',
			'CITY' => 'RAMSGATE',
			'STATE' => 'KENT',
			'COUNTRY' => 'GB',
		}, {
			# This entry used to be listed twice; one copy is enough
			'LAT' => 51.334522,
			'LON' => 1.314417,
			'NAME' => 'NEW INN',
			'NUMBER' => 2,
			'STREET' => 'TOTHILL ST',
			# 'CITY' => 'MINSTER, THANET',
			'CITY' => 'RAMSGATE',
			'STATE' => 'KENT',
			'COUNTRY' => 'GB',
		}, {
			'LAT' => 51.34320725,
			'LON' => 1.31680853,
			'NAME' => 'HOLIDAY INN EXPRESS',
			'STREET' => 'TOTHILL ST',
			# 'CITY' => 'MINSTER, THANET',
			'CITY' => 'RAMSGATE',
			'STATE' => 'KENT',
			'COUNTRY' => 'GB',
		}, {
			'LAT' => 51.33995174,
			'LON' => 1.31570211,
			'NUMBER' => 106,
			'STREET' => 'TOTHILL ST',
			# 'CITY' => 'MINSTER, THANET',
			'CITY' => 'RAMSGATE',
			'STATE' => 'KENT',
			'COUNTRY' => 'GB',
		}, {
			'LAT' => 51.34015944,
			'LON' => 1.31580976,
			'NUMBER' => 114,
			'STREET' => 'TOTHILL ST',
			# 'CITY' => 'MINSTER, THANET',
			'CITY' => 'RAMSGATE',
			'STATE' => 'KENT',
			'COUNTRY' => 'GB',
		}, {
			'LAT' => 51.34203083,
			'LON' => 1.31609075,
			'NAME' => 'MINSTER CEMETERY',
			'NUMBER' => 116,
			'STREET' => 'TOTHILL ST',
			# 'CITY' => 'MINSTER, THANET',
			'CITY' => 'RAMSGATE',
			'STATE' => 'KENT',
			'COUNTRY' => 'GB',
		}, {
			'LAT' => 51.33090893,
			'LON' => 1.31559716,
			'NAME' => 'ST MARY THE VIRGIN CHURCH',
			'STREET' => 'CHURCH ST',
			# 'CITY' => 'MINSTER, THANET',
			'CITY' => 'RAMSGATE',
			'STATE' => 'KENT',
			'COUNTRY' => 'GB',
		}, {
			'LAT' => 51.34772374,
			'LON' => 1.39532565,
			'NUMBER' => 20,
			'STREET' => 'MELBOURNE AVE',
			'CITY' => 'RAMSGATE',
			'STATE' => 'KENT',
			'COUNTRY' => 'GB',
		}, {
			'LAT' => 51.32711,
			'LON' => 1.406806,
			'STREET' => 'WESTCLIFF PROMENADE',
			'CITY' => 'RAMSGATE',
			'STATE' => 'KENT',
			'COUNTRY' => 'GB',
		}, {
			'LAT' => 51.340826,
			'LON' => 1.406519,
			'NAME' => 'RAMSGATE STATION',
			'STREET' => 'STATION APPROACH RD',
			'CITY' => 'RAMSGATE',
			'STATE' => 'KENT',
			'COUNTRY' => 'GB'
		}, {
			'LAT' => 51.5082675,
			'LON' => -0.0754225,
			'NAME' => 'TOWER OF LONDON',
			'NUMBER' => 35,
			'STREET' => 'TOWER HILL',
			'CITY' => 'LONDON',
			'STATE' => 'LONDON',
			'COUNTRY' => 'GB',
			# 'POSTCODE' => 20894,
		}
	]
);

# Install the warning handler at compile time so that it is also in force
# while the rest of the script is being compiled.
# "Use of uninitialized value" warnings are promoted to fatal errors;
# every other warning is passed on unchanged.
BEGIN {
	$SIG{__WARN__} = sub {
		my $warning = shift;
		if($warning =~ /^Use of uninitialized value/) {
			die $warning;
		}
		# Without this, all other warnings - including this script's own
		# warn() calls - would be silently discarded.
		# Perl disables the __WARN__ hook while the handler runs, so
		# this prints the message rather than recursing.
		warn $warning;
	};
}

# File-scoped state shared by the import code, all of it initially empty.
# Look-up tables and caches
my (%postcodes, %global_md5s, %state_md5s, %queued_commits, %cities, %unknown_zips, %state_parent_md5s);
# Scalars that are set as the import progresses
my ($current_state, $city_sequence);

# SQLite version of the MaxMind cities database
my $cities_sql = 'lib/Geo/Coder/Free/MaxMind/databases/cities.sql';

unless(-r $cities_sql) {
	# It hasn't been built yet, so try to build it.
	# A failure is only reported, it doesn't stop the script
	if(system('bash bin/create_sqlite') != 0) {
		warn "Can't create the SQLite database (perhaps you don't have sqlite3 installed) - expect poor performance";
	}

	if(-r $cities_sql) {
		# The build worked: refresh the copy under blib
		my $blib_copy = "blib/$cities_sql";

		unlink($blib_copy) if(-r $blib_copy);
		copy($cities_sql, $blib_copy)
			or die "Can't copy SQLite file to blib: $!";
	}
}

# Nothing more is done when AUTOMATED_TESTING is set
if($ENV{'AUTOMATED_TESTING'}) {
	exit(0);
}

unless(-r 'downloads/allCountries.txt') {
	print "Consider downloading http://download.geonames.org/export/dump/allCountries.zip to downloads/allCountries.txt\n";
}

# Directory holding the OpenAddresses data; the database is created in it
my $oa = $ENV{'OPENADDR_HOME'};

# Without OPENADDR_HOME there is nothing to import
unless(defined($oa) && (length($oa) > 0)) {
	exit(0);
}

$Data::Dumper::Sortkeys = 1;

# Import openaddresses.io data into an SQLite database
# TODO: download and unzip the files from results.openaddresses.io
# TODO: only Australian, US and Canadian data is supported at the moment

my $sqlite_file = "$oa/openaddresses.sql";

# An existing database is left alone rather than being rebuilt
if(-r $sqlite_file) {
	exit(0);
}

my $abbr = Geo::Coder::Abbreviations->new();

# The database file is created inside $oa, so the directory has to exist
# before SQLite is asked to open the file
if(!-d $oa) {
	mkdir $oa;
}

# Create the database.  RaiseError makes DBI die on any failure and
# AutoCommit is off so that the inserts can be committed in batches.
# TODO Make this configurable
# NOTE(review): synchronous and locking_mode look like SQLite PRAGMA names
#	rather than DBI or DBD::SQLite connection attributes - confirm that
#	passing them to connect() has any effect
my $dbh = DBI->connect("dbi:SQLite:dbname=$sqlite_file", undef, undef, { RaiseError => 1, AutoCommit => 0, synchronous => 0, locking_mode => 'EXCLUSIVE' });
if($dbh) {
	$dbh->do('PRAGMA cache_size = -65536');	# 64MB
	$dbh->do('PRAGMA journal_mode = OFF');
}
# $dbh->do('CREATE TABLE cities(sequence INTEGER, city VARCHAR, county VARCHAR, state VARCHAR NOT NULL, country CHAR(2) NOT NULL)');
# $dbh->do('CREATE TABLE openaddresses(md5 CHAR(16), lat DECIMAL, lon DECIMAL, name VARCHAR, number VARCHAR, street VARCHAR, city INTEGER, FOREIGN KEY (city) REFERENCES cities(sequence))');
if((DEBUG&DEBUG_ALL) && (MAX_INSERT_COUNT != 1)) {
	warn "MAX_INSERT_COUNT not set to 1 in DEBUG mode";
}

if($dbh) {
	# The md5 column is only declared as the primary key when rows are
	# inserted one at a time
	if(MAX_INSERT_COUNT == 1) {
		$dbh->do('CREATE TABLE openaddresses(md5 CHAR(16) PRIMARY KEY, lat DECIMAL, lon DECIMAL)');
	} else {
		$dbh->do('CREATE TABLE openaddresses(md5 CHAR(16), lat DECIMAL, lon DECIMAL)');
	}
}
# $dbh->do('CREATE TABLE openaddresses(md5 CHAR(16) PRIMARY KEY, lat DECIMAL, lon DECIMAL, name VARCHAR, number VARCHAR, street VARCHAR, city INTEGER, FOREIGN KEY (city) REFERENCES cities(sequence))');
# $dbh->prepare('CREATE TABLE tree(lat DECIMAL, lon DECIMAL, md5 CHAR(16) NOT NULL, parent CHAR(16))')->execute();

print "This will take some time.\nBest to do it last thing at night and go to sleep, it should be ready in the morning.\n";

# Optional Redis connection, only made when REDIS_SERVER is set;
# otherwise $redis stays undefined
my $redis;

if($ENV{'REDIS_SERVER'}) {
	# Warning:  Redis stores the database in RAM which makes it
	#	slow and unrealistic for most scenarios as it will
	#	use all the memory on your machine

	# Redis is loaded on demand so that it is only needed when used
	require Redis;
	Redis->import();

	my %redis_options = (
		reconnect => 1200,
		every => 5_000_000,
	);

	$redis = Redis->new(%redis_options);
	# Work in database 1, starting from empty
	$redis->select(1);
	$redis->flushdb();
}

# One look-up object for each of the Australian, Canadian and US locale modules
my $au = Locale::AU->new();
my $ca = Locale::CA->new();
my $us = Locale::US->new();

# Import the location of each US state and county.
# Both files are tab separated, with a header row that names the columns.
# Each row is handed to insert() and the queue is flushed whenever
# MAX_INSERT_COUNT rows have been queued.  Nothing is imported unless
# states.txt is readable.
my $filename = 'lib/Geo/Coder/Free/OpenAddresses/databases/states.txt';
if(-r $filename) {
	# Import US states and counties from https://github.com/openaddresses/openaddresses/tree/master/us-data
	$| = 1;
	printf "%-70s\r", $filename;
	print "\n" if(DEBUG);
	$| = 0;
	my %state_fips;	# 'State FIPS' column => state code, filled in from states.txt

	# The same parser settings are used for both files
	my %csv_options = (
		sep_char => "\t",
		allow_loose_quotes => 1,
		blank_is_undef => 1,
		empty_is_undef => 1,
		binary => 1,
		# Workaround for https://github.com/openaddresses/openaddresses/issues/3905
		# escape_char => '\\',	# Put back once issue 3905 has been fixed
	);

	my $csv = Text::CSV->new({ %csv_options });

	my $inserts = 0;

	# new() can give a false value when the file can't be opened (the
	# allCountries.txt import tests for that), so stop here with the
	# name of the file rather than failing on the method call below
	my $fh = File::Open::NoCache::ReadOnly->new($filename)
		or die "Can't open $filename: $!";

	my $fin = $fh->fd();

	# open(my $fin, '<', $filename);

	# The first line holds the column names
	$csv->column_names($csv->getline($fin));

	while(my $row = $csv->getline_hr($fin)) {
		# print Data::Dumper->new([\$row])->Dump();
		my $state;
		unless($state = $us->{state2code}{uc($row->{'Name'})}) {
			die $row->{'Name'};
		}
		# Remembered so that the counties can be matched to their state
		$state_fips{$row->{'State FIPS'}} = $state;
		my %columns = (
			'COUNTRY' => 'US',
			'STATE' => $state,
			'LAT' => $row->{'Latitude'},
			'LON' => $row->{'Longitude'},
		);
		# print "$zip => $query\n";
		$inserts += insert($dbh, $redis, 1, %columns);
		if($inserts >= MAX_INSERT_COUNT) {
			flush_queue($dbh, $redis);
			$inserts = 0;
		}
	}

	$fh->close();

	# Import the counties
	$filename = 'lib/Geo/Coder/Free/OpenAddresses/databases/counties.txt';
	$| = 1;
	printf "%-70s\r", $filename;
	print "\n" if(DEBUG);
	$| = 0;

	$csv = Text::CSV->new({ %csv_options });

	# Only states.txt was tested for readability above
	$fh = File::Open::NoCache::ReadOnly->new($filename)
		or die "Can't open $filename: $!";

	$fin = $fh->fd();

	# open($fin, '<', $filename);

	$csv->column_names($csv->getline($fin));

	while(my $row = $csv->getline_hr($fin)) {
		# print __LINE__, ': ', Data::Dumper->new([\$row])->Dump();
		my $state = $state_fips{$row->{'State FIPS'}};
		die $row->{'Name'} unless(defined($state));
		my $county = uc($row->{'Name'});
		$county =~ s/\s+COUNTY$//;
		$county =~ s/'/''/g;	# O'Brien County, IA
		my %columns = (
			'COUNTRY' => 'US',
			'STATE' => $state,
			'COUNTY' => $county,
			'LAT' => $row->{'Latitude'},
			'LON' => $row->{'Longitude'},
		);
		# print "$zip => $query\n";
		$inserts += insert($dbh, $redis, 1, %columns);
		if($inserts >= MAX_INSERT_COUNT) {
			flush_queue($dbh, $redis);
			$inserts = 0;
		}
	}
	$fh->close();
}

# $current_state is not declared here: it is already declared near the top
# of the file with the other import state, and a second "my" would create a
# new variable that masks the first one
my $current_country;
my %whosonfirst;

# HTTP client that keeps connections alive, has a throttle setting of 1 for
# api.zippopotam.us and takes its proxy settings from the environment
# NOTE(review): the throttle value is presumably the number of seconds to
#	leave between requests - confirm with LWP::UserAgent::Throttled
my $ua = LWP::UserAgent::Throttled->new(keep_alive => 1);
$ua->throttle({ 'api.zippopotam.us' => 1 });
$ua->env_proxy(1);

# Import the GeoNames allCountries dump, using the MaxMind admin1 and admin2 databases to name the states and counties
if((-r 'downloads/allCountries.txt') &&
   # open(my $fin, '<', 'lib/Geo/Coder/Free/GeoNames/databases/allCountries.txt') &&
   # open(my $fin1, '<', 'lib/Geo/Coder/Free/MaxMind/databases/admin1.db') &&
   (my $all_countries = File::Open::NoCache::ReadOnly->new('downloads/allCountries.txt')) &&
   (my $admin1 = File::Open::NoCache::ReadOnly->new('lib/Geo/Coder/Free/MaxMind/databases/admin1.db')) &&
   (my $admin2 = File::Open::NoCache::ReadOnly->new('lib/Geo/Coder/Free/MaxMind/databases/admin2.db'))) {
   # open(my $fin2, '<', 'lib/Geo/Coder/Free/MaxMind/databases/admin2.db')) {
	my $fin = $all_countries->fd();
	aio_readahead($fin, 0, AIO_READAHEAD_SIZE);
	my $fin1 = $admin1->fd();
	aio_readahead($fin1, 0, AIO_READAHEAD_SIZE);
	my $fin2 = $admin2->fd();
	aio_readahead($fin2, 0, AIO_READAHEAD_SIZE);

	$| = 1;
	printf "%-70s\r", 'downloads/allCountries.txt';
	$| = 0;
	print "\n" if(MAX_INSERT_COUNT == 1);

	# $csv->column_names(['key', 'name', 'asciiname', 'lat', 'long', 'skip1', 'skip2', 'country', 'state', 'county', 'skip3', 'skip4', 'skip5', 'skip6', 'skip7', 'skip8']);

	my $csv = Text::CSV->new({
		sep_char => "\t",
		allow_loose_quotes => 1,
		blank_is_undef => 1,
		empty_is_undef => 1,
		binary => 1,
		# Workaround for https://github.com/openaddresses/openaddresses/issues/3905
		# escape_char => '\\',	# Put back once issue 3905 has been fixed
	});

	$csv->column_names(['concatenated_codes', 'name', 'asciiname', 'geonameId']);

	my %admin1;

	while(my $row = $csv->getline_hr($fin1)) {
		$admin1{$row->{'concatenated_codes'}} = $row->{'asciiname'};
	}

	$admin1->close();

	my %admin2;

	while(my $row = $csv->getline_hr($fin2)) {
		$admin2{$row->{'concatenated_codes'}} = $row->{'asciiname'};
	}

	$admin2->close();

	my $inserts = 0;
	my $offset = AIO_READAHEAD_SIZE;

	while(my $line = <$fin>) {
		my @fields = split(/\t/, $line);

		my $country = lc($fields[8]);
		if(!$openaddresses_supported_countries{$country}) {
			my $found = 0;
			foreach my $c(@whosonfirst_only_countries) {
				if($country eq $c) {
					$found = 1;
					last;
				}
			}
			next if(!$found);
		}
		my $city;
		if($fields[7] ne 'ADM1') {
			# Not State/County/Province
			$city = uc($fields[2]);
		}

		my $latitude = $fields[4];
		my $longitude = $fields[5];
		my %columns;
		if($country eq 'gb') {
			my $state = $admin2{uc($country) . '.' . uc($fields[10]) . '.' . uc($fields[11])};
			next if(!defined($state));

			%columns = (
				'CITY' => $city,
				'STATE' => uc($state),
				'COUNTRY' => 'GB',
				'LAT' => $latitude,
				'LON' => $longitude,
			);
		} else {
			my $state = $admin1{uc($country) . '.' . uc($fields[10])};
			next if(!defined($state));
			$state = uc($state);
			if($state eq 'YUKON') {
				$state = 'YT';
			} elsif($state eq 'WASHINGTON, D.C.') {
				$state = 'DC';
			} elsif(my $sc = Locale::SubCountry->new($country)) {
				if(my $code = $sc->code($state)) {
					$code = uc($code);
					$state = $code if($code ne 'UNKNOWN');
					# die "$state:\n", Data::Dumper->new([\@fields])->Dump();
				} else {
					die $state;
				}
			} else {
				die "Locale::SubCountry failed on $country";
			}
			if(defined($city) && ($city =~ /(.+)\s+county$/i)) {
				%columns = (
					'COUNTY' => uc($1),
					'STATE' => uc($state),
					'COUNTRY' => uc($country),
					'LAT' => $latitude,
					'LON' => $longitude,
				);
			} else {
				%columns = (
					'CITY' => $city,
					'STATE' => uc($state),
					'COUNTRY' => uc($country),
					'LAT' => $latitude,
					'LON' => $longitude,
				);
			}
		}
		# print Data::Dumper->new([\%columns])->Dump() if(!defined($city));
		$inserts += import(row => \%columns, file => "$country/countrywide.csv", ua => $ua, dbh => $dbh, redis => $redis, global => 1);
		if($inserts >= MAX_INSERT_COUNT) {
			flush_queue($dbh, $redis);
			aio_readahead($fin, $offset, AIO_READAHEAD_SIZE);
			$offset += AIO_READAHEAD_SIZE;
			$inserts = 0;
		}
	}
	$all_countries->close();
}
flush_queue($dbh, $redis);

# print "Debug exit\n" if(DEBUG&DEBUG_ALL);
# $dbh->commit();
# $dbh->disconnect();
# $wof_global_dbh->disconnect();
# exit(0);

my $inserts = 0;

my %address_parsers;	# Lingua::EN::AddressParse objects, created on demand and keyed by country

# my $wof_global_dbh;

# Import the Who's On First data: every .geojson file listed by
# create_tree_from_git() for $WHOSONFIRST_HOME.
# For each record the country, state and city are worked out from the record's
# properties (falling back to get_wof() lookups on the hierarchy), and up to
# three rows are handed to import(): one carrying the place name, one without
# it, and one built from parsing addr:full.
# Records that are superseded, not current, have no geometry, or whose
# state/city can't be determined are skipped.
if(my $whosonfirst = $ENV{'WHOSONFIRST_HOME'}) {
	# The last region resolved by get_wof().  These live outside the loop so
	# that the value survives from one file to the next
	my $region_id;
	my $region_name;

	# Find all of the .geojson files in $WHOSONFIRST_HOME
	foreach my $geojson_file (create_tree_from_git($whosonfirst, 0)) {
		$| = 1;
		printf "%-70s\r", $geojson_file;
		print "\n" if(DEBUG);
		$| = 0;

		my $j = JSON::MaybeXS->new()->utf8();
		my $data = File::Slurp::read_file($geojson_file);
		$data = $j->decode($data);
		my $properties = $data->{'properties'};
		# print "processing ", $properties->{'wof:id'}, "\n";
		next if(exists($properties->{'wof:superseded_by'}) && scalar(@{$properties->{'wof:superseded_by'}}));
		next if(exists($properties->{'mz:is_current'}) && ($properties->{'mz:is_current'} == 0));
		next if(exists($properties->{'src:geom'}) && ($properties->{'src:geom'} eq 'missing'));	# https://github.com/whosonfirst-data/whosonfirst-data/issues/1346
		my $placetype = $properties->{'wof:placetype'};
		next if(!defined($placetype));
		next if($placetype eq 'country');
		next if($placetype eq 'marketarea');
		my $country = $properties->{'wof:country'};
		next if(!defined($country));
		if(!$openaddresses_supported_countries{lc($country)}) {
			my $found = 0;
			foreach my $c(@whosonfirst_only_countries) {
				if(lc($country) eq $c) {
					$found = 1;
					last;
				}
			}
			next if(!$found);
		}
		$country = uc($country);
		# print $placetype, "\n";

		# Work out the state, first from the record itself
		my $state;
		if($placetype eq 'region') {
			# print "\t$country\n";
			if(($country eq 'US') || ($country eq 'CA') || ($country eq 'AU')) {
				$state = $properties->{'wof:abbreviation'} || $properties->{'wof:shortcode'} || $properties->{'wof:name'};
			} else {
				$state = $properties->{'wof:name'};
			}
			# print "\t$state\n";
		} else {
			$state = $properties->{'sg:province'};
			if((!defined($state)) && ($country eq 'US') && (my $rc = $properties->{'qs:gn_nameadm1'})) {
				# NOTE(review): the escaped '[' means this only matches the literal
				# text "[A-Z]]", so the die is effectively unreachable.  It looks like
				# /^[A-Z]{2}$/ was meant, but fixing the pattern would arm the die -
				# decide what should happen to a two letter code before changing it
				if($rc =~ /^\[A-Z]{2}$/) {
					die $rc;
				}
			}
		}
		# if((!defined($state)) && ($placetype eq 'borough') && (my $region = $properties->{'wof:region_id'})) {
			# # FIXME:  This is probably a dup of the next if clause
			# $state = get_wof($wof_global_dbh, $region);
		# }
		if((!defined($state)) && (my $a1 = ($properties->{'as:a1'} || $properties->{'qs:a1'} || $properties->{'qs:name_adm1'} || $properties->{'qs_pg:name_adm1'} || $properties->{'woe:name_adm1'}))) {
			if(($a1 eq 'England') || ($a1 eq 'Scotland') || ($a1 eq 'Wales') || ($a1 eq 'Northern Ireland')) {
				$a1 = $properties->{'qs:name_adm2'};
			}
			if(($country eq 'US') || ($country eq 'CA') || ($country eq 'AU')) {
				if(my $sc = Locale::SubCountry->new($country)) {
					if($a1 =~ /^(.+)[\s_]COUNTY/i) {
						$a1 = $1;
					}
					$a1 =~ s/^\W+//;
					# die Data::Dumper->new([$properties])->Dump();
					if(my $code = $sc->code($a1)) {
						$code = uc($code);
						$state = $code if($code ne 'UNKNOWN');
					}
				} else {
					die "Locale::SubCountry failed on $country";
				}
			} else {
				$state = $a1;
			}
			if($state) {
				$state =~ s/^\W+//;
				# "City of ..." is not a state, fall through to the hierarchy lookup
				if($state =~ /^CITY[_\s]OF/i) {
					$state = undef;
				} else {
					die Data::Dumper->new([$properties])->Dump() if($state eq 'England');
					print "Gained $state from looking at the record\n" if(DEBUG&DEBUG_DETERMINE_LOCATION);
				}
			}
		}

		# Failing that, look up the region given in the record's hierarchy
		if(!defined($state)) {
			my @hierarchy = @{$properties->{'wof:hierarchy'}};
			if(scalar(@hierarchy) && (my $region = $hierarchy[0]->{'region_id'})) {
				if(defined($region_id) && ($region == $region_id)) {
					$state = $region_name;
					# print "Saved state = $state\n" if(DEBUG&DEBUG_ALL);
				} else {
					next if($region < 0);
					print "Getting state from hierarchy:\n\t", Data::Dumper->new([$properties])->Dump() if(DEBUG&DEBUG_DETERMINE_LOCATION);
					$state = get_wof($properties, $region);
					if($state) {
						print "\tGot $state\n" if(DEBUG&DEBUG_DETERMINE_LOCATION);
						# Remember the region's name, since consecutive entries in the file are often the same,
						# this saves a number of calls to get_wof()
						$region_id = $region;
						$region_name = $state;
					} else {
						print "\tCouldn't work out the state\n" if(DEBUG&DEBUG_DETERMINE_LOCATION);
					}
				}
			} else {
				# print "\tcan't determine the state\n",
					# Data::Dumper->new([$properties])->Dump();
				# print "\tcan't determine the state\n";
				next;
			}
			# FIXME: the information will be in there somewhere
			# if(!defined($state)) {
				# die Data::Dumper->new([$properties])->Dump();
			# }
			next unless($state);
		}
		# Where the country has a maximum state length, try to swap an over-long name for its code
		if($max_state_lengths{$country} && (length($state) > $max_state_lengths{$country})) {
			if(my $sc = Locale::SubCountry->new($country)) {
				if(my $code = $sc->code($state)) {
					$code = uc($code);
					$state = $code if($code ne 'UNKNOWN');
					# die Data::Dumper->new([$properties])->Dump();
				}
			} else {
				die "Locale::SubCountry failed on $country";
			}
		}

		# Work out the city
		my $city;
		if(($placetype eq 'locality') || ($placetype eq 'neighbourhood') || ($placetype eq 'borough')) {
			$city = $properties->{'wof:name'};
			if(($placetype eq 'borough') && (my $parent = $properties->{'wof:parent_id'})) {
				# A borough is stored as "borough, parent"; import() later splits the two apart
				if($parent = get_wof($properties, $parent)) {
					$city = "$city, $parent";
				# } else {
					# Most likely the parent is in a different database
					# die "Can't determine the parent for $city in ", $properties->{'wof:id'}, "->$parent";
				}
			}
			die "Can't determine the city" if(!defined($city));
		} else {
			$city = $properties->{'sg:city'};
			# Don't trust sg:city to be correct
			my @hierarchy = @{$properties->{'wof:hierarchy'}};
			if(scalar(@hierarchy) && (my $locality = $hierarchy[0]->{'locality_id'})) {
				if(my $w = get_wof($properties, $locality)) {
					$city = $w;
				}
			}
		}
		my $file = basename($geojson_file);

		if($placetype eq 'county') {
			# TODO
			# print "\tcounty: ", $properties->{'wof:name'}, ", $state, $country\n";
				# Data::Dumper->new([$properties])->Dump();
			my $row = {
				'LAT' => $properties->{'geom:latitude'},
				'LON' => $properties->{'geom:longitude'},
				'COUNTY' => uc($properties->{'wof:name'}),
				'STATE' => $state,
				'COUNTRY' => $country,
			};
			$inserts += import(row => $row, file => $file, ua => $ua, dbh => $dbh, redis => $redis, global => 1);
			next;
		}
		# print "\t$country, $state", ($city ? ", $city" : ''), "\n";
		if(!defined($city)) {
			# Last resort: see if the full address contains a city
			my $address = $properties->{'addr:full'};
			next if(!defined($address));
			if(my $href = (Geo::StreetAddress::US->parse_location($address) || Geo::StreetAddress::US->parse_address($address))) {
				if($href->{city}) {
					$city = $href->{city};
				}
			}
		}
		next if(!defined($city));
		my $postcode = $properties->{'addr:postcode'};
		if($postcode) {
			$postcode = uc($postcode);
		}
		my $street = $properties->{'addr:street'};
		if($street) {
			$street = uc($street);
		}
		my $number = $properties->{'addr:number'} || $properties->{'addr:housenumber'};
		if($number) {
			$number = uc($number);
		}
		$state = uc($state);
		# print "\tqueuing ", $properties->{'wof:id'}, ': ', $properties->{'wof:name'}, ", $city, $state, $country\n";

		# First the named place, when the name is something other than the city
		if(my $name = $properties->{'wof:name'}) {
			if($name ne $city) {
				my $row = {
					'LAT' => $properties->{'geom:latitude'} || $properties->{'lbl:latitude'},
					'LON' => $properties->{'geom:longitude'} || $properties->{'lbl:longitude'},
					'NAME' => uc($name),
					'NUMBER' => $number,
					'STREET' => $street,
					'CITY' => uc($city),
					'STATE' => $state,
					'COUNTRY' => $country,
					'POSTCODE' => $postcode,
				};
				unless($row->{'LAT'} && $row->{'LON'}) {
					print 'Empty LAT/LON in WOF entry ',
						$properties->{'wof:id'},
						" in $file ",
						Data::Dumper->new([$properties])->Dump()
						if(DEBUG&DEBUG_DATA_VALIDATE);
					next;
				}
				$inserts += import(row => $row, file => $file, ua => $ua, dbh => $dbh, redis => $redis, global => 1);
			}
		}

		# Then the same location without the name
		my $row = {
			'LAT' => $properties->{'geom:latitude'},
			'LON' => $properties->{'geom:longitude'},
			# 'NAME' => uc($properties->{'wof:name'}),
			'NUMBER' => $number,
			'STREET' => $street,
			'CITY' => uc($city),
			'STATE' => $state,
			'COUNTRY' => $country,
			'POSTCODE' => $postcode,
		};
		$inserts += import(row => $row, file => $file, ua => $ua, dbh => $dbh, redis => $redis, global => 1);

		# Finally whatever can be parsed out of the full address
		if(my $addr_full = $properties->{'addr:full'}) {
			my $ap;
			if($country =~ /^ENGLAND/) {
				$address_parsers{'GB'} ||= Lingua::EN::AddressParse->new(country => 'GB', auto_clean => 1, force_case => 1, force_post_code => 0);
				$ap = $address_parsers{'GB'};
			} else {
				$address_parsers{$country} ||= Lingua::EN::AddressParse->new(country => $country, auto_clean => 1, force_case => 1, force_post_code => 0);
				$ap = $address_parsers{$country};
			}
			if($ap) {
				# A true return from parse() is treated as a failure
				if($ap->parse($addr_full)) {
					# print STDERR 'Address parse failed: ', $ap->report(), "\n";
				} else {
					my %c = $ap->components();
					if(my $type = $c{'street_type'}) {
						$c{'street_name'} .= " $type";
					}
					$row = {
						'LAT' => $properties->{'geom:latitude'},
						'LON' => $properties->{'geom:longitude'},
						'NUMBER' => $c{'property_identifier'},
						'STREET' => uc($c{'street_name'}),
						'CITY' => uc($c{'suburb'}),
						'STATE' => $state,
						'COUNTRY' => $country,
						'POSTCODE' => uc($c{'post_code'}),
					};
					# print(Data::Dumper->new([$row])->Dump()) if(DEBUG&DEBUG_ALL);
					$inserts += import(row => $row, file => $file, ua => $ua, dbh => $dbh, redis => $redis, global => 1);
				}
			}
		}
		if($inserts >= MAX_INSERT_COUNT) {
			flush_queue($dbh, $redis);
			$inserts = 0;
		}
	}
}

# Find all of the .csv files in $OPENADDR_HOME
#
# Each file's path relative to $oa gives the country and, when there is a
# second directory level, the state.  The hand curated %known_places entries
# for the file are imported first, followed by every row of the file itself.
# The de-duping hashes are cleared whenever the US state, or the country,
# changes.
foreach my $csv_file (create_tree($oa, 1)) {
	# next unless($csv_file =~ /us\/md\/statewide/);
	# next unless($csv_file =~ /us\/ne\/dawes/);
	# next unless($csv_file =~ /us\/in\//);

	# Handle https://github.com/openaddresses/openaddresses/issues/3928
	# TODO: It would be better to merge airdrie.csv and city_of_airdrie.csv
	next if($csv_file =~ /ca\/ab\/airdrie\.csv/);

	# Path relative to $oa, e.g. "us/md/statewide.csv".
	# $oa is a directory name, not a pattern, so it must be quoted
	my $f = $csv_file;
	$f =~ s/^\Q$oa\E\///;
	my @components = split(/\//, $f);
	if(my $country = uc($components[0])) {
		my $state;
		my $file;

		next unless($openaddresses_supported_countries{lc($country)});

		if($components[1] =~ /\.csv$/) {
			# country/file.csv
			$file = $components[1];
		} else {
			# country/state/file.csv
			$state = uc($components[1]);
			$file = $components[2];
		}

		# Clear the deduping hash when we can, to avoid it becoming too large
		if($country eq 'US') {
			if((!defined($current_state)) || ($state ne $current_state)) {
				print "New state, clear state_md5s; state = $state, country = $country\n" if(DEBUG&DEBUG_NEW_LOCATION);
				flush_queue($dbh, $redis);	# Check for hanging dups in current state
				%state_md5s = ();
				%state_parent_md5s = ();
				$current_country = $country;
				$current_state = $state;
			}
		} elsif((!defined($current_country)) || ($country ne $current_country)) {
			# print "New country, clear state_md5s\n";
			flush_queue($dbh, $redis);	# Check for hanging dups in current country
			%state_md5s = ();
			%state_parent_md5s = ();
			$current_country = $country;
		}

		# Import this state's hand curated data
		if(my $k = $known_places{$f}) {
			# print "Known place:\n\t", Data::Dumper->new([\$k])->Dump();
			foreach my $row(@{$k}) {
				$inserts += import(row => $row, file => $file, ua => $ua, dbh => $dbh, redis => $redis, global => 1);
				if($inserts >= MAX_INSERT_COUNT) {
					flush_queue($dbh, $redis);
					$inserts = 0;
				}
			}
		}

		my $fh = File::Open::NoCache::ReadOnly->new($csv_file)
			or die($csv_file);

		my $fin = $fh->fd();

		# open(my $fin, '<', $csv_file);
		aio_readahead($fin, 0, AIO_READAHEAD_SIZE);
		my $offset = AIO_READAHEAD_SIZE;

		$| = 1;
		printf "%-70s\r", $f;
		$| = 0;
		print "\n" if(MAX_INSERT_COUNT == 1);

		# Import this state's OpenAddresses data
		my $csv = Text::CSV->new({
			# sep_char => $sep_char,
			allow_loose_quotes => 1,
			blank_is_undef => 1,
			empty_is_undef => 1,
			binary => 1,
			# Workaround for https://github.com/openaddresses/openaddresses/issues/3905
			# escape_char => '\\',	# Put back once issue 3905 has been fixed
		});

		# The first line of the file names the columns
		$csv->column_names($csv->getline($fin));

		while(my $row = $csv->getline_hr($fin)) {
			$inserts += import(country => $country, state => $state, row => $row, file => $csv_file, ua => $ua, dbh => $dbh, redis => $redis);
			if($inserts >= MAX_INSERT_COUNT) {
				flush_queue($dbh, $redis);
				# Ask for the next part of the file while the queue has just been emptied
				aio_readahead($fin, $offset, AIO_READAHEAD_SIZE);
				$inserts = 0;
				$offset += AIO_READAHEAD_SIZE;
			}

		}

		$fh->close();

		flush_queue($dbh, $redis);	# Check for hanging dups in current state
		$inserts = 0;
	}
}

# print "Debug exit\n" if(DEBUG&DEBUG_ALL);
# $dbh->commit();
# $dbh->disconnect();
# $wof_global_dbh->disconnect();
# exit(0);

# Write out anything left over from the last state, then drop the
# per-state de-duping hashes
flush_queue($dbh, $redis);
%state_parent_md5s = ();
%state_md5s = ();

# Import the hand curated 'other' places once for each of the countries
# that are only covered by Who's On First
for my $wof_country (@whosonfirst_only_countries) {
	my $pseudo_file = "$wof_country/countrywide.csv";

	if(my $curated = $known_places{'other'}) {
		# print "Known place:\n\t", Data::Dumper->new([\$curated])->Dump();
		for my $place (@{$curated}) {
			$inserts += import(
				row => $place,
				file => $pseudo_file,
				ua => $ua,
				dbh => $dbh,
				redis => $redis
			);
		}
	}

	# Only flush once enough rows have been queued
	next if($inserts < MAX_INSERT_COUNT);

	flush_queue($dbh, $redis);
	$inserts = 0;
}

flush_queue($dbh, $redis);
%whosonfirst = ();

# Import the cities of the US, Canada and Australia from the DR5HN
# countries+states+cities.json file, when $DR5HN_HOME is set.
# Each city is handed to import() with its state code, latitude and longitude.
if(my $dr5hn = $ENV{'DR5HN_HOME'}) {
	my $filename = File::Spec->catfile($dr5hn, 'countries+states+cities.json');

	$| = 1;
	printf "%-70s\r", $filename;
	print "\n" if(DEBUG);
	$| = 0;

	my $j = JSON::MaybeXS->new()->utf8();

	# The whole file is read and decoded in one go:
	# a list of countries, each with its states, each with its cities
	my $data = File::Slurp::read_file($filename);
	$data = $j->decode($data);

	foreach my $country(@{$data}) {
		if(($country->{'iso3'} eq 'USA') || ($country->{'iso3'} eq 'CAN') || ($country->{'iso3'} eq 'AUS')) {
			# Store the US under its two letter code.
			# (This used to compare against the misspelt 'United Statees')
			if($country->{'name'} eq 'United States') {
				$country->{'name'} = 'US';
			}
			foreach my $state(@{$country->{'states'}}) {
				foreach my $city(@{$state->{'cities'}}) {
					my $row = {
						'COUNTRY' => $country->{'name'},
						'STATE' => $state->{'state_code'},
						'CITY' => $city->{'name'},
						'LAT' => $city->{'latitude'},
						'LON' => $city->{'longitude'},
					};
					$inserts += import(row => $row, file => $filename, ua => $ua, dbh => $dbh, redis => $redis);
					# print Data::Dumper->new([$row])->Dump();
					if($inserts >= MAX_INSERT_COUNT) {
						flush_queue($dbh, $redis);
						$inserts = 0;
					}
				}
			}
		# } elsif($country->{'iso3'} eq 'GBR') {
			# TODO
		}
	}

	flush_queue($dbh, $redis);
	$inserts = 0;
}

# Import towns from the OpenStreetMap extracts in $OSM_HOME, when it is set.
# Each .osm.bz2 file is decompressed by bzcat and read as a stream.  A node is
# a candidate once it has a name, an is_in value, a place tag and a position.
# The is_in text is then normalised into "[town, ]state, country" form and the
# node is handed to import() if the country can be determined.
if(my $osm = $ENV{'OSM_HOME'}) {
	# Openstreetmap

	my @files = ('north-america-latest.osm.bz2', 'europe-latest.osm.bz2', 'australia-oceania-latest.osm.bz2');

	foreach my $file(@files) {
		my $filename = File::Spec->catfile($osm, $file);

		$| = 1;
		printf "%-70s\r", $filename;
		$| = 0;
		print "\n" if(MAX_INSERT_COUNT == 1);

		# TODO: check for the presence of bzcat
		# The list form avoids the shell, so that the filename is passed to bzcat untouched
		open(my $pin, '-|', 'bzcat', $filename)
			or die "bzcat $filename: $!";
		my $reader = XML::LibXML::Reader->new(FD => $pin)
			or die "cannot read $filename";

		# What is known about the node currently being read
		my $in_node;
		my $node;
		my $name;
		my $is_in;
		my $lat;
		my $lon;
		my $place;

		while($reader->read()) {
			# These constants are not exported by default :-(
			if($reader->nodeType() == 1) {
				# Start of an element
				$node = $reader->name();
				if($node eq 'node') {
					if($reader->hasAttributes()) {
						$lat = $reader->getAttribute('lat');
						$lon = $reader->getAttribute('lon');
						$in_node = 1;
						$name = undef;
						$is_in = undef;
						# Don't let an earlier node's place tag qualify this one
						$place = undef;
					}
				} elsif($in_node) {
					# NOTE(review): $in_node is only cleared once a complete record has
					# been seen, so tags of later elements that are not nodes are also
					# examined here - confirm that this is intended
					if($node eq 'tag') {
						if($reader->hasAttributes()) {
							my $key = $reader->getAttribute('k');
							# print "$key\n";
							if($key eq 'name:en') {
								# The English name takes priority over the local name
								$name = $reader->getAttribute('v');
								# print "$name\n";
							} elsif(($key eq 'name') && !defined($name)) {
								$name = $reader->getAttribute('v');
							} elsif($key eq 'is_in') {
								$is_in = $reader->getAttribute('v');
							} elsif($key eq 'is_in:country') {
								# Add the country to the end of is_in, unless it is already there
								my $country = $reader->getAttribute('v');
								if(defined($is_in) && ($is_in !~ /\Q$country\E$/)) {
									$is_in .= ", $country";
								} elsif(!defined($is_in)) {
									$is_in = $country;
								}
							} elsif($key eq 'place') {
								$place = $reader->getAttribute('v');
							}
						}
					}
				}
			} elsif($reader->nodeType() == 15) {
				# End of an element
				if(defined($name) && defined($is_in) && defined($lat) && defined($lon) && defined($place)) {
					my $add_record = 1;

					# Normalise the many ways that the country has been written
					$is_in =~ s/,(\w)/, $1/g;
					$is_in =~ s/, United Kingdom/, GB/;
					$is_in =~ s/, UK,.+$/, GB/;
					$is_in =~ s/^UK, UK$/GB/;
					$is_in =~ s/GB, GB$/GB/;
					$is_in =~ s/, UK$/, GB/;
					$is_in =~ s/(England|Scotland|Wales), GB/GB/;
					$is_in =~ s/Yorkshire, UK/Yorkshire, GB/;
					$is_in =~ s/, Europe$//;
					$is_in =~ s/;\s?/, /g;
					$is_in =~ s/United States of America.*/US/;
					$is_in =~ s/United States$/US/;
					$is_in =~ s/USA$/US/;
					$is_in =~ s/(\w)? USA$/$1, US/;
					my $preamble;
					my $state;
					my $country;
					if($is_in =~ /(.+), (.+), US/) {
						$preamble = $1;
						$state = $2;
						$country = 'US';
					} elsif($is_in =~ /^(.+), US/) {
						$state = $1;
						$country = 'US';
					}
					if($state && (length($state) > 2)) {
						# Replace the name of the US state with its code
						if(my $code = $us->{state2code}{uc($state)}) {
							if($preamble) {
								$is_in = "$preamble, $code, US";
							} else {
								$is_in = "$code, US";
							}
							$country = 'US';
						} else {
							warn "$is_in: unknown US state $state" if(DEBUG&DEBUG_DATA_VALIDATE);
							$add_record = 0;
						}
					} elsif(my $code = $us->{state2code}{uc($is_in)}) {
						# is_in is just the name of a state or province
						$is_in = "$code, US";
					} elsif($code = $ca->{province2code}{uc($is_in)}) {
						$is_in = "$code, Canada";
						$country = 'Canada';
					} elsif($code = $au->{state2code}{uc($is_in)}) {
						$is_in = "$code, Australia";
						$country = 'Australia';
					} else {
						if($is_in =~ /(.+), (.+), Canada/) {
							$preamble = $1;
							$state = $2;
							$country = 'Canada';
						} elsif($is_in =~ /^(.+), Canada/) {
							$state = $1;
							$country = 'Canada';
						}
						if($state && (length($state) > 2)) {
							if($state =~ /Qu.bec/) {
								$state = 'Quebec';
							}
							if(my $code = $ca->{province2code}{uc($state)}) {

								if($preamble) {
									$is_in = "$preamble, $code, Canada";
								} else {
									$is_in = "$code, Canada";
								}
								$country = 'Canada';
							} else {
								die "$is_in: unknown Canadian province $state";
							}
						} elsif($is_in =~ /(Australia|Canada|US|GB)$/) {
							$add_record = 0;
						}
					}
					if($add_record) {
						if($is_in !~ /,/) {
							if($is_in ne 'GB') {
								if(!Locale::Country::country2code($is_in)) {
									$add_record = 0;
								}
							}
						} elsif($is_in =~ /(.+), (.+)$/) {
							$country = $2;
							if($country ne 'GB') {
								if(!Locale::Country::country2code($country)) {
									$add_record = 0;
								}
							}
						}
						$add_record = 0 if($is_in =~ /,.+,/);	# Just towns for now
						# means it's another country
						$add_record = 0 if(!defined($country));
						if($add_record) {
							print "$name, $is_in: $lat, $lon\n" if(DEBUG&DEBUG_ALL);
							my $row = {
								'CITY' => $name,
								'STATE' => $state,
								'COUNTRY' => $country,
								'LAT' => $lat,
								'LON' => $lon,
							};
							$inserts += import(country => $country, state => $state, row => $row, file => $filename, ua => $ua, dbh => $dbh, redis => $redis);
							if($inserts >= MAX_INSERT_COUNT) {
								flush_queue($dbh, $redis);
								$inserts = 0;
							}
						}
					}
					# Done with this node, start afresh
					$name = undef;
					$lat = undef;
					$lon = undef;
					$is_in = undef;
					$place = undef;
					$node = undef;
					$in_node = 0;
				}
			}
		}
	}

	flush_queue($dbh, $redis);	# Check for hanging dups in current state
	$inserts = 0;
}

# $| = 1;
# printf "%-70s\r", 'creating cities';
# print "\n" if(DEBUG);
# $| = 0;
# foreach my $key(keys %cities) {
	# my ($city, $county, $state, $country) = split(/,/, $key);
	# my $sequence = $cities{$key};

	# $city =~ s/'/''/g;
	# my $query = "INSERT INTO cities('SEQUENCE','CITY','COUNTY','STATE','COUNTRY'" .
		# ') VALUES (' .
		# "'$sequence'," .
		# (($city eq 0) ? "NULL," : "'$city',") .
		# (($county eq 0) ? "NULL," : "'$county',") .
		# "'$state'," .
		# "'$country')";

	# # print "$query\n";
	# $dbh->do($query);
# }

# Tell the user that the import itself is over
$| = 1;
printf "%-70s\r", 'creating indicies';
if(DEBUG) {
	print "\n";
}
$| = 0;

# Reclaim memory: none of the de-duping hashes are needed from here on
%cities = ();
%global_md5s = ();
%state_parent_md5s = ();
%state_md5s = ();

# The index is only created here when inserts have been batched
if($dbh && (MAX_INSERT_COUNT > 1)) {
	# $dbh->prepare('CREATE UNIQUE INDEX sequence_index ON cities(sequence)')->execute();
	my $sth = $dbh->prepare('CREATE UNIQUE INDEX md5_index ON openaddresses(md5)');
	$sth->execute();
	# $dbh->prepare('CREATE UNIQUE INDEX tree_index ON tree(md5,parent)')->execute();
	# $dbh->prepare('CREATE UNIQUE INDEX node_index ON level(md5)')->execute();
	# $dbh->prepare('CREATE UNIQUE INDEX leaf_index ON leaf(md5)')->execute();
}

$| = 1;
printf "%-70s\r", 'committing';
if(DEBUG) {
	print "\n";
}
$| = 0;

# Finish with whichever of the two back ends is in use
if($dbh) {
	$dbh->commit();
	$dbh->do('pragma optimize');
	$dbh->disconnect();
}
if($redis) {
	$redis->bgsave();
	$redis->shutdown();
}

# Blank out the progress line
unless(DEBUG) {
	print ' ' x 70, "\r";
}

# Return a list of all .csv or .geojson files under the given directory
#
#	$where	the directory to search, which is handed to find(1) as is
#	$csv	true to look for *.csv files, false to look for *.geojson files
#
# Returns the list of paths printed by find, one per line of its output.
# Dies if find can't be started.
#
# find is run without a shell, so the name pattern can't be expanded against
# the files in the current directory and $where may safely contain spaces.
# The flip side is that $where must be a single directory, already expanded
# (no ~ or wildcards).
sub create_tree {
	my $where = shift;
	my $csv = shift;
	my @files;

	$| = 1;
	printf "%-70s\r", $where;
	print "\n" if(DEBUG);
	$| = 0;

	my $pattern = $csv ? '*.csv' : '*.geojson';

	open(my $fin, '-|', 'find', $where, '-name', $pattern)
		or die "find $where: $!";
	while(my $line = <$fin>) {
		# print $line;
		chomp $line;
		push @files, $line;
	}
	close $fin;

	return @files;
}

# Return a list of all .csv or .geojson files from all the Git repositories
# found directly beneath the given directory
#
#	$where	the directory which holds the repositories ($where/*/.git)
#	$csv	true to list the *.csv files as well as the *.geojson files,
#		false to list only the *.geojson files
#
# Only files tracked on the master branch are listed (git ls-tree).
# Each returned path is the repository's directory joined to the file's path
# within the repository.
# Dies if a repository can't be entered or git can't be started.
sub create_tree_from_git {
	my $where = shift;
	my $csv = shift;
	my @files;
	my $olddir = getcwd();

	$| = 1;
	printf "%-70s\r", $where;
	print "\n" if(DEBUG);
	$| = 0;

	foreach my $dir(<"$where/*/.git">) {
		my($d1, $d2) = fileparse($dir);	# File::BaseName: $d2 is the directory which holds .git
		chdir($d2) or die "chdir $d2: $!";
		open(my $fin, '-|', 'git', 'ls-tree', '-r', 'master', '--name-only')
			or die "git ls-tree in $d2: $!";
		while(my $line = <$fin>) {
			chomp $line;
			my $file = File::Spec->catfile($d2, $line);
			if($csv && ($line =~ /\.csv$/)) {
				push(@files, $file);
			} elsif($line =~ /\.geojson$/) {
				push(@files, $file);
			}
		}
		close $fin;

		# Go back before entering the next repository: when $where is a relative
		# path the remaining directories are relative to where we started
		chdir($olddir) or die "chdir $olddir: $!";
	}

	return @files;
}

# Import a row into the database
sub import {
	my %param;
	if(ref($_[0]) eq 'HASH') {
		%param = %{$_[0]};
	} elsif(ref($_[0])) {
		die 'import: bad args';
	} elsif(scalar(@_) % 2 == 0) {
		%param = @_;
	} else {
		die 'import: no args';
	}

	my $r = $param{'row'} || die 'import: no row';
	my $row = { %{$r} };	# Take a copy of the hash because we're going to play with it
	my $country = $param{'country'} || $row->{'COUNTRY'} || die 'import: no country';
	my $state = $param{'state'} || $row->{'STATE'};	# Will come from $row->{'REGION'}
	my $file = $param{'file'} || die 'import: no file';
	my $ua = $param{'ua'} || die 'import: no ua';
	my $dbh = $param{'dbh'};
	my $redis = $param{'redis'};
	if((!$dbh) && !$redis) {
		die 'import: neither dbh nor redis';
	}
	my $global = $param{'global'} || 0;
	my $inserts = 0;

	if($state) {
		$state = uc($state);
	}

	my $city = $row->{'CITY'};
	my $county = $row->{'COUNTY'};
	if(($file !~ /^statewide/) && ($file !~ /^province/) && ($file !~ /countrywide/)) {
		if($file =~ /^city_of_(.+).csv$/) {
			$city = $1;
		} elsif($file =~ /^town_of_(.+).csv$/) {
			$city = $1;
		} elsif($file =~ /^(.+)_borough.csv$/) {
			$city = $1;
		} elsif($file =~ /^(.+)-region.csv$/) {
			$city = $1;
		} elsif($file =~ /^township_of_(.+).csv$/) {
			$city = $1;
		} elsif($file =~ /^(.+)_district.csv$/) {
			$city = $1;
		} elsif(!defined($city)) {
			$city = $file;
			$city =~ s/\.csv$//;
		} elsif(($file !~ /^WHOSONFIRST\-DATA/i) && ($file !~ /countrywide/)) {
			$county = $file;
			$county =~ s/\.csv$//;
		}
	}
	if(!defined($city)) {
		# print "$state:\n", Data::Dumper->new([\$row])->Dump();
		my $zip = $row->{'POSTCODE'};
		if((!defined($city)) && defined($zip) && (my $info = $zipcodes{$zip})) {
			$city = $info->{'city'};
			$county = $info->{'county'};
		}
		if((!defined($city)) && defined($zip) && ($zip =~ /^(\d{5})/)) {
			$zip = $1;
			if(exists($unknown_zips{"$country/$zip"})) {
				return 0;
			}
			my $res = $ua->get("https://api.zippopotam.us/$country/$zip");
			if(!$res->is_success()) {
				warn "$country/$zip: ", $res->status_line();
				$unknown_zips{"$country/$zip"} = 1;
				return 0;
			}
			my $rc = JSON::MaybeXS->new()->utf8()->decode($res->content());
			if(!defined($rc)) {
				# print "\n", Data::Dumper->new([\$row])->Dump();
				return 0;
			}
			my $place = $rc->{'places'}->[0];
			if(!$place) {
				# print "\n", Data::Dumper->new([\$row])->Dump();
				return 0;
			}
			$city = uc($place->{'place name'});
			$zipcodes{$zip} = { 'city' => $city };
			# print "$zip => $city\n";
			if($city) {
				# Counties and states alone have already been read in
				my %columns = (
					'COUNTRY' => $country,
					'STATE' => $state,
					'COUNTY' => $county,
					'CITY' => $city,
					'LAT' => $place->{'latitude'},
					'LON' => $place->{'longitude'},
				);
				# print "$zip => $query\n";
				$inserts = insert($dbh, $redis, $global, %columns);
			}
		}
		if((!defined($city)) && !defined($state)) {
			# die Data::Dumper->new([\$row])->Dump();
			return 0 if(!$zip);
			return 0 if($zip !~ /^\d{5}/);
			$dbh->disconnect() if($dbh);
			die $file;
		}
	}
	if((!defined($row->{'LAT'})) || !defined($row->{'LON'})) {
		my @call_details = caller(0);
		print 'Empty LAT/LON in ',
			($state ? "$state/" : ''),
			"$file from line $call_details[2]: ",
			Data::Dumper->new([$row])->Dump()
			if(DEBUG&DEBUG_DATA_VALIDATE);
		return 0;
	}
	return 0 if(($row->{'LAT'} == 0) && ($row->{'LON'} == 0));
	my $street = $row->{'STREET'};
	if($street) {
		$street =~ s/\s\s+/ /g;

		if($city && ($city =~ /(.+),\s*(.+)/)) {
			# For example the city could be "North Side, Chicago", which comes from the case when a borough record
			# has been added.  Let's allow searches on the borough or the city alone
			my $borough = $1;
			my $city_name = $2;
			print "Break up $city\n\tstreet = $street\n\tborough = $borough\n\tcity_name = $city_name\n" if(DEBUG&DEBUG_BREAKUP);
			if($borough ne $street) {
				my %columns = ( %{$param{'row'}}, 'COUNTRY' => $country, 'STATE' => $state, 'COUNTY' => $county, 'CITY' => $borough );
				print "\t", Data::Dumper->new([\%columns])->Dump() if(DEBUG&DEBUG_BREAKUP);
				$inserts += insert($dbh, $redis, $global, %columns);
				$columns{'CITY'} = $city_name;
				print "\t", Data::Dumper->new([\%columns])->Dump() if(DEBUG&DEBUG_BREAKUP);
				$inserts += insert($dbh, $redis, $global, %columns);
				print "\tDone\n" if(DEBUG&DEBUG_BREAKUP);
			}
		}
	}
	if($city) {
		$city =~ s/\.csv$//;
		$city =~ s/[_,\-\.]/ /g;
		$city = uc($city);
		$city =~ s/\s+BORO$//;
		$city =~ s/\s+TWP$//;
		$city =~ s/^TOWN\s+OF\s+//;
		$city =~ s/^CITY\s+OF\s+//i;
		$city =~ s/^THE\s+CITY\s+OF\s+//i;
		$city =~ s/^TOWNSHIP\s+OF\s+//;
		$city =~ s/\s\s+/ /g;
		$city =~ s/\s+$//g;
		$city =~ s/^\s+//g;
	}
	if($street) {
		$street = uc($street);
		if($street =~ /(.+)\s+(.+)\s+(.+)/) {
			my $a;
			if((lc($2) ne 'cross') && ($a = $abbr->abbreviate($2))) {
				$street = "$1 $a $3";
			} elsif($a = $abbr->abbreviate($3)) {
				$street = "$1 $2 $a";
			}
		} elsif($street =~ /(.+)\s(.+)$/) {
			if(my $a = $abbr->abbreviate($2)) {
				$street = "$1 $a";
			}
		}
		$street =~ s/^0+//;	# Turn 04th St into 4th St
	}
	if($file =~ /^(.+)[_\-]county.csv$/) {
		$county = $1;
	} elsif((!defined($county)) && defined($city)) {
		if($city =~ /(.+)\s+COUNTY$/i) {
			$county = $1;
			$city = undef;
		}
	}
	if((!defined($county)) && $row->{'DISTRICT'} && ($row->{'DISTRICT'} ne $state)) {
		if($row->{'DISTRICT'} !~ /^\d+$/) {
			$county = $row->{'DISTRICT'};
		}
	}
	if($county) {
		if($city) {
			if($city =~ /\s+COUNTY$/i) {
				my $tmp = $city;
				$city = $county;
				$county = $tmp;
			}
			if(($city eq $county) ||
			   ($city eq "$county COUNTY") ||
			   ($county eq "$city COUNTY")) {
				$city = undef;
			}
		}
		$county = uc($county);
		$county =~ s/_/ /g;
		$county =~ s/\s+COUNTY$//;
		$county =~ s/\s\s+/ /g;
		$county =~ s/\s+$//g;
		$county =~ s/^\s+//g;
	}
	if(defined($state)) {
		if($state eq 'IN') {
			if(defined($city) && ($city eq 'FW')) {
				$city = 'FORT WAYNE';
				$county = 'ALLEN';
			} elsif(defined($county) && ($county eq 'LAPORTE')) {
				$county = 'LA PORTE';
			}
		} elsif($state eq 'MO') {
			if(defined($city) && ($city eq 'SAINT LOUIS')) {
				$city = 'ST. LOUIS';
				$county = undef;
			}
		}
	}
	if(($city && ($city !~ /^\s+$/)) || $county || $state) {
		my %columns = (
			'COUNTRY' => $country,
			'CITY' => $city,
			'STATE' => $state,
			'COUNTY' => $county,
			'STREET' => $street
		);
		foreach my $c('LAT', 'LON', 'NAME', 'NUMBER') {
			$columns{$c} = $row->{$c};
		}
		if((!defined($city)) ||
		   ($country eq 'GB') && ($city eq 'LONDON') && defined($state) && ($state eq 'LONDON')) {
			delete $columns{'CITY'};
		}
		foreach my $column(keys %columns) {
			delete $columns{$column} if(!defined($columns{$column}));
		}
		if((!defined($city)) && defined($street) && defined($row->{'REGION'}) && !defined($county)) {
			$county = uc($row->{'REGION'});
			if(!defined($street)) {
				print "County and street with no city:\n", Data::Dumper->new([$row])->Dump() if($county && (DEBUG&DEBUG_DATA_VALIDATE));
				return $inserts;
			}
		}
		# print Data::Dumper->new([\%columns])->Dump() if(!defined($city) && !defined($county));
		$inserts += insert($dbh, $redis, $global, %columns);
		if(delete($columns{'COUNTY'})) {
			return $inserts if(!defined($city));
			$inserts += insert($dbh, $redis, $global, %columns);
		}
		my $name = $columns{'NAME'};
		if($name) {
			# TODO: more of these
			# foreach my $classifier($properties->{'sg:classifier'}) {
			#	Check if $name ends with uc($classifier->{category})
			if($name =~ /^(.+)\s+RESTAURANT/i) {
				$columns{'NAME'} = $1;
				$inserts += insert($dbh, $redis, $global, %columns);
				# print "RESTAURANT: $1\n";
			}
			if(delete($columns{'NAME'})) {
				$inserts += insert($dbh, $redis, $global, %columns);
			}
		}
		if(delete($columns{'NUMBER'})) {
			if($name) {
				# For when a name is known but not a street number
				$columns{'NAME'} = $name;
				$inserts += insert($dbh, $redis, $global, %columns);
				if($name =~ /^(.+)\s+RESTAURANT/i) {
					$columns{'NAME'} = $1;
					$inserts += insert($dbh, $redis, $global, %columns);
					# print "RESTAURANT: $1\n";
				}
				delete($columns{'NAME'});
			}
			# Match somewhere in the street when number isn't known
			$inserts += insert($dbh, $redis, $global, %columns);
			if(delete($columns{'STREET'})) {
				if($name) {
					# For when a name is known but not a street
					$columns{'NAME'} = $name;
					$inserts += insert($dbh, $redis, $global, %columns);
					if($name =~ /^(.+)\s+RESTAURANT/i) {
						$columns{'NAME'} = $1;
						$inserts += insert($dbh, $redis, $global, %columns);
						# print "RESTAURANT: $1\n";
					}
					delete($columns{'NAME'});
				}
				# Match somewhere in the city when street isn't known
				$inserts += insert($dbh, $redis, $global, %columns);
			}
		} elsif($name && delete($columns{'STREET'})) {
			# For when a name is known but not a street
			$columns{'NAME'} = $name;
			$inserts += insert($dbh, $redis, $global, %columns);
			if($name =~ /^(.+)\s+RESTAURANT/i) {
				$columns{'NAME'} = $1;
				$inserts += insert($dbh, $redis, $global, %columns);
				# print "RESTAURANT: $1\n";
			}
		}
	}
	return $inserts;
}

# Queue one address record for insertion into the database.
#
# Arguments:
#	$dbh, $redis	handed on to flush_queue() when DEBUG_ALL is enabled
#	$global		passed to create_md5() along with the record's digest text
#	%columns	the record: LAT and LON are mandatory, as is COUNTRY;
#			NAME, NUMBER, STREET, CITY, COUNTY and STATE are optional
#
# Returns the number of new rows queued: 0 when create_md5() rejects the
# record because its digest has been seen before, 1 when the record has been
# queued, or 2 when a second copy with the spaces removed from the street
# name has been queued as well.
# Dies, dumping the record, if LAT, LON or COUNTRY is missing.
sub insert {
	my ($dbh, $redis, $global, %columns) = @_;

	# Throw away fields that are unset or hold nothing but whitespace
	foreach my $field(keys %columns) {
		my $value = $columns{$field};
		if((!defined($value)) || ($value =~ /^\s+$/)) {
			delete $columns{$field};
		}
	}

	# A record is of no use without a position
	if(!(defined($columns{'LAT'}) && defined($columns{'LON'}))) {
		die Data::Dumper->new([\%columns])->Dump();
	}

	# Strip trailing, leading and repeated spaces from every field.
	# Some openaddresses files are littered with them.
	# The loop variable aliases the hash values, so they are changed in place
	foreach my $value(values %columns) {
		$value =~ s/\s+$//g;
		$value =~ s/^\s+//g;
		$value =~ s/\s\s+/ /g;
	}

	# The digest is taken over the address fields, most specific first.
	# Empty fields are left out
	my $digest;
	foreach my $part(map { $columns{$_} } qw(NAME NUMBER STREET CITY COUNTY STATE COUNTRY)) {
		$digest .= $part if($part);
	}
	$digest = create_md5($global, Encode::encode_utf8($digest));

	# Nothing is returned by create_md5() when the digest is already known
	return 0 unless(defined($digest));

	# Line of the call to insert(), used for debugging and diagnostics
	my $caller_line = (caller(0))[2];

	$columns{'LINE'} = $caller_line if(DEBUG);
	$queued_commits{$digest} = \%columns;
	flush_queue($dbh, $redis) if(DEBUG&DEBUG_ALL);

	unless(defined($columns{'COUNTRY'})) {
		print "insert: no country. Called from line ", $caller_line, "\n";
		die Data::Dumper->new([\%columns])->Dump();
	}

	# Give each city a sequence number the first time that it is seen
	my $key = city_key($columns{'CITY'}, $columns{'COUNTY'}, $columns{'STATE'}, $columns{'COUNTRY'});
	$cities{$key} ||= ++$city_sequence;

	# Some postal address parsers have problems with "N FOO ST", so also store "NFOOST"
	my $street = $columns{'STREET'};
	return 1 unless($street && ($street =~ /^[A-Z]\s\w+\s\w+$/));

	# Handle https://rt.cpan.org/Public/Bug/Display.html?id=124919
	my %compact = %columns;
	$compact{'STREET'} =~ s/\s+//g;
	$compact{'LINE'} = $caller_line if(DEBUG);

	# NAME is not part of the digest of the compacted copy
	my $compact_digest;
	foreach my $part(map { $compact{$_} } qw(NUMBER STREET CITY COUNTY STATE COUNTRY)) {
		$compact_digest .= $part if($part);
	}
	$compact_digest = create_md5($global, Encode::encode_utf8($compact_digest));

	# The compacted form is already known, so only the original was queued
	return 1 unless($compact_digest);

	$queued_commits{$compact_digest} = \%compact;
	flush_queue($dbh, $redis) if(DEBUG&DEBUG_ALL);
	return 2;
}

# Write every queued place to the database (and to Redis, when a client is
# given), then empty the queue.
# All of the place's values are combined into one INSERT INTO
# Be aware of https://github.com/openaddresses/openaddresses/issues/3928
#
# Arguments:
#	$dbh	database handle; when false nothing is written to the database
#	$redis	Redis client; when false nothing is written to Redis
#
# Dies if a queued entry is not a hash reference or has no COUNTRY, and
# re-throws the database error if the INSERT fails.  In those cases the
# queue is not emptied.
#
# TODO: populate the 'tree' table (lat, lon, md5, parent).  The earlier
#	attempt at that was permanently disabled with if(0) and has been
#	removed from here; it can be recovered from the revision history.
sub flush_queue
{
	my ($dbh, $redis) = @_;

	print "flush\n" if(DEBUG&DEBUG_FLUSH);

	# Where flush_queue() was called from, for the diagnostics below.
	# It is worked out once, here, because a Try::Tiny catch block runs as
	# an anonymous sub called from within try(), so caller(1) in the catch
	# block gives the line of the try statement rather than that of our caller
	my $caller_line = (caller(0))[2];

	# One "(lat,lon,'md5')" tuple for each entry that passes validation
	my @tuples;

	foreach my $md5(keys %queued_commits) {
		my $row = $queued_commits{$md5};
		die if(ref($row) ne 'HASH');

		my $country = $row->{'COUNTRY'};
		if(!defined($country)) {
			print "flush_queue: called from line ", $caller_line, "\n";
			die Data::Dumper->new([$row])->Dump();
		}

		# Silently drop (unless debugging) entries whose state or country
		# is not of a plausible length
		my $state = $row->{'STATE'};
		if($max_state_lengths{$country} && $state && (length($state) > $max_state_lengths{$country})) {
			print STDERR 'Invalid state length: ', Data::Dumper->new([$row])->Dump() if(DEBUG&DEBUG_INVALID_LENGTH);
			next;
		}
		if(length($country) != 2) {
			print STDERR "Invalid country length:\n", Data::Dumper->new([$row])->Dump() if(DEBUG&DEBUG_INVALID_LENGTH);
			next;
		}

		# Ignore entries without a position.
		# NOTE(review): this is a truth test, so a coordinate of exactly "0"
		# is also treated as blank - confirm that is intended
		unless($row->{'LAT'} && $row->{'LON'}) {
			print "flush_queue: ignoring blank entry called from line ", $caller_line, "\n\t",
				Data::Dumper->new([$row])->Dump() if(DEBUG&DEBUG_DATA_VALIDATE);
			next;
		}

		if($redis) {
			print "writing to redis\n" if(DEBUG&DEBUG_FLUSH);
			# The responses are waited for once the loop has finished
			$redis->hmset($md5, 'LAT', $row->{'LAT'}, 'LON', $row->{'LON'}, sub {});
		}
		push @tuples, '(' . $row->{'LAT'} . ',' . $row->{'LON'} . ",'$md5')";
	}

	if($redis) {
		print "waiting for redis\n" if(DEBUG&DEBUG_FLUSH);
		$redis->wait_all_responses();
	}

	if(scalar(@tuples) && $dbh) {
		# A single multi-row INSERT for the whole of the queue
		my $query = 'INSERT INTO openaddresses(LAT, LON, MD5) VALUES ' . join(',', @tuples);
		try {
			$dbh->do($query);
		} catch {
			print "Error in flush_queue ($query) called from line ", $caller_line, "\n\t",
				Data::Dumper->new([\%queued_commits])->Dump();
			die $_;
		};
	}
	%queued_commits = ();
}

# Create the digest that identifies a record.
#
# The first argument is the "global" flag, the remaining arguments are the
# data to be digested.  The digest is the first 16 characters of the base64
# encoded MD5 of that data.
#
# Returns nothing if the digest is already in the global list, the state
# list or the queue of pending commits.  Otherwise the digest is added to
# the state list (and to the global list when the flag is set) and returned.
sub create_md5
{
	my ($global, @data) = @_;

	my $digest = substr(Digest::MD5::md5_base64(@data), 0, 16);

	# Seen before?  Then there is nothing to do
	foreach my $seen(\%global_md5s, \%state_md5s, \%queued_commits) {
		return if($seen->{$digest});
	}

	$global_md5s{$digest} = 1 if($global);
	$state_md5s{$digest} = 1;

	return $digest;
}

# Build the key which identifies a city: "city,county,state,country".
# A city, county or state that is not defined is represented by '0'.
# State must be the abbreviated form
sub city_key {
	my ($city, $county, $state, $country) = @_;

	# Only undefined values are replaced, empty strings are left as they are
	my $place = join(',', map { $_ // '0' } ($city, $county, $state));

	return "$place,$country";
}

# Given a Whosonfirst ID, return the name of the matching place, which is
# read from the place's geojson file beneath $WHOSONFIRST_HOME.
# For a region in the US, Canada or Australia the abbreviation (or failing
# that the shortcode) is returned in preference to the name.
# Lookups are cached.
#
# Arguments:
#	$properties	properties of the record that the caller is working on,
#			only used in a debugging message
#	$id		the Whosonfirst ID to look up
#
# Returns nothing if the ID is negative, if no file can be found for it, or
# if the place has been superseded or is flagged as no longer current.
# Dies if the ID can't be split up into a directory path.
sub get_wof {
	my ($properties, $id) = @_;

	state %l1_cache;	# Small first-level cache, a plain hash
	state $l2_cache;	# Second-level cache, a CHI object created on first use

	return if($id < 0);
	if($l1_cache{$id}) {
		# my @call_details = caller(0);
		# print "get_wof: stored $l1_cache{$id} at line " . $call_details[2], "\n" if(DEBUG&DEBUG_ALL);
		return $l1_cache{$id};
	}
	# Crude size limit: start again once the first-level cache holds 50 entries
	if(scalar(keys %l1_cache) >= 50) {
		%l1_cache = ();
	}

	if(defined($l2_cache)) {
		if(my $name = $l2_cache->get($id)) {
			my @call_details = caller(0);
			print "get_wof: cached $name at line ", $call_details[2], "\n" if(DEBUG&DEBUG_GET_WOF);
			$l1_cache{$id} = $name;
			return $name;
		}
	} else {
		$l2_cache = CHI->new(driver => 'RawMemory', global => 0, max_size => 1_000);
	}

	print "get_wof: not cached $id\n" if(DEBUG&DEBUG_GET_WOF);
	my $filename;
	# Unfortunately whosonfirst doesn't tell you the repo of the region_id, just
	# its name, so you need to search all repos to find it
	# https://github.com/whosonfirst-data/whosonfirst-data/issues/1844
	if($id =~ /(\d{3})(\d{3})(\d{3})(\d+)/) {
		$filename = $ENV{'WHOSONFIRST_HOME'} . "/*/data/$1/$2/$3/$4/$id.geojson";
	} elsif($id =~ /(\d{3})(\d{3})(\d+)/) {
		$filename = $ENV{'WHOSONFIRST_HOME'} . "/*/data/$1/$2/$3/$id.geojson";
	} else {
		die "Can't parse $id";
	}
	# The wildcard stands for the repo; use the first file that matches
	my @filelist = <"$filename">;
	$filename = $filelist[0];
	if(!$filename) {
		# Don't die here
		print "Can't find $id file for ", $properties->{'wof:name'}, "\n" if(DEBUG&DEBUG_GET_WOF);
		return;
	}

	my $j = JSON::MaybeXS->new()->utf8();
	my $data = File::Slurp::read_file($filename);
	$data = $j->decode($data);

	# Properties of the place that has been looked up.  These are kept in
	# their own variable so that the caller's $properties is not masked
	my $place = $data->{'properties'};

	# A missing wof:superseded_by is taken to mean that the place has not
	# been superseded, rather than dying on the undefined array reference
	return if(scalar(@{$place->{'wof:superseded_by'} // []}));
	return if(exists($place->{'mz:is_current'}) && ($place->{'mz:is_current'} == 0));

	# Entries are kept in the second-level cache for a minute
	if(($place->{'wof:placetype'} // '') eq 'region') {
		my $country = uc($place->{'wof:country'} // '');
		if(($country eq 'US') || ($country eq 'CA') || ($country eq 'AU')) {
			return $l1_cache{$id} = $l2_cache->set($id, $place->{'wof:abbreviation'} || $place->{'wof:shortcode'} || $place->{'wof:name'}, '1 minute');
		}
	}
	return $l1_cache{$id} = $l2_cache->set($id, $place->{'wof:name'}, '1 minute');
}
