[postgis-commits] svn - r2642 - in trunk/extras/tiger_geocoder: .
geocode normalize
postgis-commits at postgis.refractions.net
postgis-commits at postgis.refractions.net
Tue Jul 3 14:30:34 PDT 2007
Author: snowman
Date: 2007-07-03 14:30:34 -0700 (Tue, 03 Jul 2007)
New Revision: 2642
Added:
trunk/extras/tiger_geocoder/create_geocode.sql
trunk/extras/tiger_geocoder/geocode/
trunk/extras/tiger_geocoder/geocode/geocode.sql
trunk/extras/tiger_geocoder/geocode/geocode_address.sql
trunk/extras/tiger_geocoder/geocode/geocode_address_countysub_exact.sql
trunk/extras/tiger_geocoder/geocode/geocode_address_countysub_fuzzy.sql
trunk/extras/tiger_geocoder/geocode/geocode_address_place_exact.sql
trunk/extras/tiger_geocoder/geocode/geocode_address_place_fuzzy.sql
trunk/extras/tiger_geocoder/geocode/geocode_address_state.sql
trunk/extras/tiger_geocoder/geocode/geocode_address_zip.sql
trunk/extras/tiger_geocoder/geocode/geocode_get_point.sql
trunk/extras/tiger_geocoder/geocode/geocode_location.sql
trunk/extras/tiger_geocoder/geocode/geocode_zip.sql
trunk/extras/tiger_geocoder/geocode/includes_address.sql
trunk/extras/tiger_geocoder/geocode/interpolate_from_address.sql
trunk/extras/tiger_geocoder/geocode/rate_attributes.sql
trunk/extras/tiger_geocoder/normalize/
trunk/extras/tiger_geocoder/normalize/count_words.sql
trunk/extras/tiger_geocoder/normalize/end_soundex.sql
trunk/extras/tiger_geocoder/normalize/get_last_words.sql
trunk/extras/tiger_geocoder/normalize/location_extract.sql
trunk/extras/tiger_geocoder/normalize/location_extract_countysub_exact.sql
trunk/extras/tiger_geocoder/normalize/location_extract_countysub_fuzzy.sql
trunk/extras/tiger_geocoder/normalize/location_extract_place_exact.sql
trunk/extras/tiger_geocoder/normalize/location_extract_place_fuzzy.sql
trunk/extras/tiger_geocoder/normalize/normalize_address.sql
trunk/extras/tiger_geocoder/normalize/state_extract.sql
Log:
Add in broken out/updated normalize/geocode functions
- create_geocode.sql:
Main creation script for pulling in all the other
.sql files to create all the functions and whatnot
- normalize/
Normalization routines, includes mapping 'North' -> 'N',
'Virginia' -> 'VA', etc, etc.
- geocode/
Actual geocoding routines to find the point geometry of the
address. Includes interpolation across the linestring found
for the location (perhaps not the best), and fallbacks to
zip-code and city, state matches using the associated lookup
tables for those. Also currently returns a set rather than
a cursor, that's up for some debate but the cursor makes it
difficult to do things like fallback, imv. Especially since
references to it from another pl/pgsql function require it to
be a specific record type across multiple calls. That's
currently a problem. :/
Added: trunk/extras/tiger_geocoder/create_geocode.sql
===================================================================
--- trunk/extras/tiger_geocoder/create_geocode.sql 2007-07-03 21:22:29 UTC (rev 2641)
+++ trunk/extras/tiger_geocoder/create_geocode.sql 2007-07-03 21:30:34 UTC (rev 2642)
@@ -0,0 +1,58 @@
+
+-- Master creation script for the TIGER geocoder: sets the search path,
+-- (re)creates the norm_addy composite type and then pulls in every function
+-- definition through psql's \i meta-command, so it has to be run with psql.
+-- NOTE(review): the \i paths are relative, so psql is presumably expected to
+-- be started from the directory containing this script - confirm.
+
+-- Tiger is where we're going to create the functions, but we need
+-- the PostGIS functions/types which are in public.
+SET search_path TO tiger,public;
+
+-- Type used to pass around a normalized address between functions
+-- CASCADE also drops objects that depend on the type; the functions that
+-- take a norm_addy are created again by the includes below.
+DROP TYPE IF EXISTS norm_addy CASCADE;
+CREATE TYPE norm_addy AS (
+ address INTEGER,
+ preDirAbbrev VARCHAR,
+ streetName VARCHAR,
+ streetTypeAbbrev VARCHAR,
+ postDirAbbrev VARCHAR,
+ internal VARCHAR,
+ location VARCHAR,
+ stateAbbrev VARCHAR,
+ zip INTEGER,
+ parsed BOOLEAN);
+
+-- System/General helper functions
+\i utility/utmzone.sql
+\i utility/cull_null.sql
+\i utility/nullable_levenshtein.sql
+\i utility/levenshtein_ignore_case.sql
+
+---- Address normalizer
+-- General helpers
+\i normalize/end_soundex.sql
+\i normalize/count_words.sql
+\i normalize/state_extract.sql
+\i normalize/get_last_words.sql
+-- Location extraction/normalization helpers
+\i normalize/location_extract_countysub_exact.sql
+\i normalize/location_extract_countysub_fuzzy.sql
+\i normalize/location_extract_place_exact.sql
+\i normalize/location_extract_place_fuzzy.sql
+\i normalize/location_extract.sql
+-- Normalization API, called by geocode mainly.
+\i normalize/normalize_address.sql
+
+---- Geocoder functions
+-- General helpers
+\i geocode/rate_attributes.sql
+\i geocode/includes_address.sql
+\i geocode/interpolate_from_address.sql
+-- Actual lookups/geocoder helpers
+\i geocode/geocode_address_countysub_exact.sql
+\i geocode/geocode_address_countysub_fuzzy.sql
+\i geocode/geocode_address_place_exact.sql
+\i geocode/geocode_address_place_fuzzy.sql
+\i geocode/geocode_address.sql
+\i geocode/geocode_address_state.sql
+\i geocode/geocode_address_zip.sql
+\i geocode/geocode_zip.sql
+\i geocode/geocode_location.sql
+-- Geocode API, called by user
+\i geocode/geocode_get_point.sql
+\i geocode/geocode.sql
Added: trunk/extras/tiger_geocoder/geocode/geocode.sql
===================================================================
--- trunk/extras/tiger_geocoder/geocode/geocode.sql 2007-07-03 21:22:29 UTC (rev 2641)
+++ trunk/extras/tiger_geocoder/geocode/geocode.sql 2007-07-03 21:30:34 UTC (rev 2642)
@@ -0,0 +1,85 @@
+-- geocode(input)
+--
+-- User-facing entry point of the geocoder.  Normalizes the free-form address
+-- string and then tries, in order, a full street-address lookup, a zip-code
+-- lookup and a state/location lookup, stopping at the first one that hands
+-- back a cursor.
+--
+-- Parameters:
+--   input     - free-form address text.
+--
+-- Returns one row per candidate fetched from that cursor:
+--   NORM_ADDY - printable address rebuilt from the matched row,
+--   GEOMOUT   - the row's address_geom,
+--   RATING    - the row's rating.
+-- Returns no rows when input is NULL, when the normalizer could not parse it,
+-- or when none of the lookups produced a cursor.
+CREATE OR REPLACE FUNCTION geocode(
+    input VARCHAR,
+    OUT NORM_ADDY VARCHAR,
+    OUT GEOMOUT GEOMETRY,
+    OUT RATING INTEGER
+) RETURNS SETOF RECORD
+AS $_$
+DECLARE
+    parsed norm_addy;
+    result REFCURSOR;
+    rec RECORD;
+BEGIN
+
+    IF input IS NULL THEN
+        RETURN;
+    END IF;
+
+    -- Pass the input string into the address normalizer
+    parsed := normalize_address(input);
+    IF NOT parsed.parsed THEN
+        RETURN;
+    END IF;
+
+    -- Go for the full monty if we've got enough info
+    IF parsed.address IS NOT NULL AND
+       parsed.streetName IS NOT NULL AND
+       (parsed.zip IS NOT NULL OR parsed.stateAbbrev IS NOT NULL) THEN
+
+        result := geocode_address(parsed);
+    END IF;
+
+    -- Next best is zipcode, if we've got it
+    IF result IS NULL AND parsed.zip IS NOT NULL THEN
+        result := geocode_zip(parsed);
+    END IF;
+
+    -- No zip code, try state/location, need both or we'll get too much stuffs.
+    IF result IS NULL AND parsed.stateAbbrev IS NOT NULL AND parsed.location IS NOT NULL THEN
+        result := geocode_location(parsed);
+    END IF;
+
+    IF result IS NULL THEN
+        RETURN;
+    END IF;
+
+    -- Emit one output row per row of the cursor.  (The original body assigned
+    -- to an undeclared variable here and closed an IF that was never opened,
+    -- so the function could not be compiled.)
+    LOOP
+        FETCH result INTO rec;
+        EXIT WHEN NOT FOUND;
+
+        -- Separators are only added next to parts that are actually present.
+        NORM_ADDY := cull_null(parsed.address::text)
+            || CASE WHEN rec.fedirp IS NOT NULL THEN ' ' ELSE '' END
+            || cull_null(rec.fedirp)
+            || CASE WHEN rec.fename IS NOT NULL THEN ' ' ELSE '' END
+            || cull_null(rec.fename)
+            || CASE WHEN rec.fetype IS NOT NULL THEN ' ' ELSE '' END
+            || cull_null(rec.fetype)
+            || CASE WHEN rec.fedirs IS NOT NULL THEN ' ' ELSE '' END
+            || cull_null(rec.fedirs)
+            || CASE WHEN
+                    parsed.address IS NOT NULL OR
+                    rec.fename IS NOT NULL
+               THEN ', ' ELSE '' END
+            || cull_null(parsed.internal)
+            || CASE WHEN parsed.internal IS NOT NULL THEN ', ' ELSE '' END
+            || cull_null(rec.place)
+            || CASE WHEN rec.place IS NOT NULL THEN ', ' ELSE '' END
+            || cull_null(rec.state)
+            || CASE WHEN rec.state IS NOT NULL THEN ' ' ELSE '' END
+            -- Explicit cast: lpad() takes text, and norm_addy carries zip
+            -- codes as INTEGER, so the fetched zip may not be text either.
+            || cull_null(lpad(rec.zip::text, 5, '0'));
+
+        GEOMOUT := rec.address_geom;
+        RATING := rec.rating;
+
+        RETURN NEXT;
+    END LOOP;
+
+    -- Release the portal now instead of holding it until the transaction ends.
+    CLOSE result;
+
+    RETURN;
+
+END;
+$_$ LANGUAGE plpgsql;
Added: trunk/extras/tiger_geocoder/geocode/geocode_address.sql
===================================================================
--- trunk/extras/tiger_geocoder/geocode/geocode_address.sql 2007-07-03 21:22:29 UTC (rev 2641)
+++ trunk/extras/tiger_geocoder/geocode/geocode_address.sql 2007-07-03 21:30:34 UTC (rev 2642)
@@ -0,0 +1,94 @@
+-- geocode_address(parsed)
+--
+-- Street-level lookup for a normalized address.  Tries progressively looser
+-- strategies and returns the cursor of the first one that produces one:
+--   1. zip code                                   (geocode_address_zip)
+--   2. exact place, exact county subdivision,
+--      fuzzy place, fuzzy county subdivision      (geocode_address_*_exact/_fuzzy)
+--   3. state only                                 (geocode_address_state)
+--
+-- Parameters:
+--   parsed - normalized address (norm_addy); address and streetName are
+--            required.
+--
+-- Returns the opened cursor, or NULL when address or streetName is missing
+-- or when no strategy found a match.
+CREATE OR REPLACE FUNCTION geocode_address(
+    parsed NORM_ADDY
+) RETURNS REFCURSOR
+AS $_$
+DECLARE
+    -- Working copy of the input.  The zip fallback below fills in missing
+    -- fields; doing that on a local variable instead of on the parameter
+    -- keeps the function usable on PostgreSQL releases that treat function
+    -- parameters as constants.
+    addy NORM_ADDY;
+    result REFCURSOR;
+    tempString VARCHAR;
+    ziplookup RECORD;
+BEGIN
+    addy := parsed;
+
+    -- The first step is to determine what we've been given, and if it's enough.
+    IF addy.address IS NULL THEN
+        -- The address is mandatory.
+        -- Without it, we'd be wandering into strangers' homes all the time.
+        RETURN NULL;
+    END IF;
+
+    IF addy.streetName IS NULL THEN
+        -- A street name must be given.  Think about it.
+        RETURN NULL;
+    END IF;
+
+    IF addy.zip IS NOT NULL THEN
+        -- If the zip code is given, it is the most useful way to narrow the
+        -- search.  We will try it first, and if no results match, we will move
+        -- on to a location search.  There is no fuzzy searching on zip codes.
+        result := geocode_address_zip(result, addy);
+        IF result IS NOT NULL THEN
+            RETURN result;
+        END IF;
+        -- If we weren't able to find one using the zip code, but the zip code
+        -- exists, and location is null, then fill in the location and/or state
+        -- based on the zip code so that the location lookup has a chance.
+        IF addy.stateAbbrev IS NULL OR addy.location IS NULL THEN
+            SELECT INTO ziplookup * FROM zip_lookup_base JOIN state_lookup ON (state = name) WHERE zip = addy.zip;
+            IF FOUND THEN
+                addy.stateAbbrev := coalesce(addy.stateAbbrev, ziplookup.abbrev);
+                addy.location := coalesce(addy.location, ziplookup.city);
+            END IF;
+        END IF;
+    END IF;
+
+    -- The location-based lookups only make sense when we have a location.
+    IF addy.location IS NOT NULL THEN
+        -- location may be useful, it may not.  The first step is to determine if
+        -- there are any potential matches in the place and countysub fields.
+        -- This is done against the lookup tables, and will save us time on much
+        -- larger queries if they don't match.
+        tempString := location_extract_place_exact(addy.location, addy.stateAbbrev);
+        IF tempString IS NOT NULL THEN
+            result := geocode_address_place_exact(result, addy);
+            IF result IS NOT NULL THEN
+                RETURN result;
+            END IF;
+        END IF;
+
+        tempString := location_extract_countysub_exact(addy.location, addy.stateAbbrev);
+        IF tempString IS NOT NULL THEN
+            result := geocode_address_countysub_exact(result, addy);
+            IF result IS NOT NULL THEN
+                RETURN result;
+            END IF;
+        END IF;
+
+        tempString := location_extract_place_fuzzy(addy.location, addy.stateAbbrev);
+        IF tempString IS NOT NULL THEN
+            result := geocode_address_place_fuzzy(result, addy);
+            IF result IS NOT NULL THEN
+                RETURN result;
+            END IF;
+        END IF;
+
+        tempString := location_extract_countysub_fuzzy(addy.location, addy.stateAbbrev);
+        IF tempString IS NOT NULL THEN
+            result := geocode_address_countysub_fuzzy(result, addy);
+            IF result IS NOT NULL THEN
+                RETURN result;
+            END IF;
+        END IF;
+    END IF;
+
+    -- Try with just the state if we can't find the location
+    IF addy.stateAbbrev IS NOT NULL THEN
+        result := geocode_address_state(result, addy);
+        IF result IS NOT NULL THEN
+            RETURN result;
+        END IF;
+    END IF;
+
+    RETURN NULL;
+END;
+$_$ LANGUAGE plpgsql;
Added: trunk/extras/tiger_geocoder/geocode/geocode_address_countysub_exact.sql
===================================================================
--- trunk/extras/tiger_geocoder/geocode/geocode_address_countysub_exact.sql 2007-07-03 21:22:29 UTC (rev 2641)
+++ trunk/extras/tiger_geocoder/geocode/geocode_address_countysub_exact.sql 2007-07-03 21:30:34 UTC (rev 2642)
@@ -0,0 +1,161 @@
+-- geocode_address_countysub_exact(result, parsed)
+--
+-- Looks for street segments whose county subdivision name equals
+-- parsed.location and whose street name has the same soundex code as
+-- parsed.streetName, restricted to parsed.stateAbbrev when one is given.
+--
+-- Parameters:
+--   result - cursor variable to open; NULL lets PL/pgSQL pick a portal name.
+--   parsed - normalized address (norm_addy).
+--
+-- Returns the opened cursor (columns fedirp, fename, fetype, fedirs, place,
+-- state, zip, address_geom, rating; ordered by rating), or NULL when
+-- parsed.location is NULL, when no road name matches, or when no matching
+-- segment has an address range that includes parsed.address.
+--
+-- NOTE(review): the left/right CASE expressions compare (parsed.address % 2)
+-- with fraddl/toaddl themselves rather than with their parity.  That is kept
+-- unchanged here; confirm the intent against includes_address() and
+-- interpolate_from_address().
+CREATE OR REPLACE FUNCTION geocode_address_countysub_exact(
+    result REFCURSOR,
+    parsed NORM_ADDY
+) RETURNS REFCURSOR
+AS $_$
+DECLARE
+    -- "result" is deliberately not declared again here: a local variable of
+    -- that name would hide the parameter and ignore the caller's cursor.
+    -- Holds count(*) results, so it must be numeric to be compared with 0.
+    tempInt BIGINT;
+BEGIN
+    IF parsed.location IS NULL THEN
+        -- location is mandatory.  This is the location geocoder after all.
+        RETURN NULL;
+    END IF;
+
+    -- Check to see if the road name can be matched.
+    IF parsed.stateAbbrev IS NOT NULL THEN
+        SELECT INTO tempInt count(*)
+        FROM tiger_geocode_roads
+        WHERE parsed.location = tiger_geocode_roads.cousub
+          AND soundex(parsed.streetName) = soundex(tiger_geocode_roads.fename)
+          AND parsed.stateAbbrev = tiger_geocode_roads.state;
+    ELSE
+        SELECT INTO tempInt count(*)
+        FROM tiger_geocode_roads
+        WHERE parsed.location = tiger_geocode_roads.cousub
+          AND soundex(parsed.streetName) = soundex(tiger_geocode_roads.fename);
+    END IF;
+
+    IF tempInt = 0 THEN
+        RETURN NULL;
+    END IF;
+
+    -- The road name matches, now we check to see if the addresses match
+    IF parsed.stateAbbrev IS NOT NULL THEN
+        SELECT INTO tempInt count(*)
+        FROM (
+            SELECT *, rate_attributes(parsed.preDirAbbrev, tiger_geocode_roads.fedirp,
+                parsed.streetName, tiger_geocode_roads.fename, parsed.streetTypeAbbrev,
+                tiger_geocode_roads.fetype, parsed.postDirAbbrev,
+                tiger_geocode_roads.fedirs) as rating
+            FROM tiger_geocode_roads
+            WHERE parsed.location = tiger_geocode_roads.cousub
+              AND soundex(parsed.streetName) = soundex(tiger_geocode_roads.fename)
+              AND parsed.stateAbbrev = tiger_geocode_roads.state
+        ) AS subquery
+        JOIN roads_local ON (subquery.tlid = roads_local.tlid)
+        WHERE includes_address(parsed.address, roads_local.fraddl, roads_local.toaddl,
+            roads_local.fraddr, roads_local.toaddr);
+    ELSE
+        SELECT INTO tempInt count(*)
+        FROM (
+            SELECT *, rate_attributes(parsed.preDirAbbrev, tiger_geocode_roads.fedirp,
+                parsed.streetName, tiger_geocode_roads.fename, parsed.streetTypeAbbrev,
+                tiger_geocode_roads.fetype, parsed.postDirAbbrev,
+                tiger_geocode_roads.fedirs) as rating
+            FROM tiger_geocode_roads
+            WHERE parsed.location = tiger_geocode_roads.cousub
+              AND soundex(parsed.streetName) = soundex(tiger_geocode_roads.fename)
+        ) AS subquery
+        JOIN roads_local ON (subquery.tlid = roads_local.tlid)
+        WHERE includes_address(parsed.address, roads_local.fraddl, roads_local.toaddl,
+            roads_local.fraddr, roads_local.toaddr);
+    END IF;
+
+    IF tempInt = 0 THEN
+        RETURN NULL;
+    END IF;
+
+    -- There is at least one candidate: open the cursor over the full result.
+    -- zipl/zipr are qualified with roads_local so they cannot be mistaken for
+    -- the zip_lookup_base aliases of the same name.
+    IF parsed.stateAbbrev IS NOT NULL THEN
+        OPEN result FOR
+            SELECT
+                roads_local.fedirp as fedirp,
+                roads_local.fename as fename,
+                roads_local.fetype as fetype,
+                roads_local.fedirs as fedirs,
+                CASE WHEN (parsed.address % 2) = roads_local.fraddl
+                       OR (parsed.address % 2) = roads_local.toaddl
+                    THEN coalesce(pl.name,zipl.city,csl.name,col.name)
+                    ELSE coalesce(pr.name,zipr.city,csr.name,cor.name) END as place,
+                CASE WHEN (parsed.address % 2) = roads_local.fraddl
+                       OR (parsed.address % 2) = roads_local.toaddl
+                    THEN sl.abbrev ELSE sr.abbrev END as state,
+                CASE WHEN (parsed.address % 2) = roads_local.fraddl
+                       OR (parsed.address % 2) = roads_local.toaddl
+                    THEN roads_local.zipl ELSE roads_local.zipr END as zip,
+                interpolate_from_address(parsed.address, roads_local.fraddl,
+                    roads_local.toaddl, roads_local.fraddr, roads_local.toaddr,
+                    roads_local.geom) as address_geom,
+                subquery.rating as rating
+            FROM (
+                SELECT *, rate_attributes(parsed.preDirAbbrev, tiger_geocode_roads.fedirp,
+                    parsed.streetName, tiger_geocode_roads.fename, parsed.streetTypeAbbrev,
+                    tiger_geocode_roads.fetype, parsed.postDirAbbrev,
+                    tiger_geocode_roads.fedirs, parsed.location,
+                    tiger_geocode_roads.cousub) as rating
+                FROM tiger_geocode_roads
+                WHERE parsed.location = tiger_geocode_roads.cousub
+                  AND soundex(parsed.streetName) = soundex(tiger_geocode_roads.fename)
+                  AND parsed.stateAbbrev = tiger_geocode_roads.state
+            ) AS subquery
+            JOIN roads_local ON (subquery.tlid = roads_local.tlid)
+            JOIN state_lookup sl ON (roads_local.statel = sl.st_code)
+            JOIN state_lookup sr ON (roads_local.stater = sr.st_code)
+            LEFT JOIN place_lookup pl ON (roads_local.statel = pl.st_code AND roads_local.placel = pl.pl_code)
+            LEFT JOIN place_lookup pr ON (roads_local.stater = pr.st_code AND roads_local.placer = pr.pl_code)
+            LEFT JOIN county_lookup col ON (roads_local.statel = col.st_code AND roads_local.countyl = col.co_code)
+            LEFT JOIN county_lookup cor ON (roads_local.stater = cor.st_code AND roads_local.countyr = cor.co_code)
+            LEFT JOIN countysub_lookup csl ON (roads_local.statel = csl.st_code AND roads_local.countyl = csl.co_code AND roads_local.cousubl = csl.cs_code)
+            LEFT JOIN countysub_lookup csr ON (roads_local.stater = csr.st_code AND roads_local.countyr = csr.co_code AND roads_local.cousubr = csr.cs_code)
+            LEFT JOIN zip_lookup_base zipl ON (roads_local.zipl = zipl.zip)
+            LEFT JOIN zip_lookup_base zipr ON (roads_local.zipr = zipr.zip)
+            WHERE includes_address(parsed.address, roads_local.fraddl, roads_local.toaddl,
+                roads_local.fraddr, roads_local.toaddr)
+            ORDER BY subquery.rating;
+        RETURN result;
+    ELSE
+        OPEN result FOR
+            SELECT
+                roads_local.fedirp as fedirp,
+                roads_local.fename as fename,
+                roads_local.fetype as fetype,
+                roads_local.fedirs as fedirs,
+                CASE WHEN (parsed.address % 2) = roads_local.fraddl
+                       OR (parsed.address % 2) = roads_local.toaddl
+                    THEN coalesce(pl.name,zipl.city,csl.name,col.name)
+                    ELSE coalesce(pr.name,zipr.city,csr.name,cor.name) END as place,
+                CASE WHEN (parsed.address % 2) = roads_local.fraddl
+                       OR (parsed.address % 2) = roads_local.toaddl
+                    THEN sl.abbrev ELSE sr.abbrev END as state,
+                CASE WHEN (parsed.address % 2) = roads_local.fraddl
+                       OR (parsed.address % 2) = roads_local.toaddl
+                    THEN roads_local.zipl ELSE roads_local.zipr END as zip,
+                interpolate_from_address(parsed.address, roads_local.fraddl,
+                    roads_local.toaddl, roads_local.fraddr, roads_local.toaddr,
+                    roads_local.geom) as address_geom,
+                subquery.rating as rating
+            FROM (
+                SELECT *, rate_attributes(parsed.preDirAbbrev, tiger_geocode_roads.fedirp,
+                    parsed.streetName, tiger_geocode_roads.fename, parsed.streetTypeAbbrev,
+                    tiger_geocode_roads.fetype, parsed.postDirAbbrev,
+                    tiger_geocode_roads.fedirs, parsed.location,
+                    tiger_geocode_roads.cousub) as rating
+                FROM tiger_geocode_roads
+                WHERE parsed.location = tiger_geocode_roads.cousub
+                  AND soundex(parsed.streetName) = soundex(tiger_geocode_roads.fename)
+            ) AS subquery
+            JOIN roads_local ON (subquery.tlid = roads_local.tlid)
+            JOIN state_lookup sl ON (roads_local.statel = sl.st_code)
+            JOIN state_lookup sr ON (roads_local.stater = sr.st_code)
+            LEFT JOIN place_lookup pl ON (roads_local.statel = pl.st_code AND roads_local.placel = pl.pl_code)
+            LEFT JOIN place_lookup pr ON (roads_local.stater = pr.st_code AND roads_local.placer = pr.pl_code)
+            LEFT JOIN county_lookup col ON (roads_local.statel = col.st_code AND roads_local.countyl = col.co_code)
+            LEFT JOIN county_lookup cor ON (roads_local.stater = cor.st_code AND roads_local.countyr = cor.co_code)
+            LEFT JOIN countysub_lookup csl ON (roads_local.statel = csl.st_code AND roads_local.countyl = csl.co_code AND roads_local.cousubl = csl.cs_code)
+            LEFT JOIN countysub_lookup csr ON (roads_local.stater = csr.st_code AND roads_local.countyr = csr.co_code AND roads_local.cousubr = csr.cs_code)
+            LEFT JOIN zip_lookup_base zipl ON (roads_local.zipl = zipl.zip)
+            LEFT JOIN zip_lookup_base zipr ON (roads_local.zipr = zipr.zip)
+            WHERE includes_address(parsed.address, roads_local.fraddl, roads_local.toaddl,
+                roads_local.fraddr, roads_local.toaddr)
+            ORDER BY subquery.rating;
+        RETURN result;
+    END IF;
+END;
+$_$ LANGUAGE plpgsql;
Added: trunk/extras/tiger_geocoder/geocode/geocode_address_countysub_fuzzy.sql
===================================================================
--- trunk/extras/tiger_geocoder/geocode/geocode_address_countysub_fuzzy.sql 2007-07-03 21:22:29 UTC (rev 2641)
+++ trunk/extras/tiger_geocoder/geocode/geocode_address_countysub_fuzzy.sql 2007-07-03 21:30:34 UTC (rev 2642)
@@ -0,0 +1,162 @@
+-- geocode_address_countysub_fuzzy(result, parsed)
+--
+-- Looks for street segments whose county subdivision name has the same
+-- soundex code as parsed.location and whose street name has the same soundex
+-- code as parsed.streetName, restricted to parsed.stateAbbrev when one is
+-- given.
+--
+-- Parameters:
+--   result - cursor variable to open; NULL lets PL/pgSQL pick a portal name.
+--   parsed - normalized address (norm_addy).
+--
+-- Returns the opened cursor (columns fedirp, fename, fetype, fedirs, place,
+-- state, zip, address_geom, rating; ordered by rating), or NULL when
+-- parsed.location is NULL, when no road name matches, or when no matching
+-- segment has an address range that includes parsed.address.
+--
+-- NOTE(review): the left/right CASE expressions compare (parsed.address % 2)
+-- with fraddl/toaddl themselves rather than with their parity.  That is kept
+-- unchanged here; confirm the intent against includes_address() and
+-- interpolate_from_address().
+CREATE OR REPLACE FUNCTION geocode_address_countysub_fuzzy(
+    result REFCURSOR,
+    parsed NORM_ADDY
+) RETURNS REFCURSOR
+AS $_$
+DECLARE
+    -- "result" is deliberately not declared again here: a local variable of
+    -- that name would hide the parameter and ignore the caller's cursor.
+    -- Holds count(*) results, so it must be numeric to be compared with 0.
+    tempInt BIGINT;
+BEGIN
+    -- The first step is to determine what we've been given, and if it's enough.
+    IF parsed.location IS NULL THEN
+        -- location is mandatory.  This is the location geocoder after all.
+        RETURN NULL;
+    END IF;
+
+    -- Check to see if the road name can be matched.
+    IF parsed.stateAbbrev IS NOT NULL THEN
+        SELECT INTO tempInt count(*)
+        FROM tiger_geocode_roads
+        WHERE soundex(parsed.location) = soundex(tiger_geocode_roads.cousub)
+          AND soundex(parsed.streetName) = soundex(tiger_geocode_roads.fename)
+          AND parsed.stateAbbrev = tiger_geocode_roads.state;
+    ELSE
+        SELECT INTO tempInt count(*)
+        FROM tiger_geocode_roads
+        WHERE soundex(parsed.location) = soundex(tiger_geocode_roads.cousub)
+          AND soundex(parsed.streetName) = soundex(tiger_geocode_roads.fename);
+    END IF;
+
+    IF tempInt = 0 THEN
+        RETURN NULL;
+    END IF;
+
+    -- The road name matches, now we check to see if the addresses match
+    IF parsed.stateAbbrev IS NOT NULL THEN
+        SELECT INTO tempInt count(*)
+        FROM (
+            SELECT *, rate_attributes(parsed.preDirAbbrev, tiger_geocode_roads.fedirp,
+                parsed.streetName, tiger_geocode_roads.fename, parsed.streetTypeAbbrev,
+                tiger_geocode_roads.fetype, parsed.postDirAbbrev,
+                tiger_geocode_roads.fedirs) as rating
+            FROM tiger_geocode_roads
+            WHERE soundex(parsed.location) = soundex(tiger_geocode_roads.cousub)
+              AND soundex(parsed.streetName) = soundex(tiger_geocode_roads.fename)
+              AND parsed.stateAbbrev = tiger_geocode_roads.state
+        ) AS subquery
+        JOIN roads_local ON (subquery.tlid = roads_local.tlid)
+        WHERE includes_address(parsed.address, roads_local.fraddl, roads_local.toaddl,
+            roads_local.fraddr, roads_local.toaddr);
+    ELSE
+        SELECT INTO tempInt count(*)
+        FROM (
+            SELECT *, rate_attributes(parsed.preDirAbbrev, tiger_geocode_roads.fedirp,
+                parsed.streetName, tiger_geocode_roads.fename, parsed.streetTypeAbbrev,
+                tiger_geocode_roads.fetype, parsed.postDirAbbrev,
+                tiger_geocode_roads.fedirs) as rating
+            FROM tiger_geocode_roads
+            WHERE soundex(parsed.location) = soundex(tiger_geocode_roads.cousub)
+              AND soundex(parsed.streetName) = soundex(tiger_geocode_roads.fename)
+        ) AS subquery
+        JOIN roads_local ON (subquery.tlid = roads_local.tlid)
+        WHERE includes_address(parsed.address, roads_local.fraddl, roads_local.toaddl,
+            roads_local.fraddr, roads_local.toaddr);
+    END IF;
+
+    IF tempInt = 0 THEN
+        RETURN NULL;
+    END IF;
+
+    -- There is at least one candidate: open the cursor over the full result.
+    -- zipl/zipr are qualified with roads_local so they cannot be mistaken for
+    -- the zip_lookup_base aliases of the same name.
+    IF parsed.stateAbbrev IS NOT NULL THEN
+        OPEN result FOR
+            SELECT
+                roads_local.fedirp as fedirp,
+                roads_local.fename as fename,
+                roads_local.fetype as fetype,
+                roads_local.fedirs as fedirs,
+                CASE WHEN (parsed.address % 2) = roads_local.fraddl
+                       OR (parsed.address % 2) = roads_local.toaddl
+                    THEN coalesce(pl.name,zipl.city,csl.name,col.name)
+                    ELSE coalesce(pr.name,zipr.city,csr.name,cor.name) END as place,
+                CASE WHEN (parsed.address % 2) = roads_local.fraddl
+                       OR (parsed.address % 2) = roads_local.toaddl
+                    THEN sl.abbrev ELSE sr.abbrev END as state,
+                CASE WHEN (parsed.address % 2) = roads_local.fraddl
+                       OR (parsed.address % 2) = roads_local.toaddl
+                    THEN roads_local.zipl ELSE roads_local.zipr END as zip,
+                interpolate_from_address(parsed.address, roads_local.fraddl,
+                    roads_local.toaddl, roads_local.fraddr, roads_local.toaddr,
+                    roads_local.geom) as address_geom,
+                subquery.rating as rating
+            FROM (
+                SELECT *, rate_attributes(parsed.preDirAbbrev, tiger_geocode_roads.fedirp,
+                    parsed.streetName, tiger_geocode_roads.fename, parsed.streetTypeAbbrev,
+                    tiger_geocode_roads.fetype, parsed.postDirAbbrev,
+                    tiger_geocode_roads.fedirs, parsed.location,
+                    tiger_geocode_roads.cousub) as rating
+                FROM tiger_geocode_roads
+                WHERE soundex(parsed.location) = soundex(tiger_geocode_roads.cousub)
+                  AND soundex(parsed.streetName) = soundex(tiger_geocode_roads.fename)
+                  AND parsed.stateAbbrev = tiger_geocode_roads.state
+            ) AS subquery
+            JOIN roads_local ON (subquery.tlid = roads_local.tlid)
+            JOIN state_lookup sl ON (roads_local.statel = sl.st_code)
+            JOIN state_lookup sr ON (roads_local.stater = sr.st_code)
+            LEFT JOIN place_lookup pl ON (roads_local.statel = pl.st_code AND roads_local.placel = pl.pl_code)
+            LEFT JOIN place_lookup pr ON (roads_local.stater = pr.st_code AND roads_local.placer = pr.pl_code)
+            LEFT JOIN county_lookup col ON (roads_local.statel = col.st_code AND roads_local.countyl = col.co_code)
+            LEFT JOIN county_lookup cor ON (roads_local.stater = cor.st_code AND roads_local.countyr = cor.co_code)
+            LEFT JOIN countysub_lookup csl ON (roads_local.statel = csl.st_code AND roads_local.countyl = csl.co_code AND roads_local.cousubl = csl.cs_code)
+            LEFT JOIN countysub_lookup csr ON (roads_local.stater = csr.st_code AND roads_local.countyr = csr.co_code AND roads_local.cousubr = csr.cs_code)
+            LEFT JOIN zip_lookup_base zipl ON (roads_local.zipl = zipl.zip)
+            LEFT JOIN zip_lookup_base zipr ON (roads_local.zipr = zipr.zip)
+            WHERE includes_address(parsed.address, roads_local.fraddl, roads_local.toaddl,
+                roads_local.fraddr, roads_local.toaddr)
+            ORDER BY subquery.rating;
+        RETURN result;
+    ELSE
+        OPEN result FOR
+            SELECT
+                roads_local.fedirp as fedirp,
+                roads_local.fename as fename,
+                roads_local.fetype as fetype,
+                roads_local.fedirs as fedirs,
+                CASE WHEN (parsed.address % 2) = roads_local.fraddl
+                       OR (parsed.address % 2) = roads_local.toaddl
+                    THEN coalesce(pl.name,zipl.city,csl.name,col.name)
+                    ELSE coalesce(pr.name,zipr.city,csr.name,cor.name) END as place,
+                CASE WHEN (parsed.address % 2) = roads_local.fraddl
+                       OR (parsed.address % 2) = roads_local.toaddl
+                    THEN sl.abbrev ELSE sr.abbrev END as state,
+                CASE WHEN (parsed.address % 2) = roads_local.fraddl
+                       OR (parsed.address % 2) = roads_local.toaddl
+                    THEN roads_local.zipl ELSE roads_local.zipr END as zip,
+                interpolate_from_address(parsed.address, roads_local.fraddl,
+                    roads_local.toaddl, roads_local.fraddr, roads_local.toaddr,
+                    roads_local.geom) as address_geom,
+                subquery.rating as rating
+            FROM (
+                SELECT *, rate_attributes(parsed.preDirAbbrev, tiger_geocode_roads.fedirp,
+                    parsed.streetName, tiger_geocode_roads.fename, parsed.streetTypeAbbrev,
+                    tiger_geocode_roads.fetype, parsed.postDirAbbrev,
+                    tiger_geocode_roads.fedirs, parsed.location,
+                    tiger_geocode_roads.cousub) as rating
+                FROM tiger_geocode_roads
+                WHERE soundex(parsed.location) = soundex(tiger_geocode_roads.cousub)
+                  AND soundex(parsed.streetName) = soundex(tiger_geocode_roads.fename)
+            ) AS subquery
+            JOIN roads_local ON (subquery.tlid = roads_local.tlid)
+            JOIN state_lookup sl ON (roads_local.statel = sl.st_code)
+            JOIN state_lookup sr ON (roads_local.stater = sr.st_code)
+            LEFT JOIN place_lookup pl ON (roads_local.statel = pl.st_code AND roads_local.placel = pl.pl_code)
+            LEFT JOIN place_lookup pr ON (roads_local.stater = pr.st_code AND roads_local.placer = pr.pl_code)
+            LEFT JOIN county_lookup col ON (roads_local.statel = col.st_code AND roads_local.countyl = col.co_code)
+            LEFT JOIN county_lookup cor ON (roads_local.stater = cor.st_code AND roads_local.countyr = cor.co_code)
+            LEFT JOIN countysub_lookup csl ON (roads_local.statel = csl.st_code AND roads_local.countyl = csl.co_code AND roads_local.cousubl = csl.cs_code)
+            LEFT JOIN countysub_lookup csr ON (roads_local.stater = csr.st_code AND roads_local.countyr = csr.co_code AND roads_local.cousubr = csr.cs_code)
+            LEFT JOIN zip_lookup_base zipl ON (roads_local.zipl = zipl.zip)
+            LEFT JOIN zip_lookup_base zipr ON (roads_local.zipr = zipr.zip)
+            WHERE includes_address(parsed.address, roads_local.fraddl, roads_local.toaddl,
+                roads_local.fraddr, roads_local.toaddr)
+            ORDER BY subquery.rating;
+        RETURN result;
+    END IF;
+END;
+$_$ LANGUAGE plpgsql;
Added: trunk/extras/tiger_geocoder/geocode/geocode_address_place_exact.sql
===================================================================
--- trunk/extras/tiger_geocoder/geocode/geocode_address_place_exact.sql 2007-07-03 21:22:29 UTC (rev 2641)
+++ trunk/extras/tiger_geocoder/geocode/geocode_address_place_exact.sql 2007-07-03 21:30:34 UTC (rev 2642)
@@ -0,0 +1,156 @@
+-- geocode_address_place_exact(result, parsed)
+--
+-- Looks for street segments whose place name equals parsed.location and
+-- whose street name has the same soundex code as parsed.streetName,
+-- restricted to parsed.stateAbbrev when one is given.
+--
+-- Parameters:
+--   result - cursor variable to open; NULL lets PL/pgSQL pick a portal name.
+--   parsed - normalized address (norm_addy).
+--
+-- Returns the opened cursor (columns fedirp, fename, fetype, fedirs, place,
+-- state, zip, address_geom, rating; ordered by rating), or NULL when no road
+-- name matches or when no matching segment has an address range that
+-- includes parsed.address.
+--
+-- NOTE(review): the left/right CASE expressions compare (parsed.address % 2)
+-- with fraddl/toaddl themselves rather than with their parity.  That is kept
+-- unchanged here; confirm the intent against includes_address() and
+-- interpolate_from_address().
+CREATE OR REPLACE FUNCTION geocode_address_place_exact(
+    result REFCURSOR,
+    parsed NORM_ADDY
+) RETURNS REFCURSOR
+AS $_$
+DECLARE
+    -- "result" is deliberately not declared again here: a local variable of
+    -- that name would hide the parameter and ignore the caller's cursor.
+    -- Holds count(*) results, so it must be numeric to be compared with 0.
+    tempInt BIGINT;
+BEGIN
+    -- Check to see if the road name can be matched.
+    IF parsed.stateAbbrev IS NOT NULL THEN
+        SELECT INTO tempInt count(*)
+        FROM tiger_geocode_roads
+        WHERE parsed.location = tiger_geocode_roads.place
+          AND soundex(parsed.streetName) = soundex(tiger_geocode_roads.fename)
+          AND parsed.stateAbbrev = tiger_geocode_roads.state;
+    ELSE
+        SELECT INTO tempInt count(*)
+        FROM tiger_geocode_roads
+        WHERE parsed.location = tiger_geocode_roads.place
+          AND soundex(parsed.streetName) = soundex(tiger_geocode_roads.fename);
+    END IF;
+
+    IF tempInt = 0 THEN
+        RETURN NULL;
+    END IF;
+
+    -- The road name matches, now we check to see if the addresses match
+    IF parsed.stateAbbrev IS NOT NULL THEN
+        SELECT INTO tempInt count(*)
+        FROM (
+            SELECT *, rate_attributes(parsed.preDirAbbrev, tiger_geocode_roads.fedirp,
+                parsed.streetName, tiger_geocode_roads.fename, parsed.streetTypeAbbrev,
+                tiger_geocode_roads.fetype, parsed.postDirAbbrev,
+                tiger_geocode_roads.fedirs) as rating
+            FROM tiger_geocode_roads
+            WHERE parsed.location = tiger_geocode_roads.place
+              AND soundex(parsed.streetName) = soundex(tiger_geocode_roads.fename)
+              AND parsed.stateAbbrev = tiger_geocode_roads.state
+        ) AS subquery
+        JOIN roads_local ON (subquery.tlid = roads_local.tlid)
+        WHERE includes_address(parsed.address, roads_local.fraddl, roads_local.toaddl,
+            roads_local.fraddr, roads_local.toaddr);
+    ELSE
+        SELECT INTO tempInt count(*)
+        FROM (
+            SELECT *, rate_attributes(parsed.preDirAbbrev, tiger_geocode_roads.fedirp,
+                parsed.streetName, tiger_geocode_roads.fename, parsed.streetTypeAbbrev,
+                tiger_geocode_roads.fetype, parsed.postDirAbbrev,
+                tiger_geocode_roads.fedirs) as rating
+            FROM tiger_geocode_roads
+            WHERE parsed.location = tiger_geocode_roads.place
+              AND soundex(parsed.streetName) = soundex(tiger_geocode_roads.fename)
+        ) AS subquery
+        JOIN roads_local ON (subquery.tlid = roads_local.tlid)
+        WHERE includes_address(parsed.address, roads_local.fraddl, roads_local.toaddl,
+            roads_local.fraddr, roads_local.toaddr);
+    END IF;
+
+    IF tempInt = 0 THEN
+        RETURN NULL;
+    END IF;
+
+    -- There is at least one candidate: open the cursor over the full result.
+    -- zipl/zipr are qualified with roads_local so they cannot be mistaken for
+    -- the zip_lookup_base aliases of the same name.
+    IF parsed.stateAbbrev IS NOT NULL THEN
+        OPEN result FOR
+            SELECT
+                roads_local.fedirp as fedirp,
+                roads_local.fename as fename,
+                roads_local.fetype as fetype,
+                roads_local.fedirs as fedirs,
+                CASE WHEN (parsed.address % 2) = roads_local.fraddl
+                       OR (parsed.address % 2) = roads_local.toaddl
+                    THEN coalesce(pl.name,zipl.city,csl.name,col.name)
+                    ELSE coalesce(pr.name,zipr.city,csr.name,cor.name) END as place,
+                CASE WHEN (parsed.address % 2) = roads_local.fraddl
+                       OR (parsed.address % 2) = roads_local.toaddl
+                    THEN sl.abbrev ELSE sr.abbrev END as state,
+                CASE WHEN (parsed.address % 2) = roads_local.fraddl
+                       OR (parsed.address % 2) = roads_local.toaddl
+                    THEN roads_local.zipl ELSE roads_local.zipr END as zip,
+                interpolate_from_address(parsed.address, roads_local.fraddl,
+                    roads_local.toaddl, roads_local.fraddr, roads_local.toaddr,
+                    roads_local.geom) as address_geom,
+                subquery.rating as rating
+            FROM (
+                SELECT *, rate_attributes(parsed.preDirAbbrev, tiger_geocode_roads.fedirp,
+                    parsed.streetName, tiger_geocode_roads.fename, parsed.streetTypeAbbrev,
+                    tiger_geocode_roads.fetype, parsed.postDirAbbrev,
+                    tiger_geocode_roads.fedirs, parsed.location,
+                    tiger_geocode_roads.place) as rating
+                FROM tiger_geocode_roads
+                WHERE parsed.location = tiger_geocode_roads.place
+                  AND soundex(parsed.streetName) = soundex(tiger_geocode_roads.fename)
+                  AND parsed.stateAbbrev = tiger_geocode_roads.state
+            ) AS subquery
+            JOIN roads_local ON (subquery.tlid = roads_local.tlid)
+            JOIN state_lookup sl ON (roads_local.statel = sl.st_code)
+            JOIN state_lookup sr ON (roads_local.stater = sr.st_code)
+            LEFT JOIN place_lookup pl ON (roads_local.statel = pl.st_code AND roads_local.placel = pl.pl_code)
+            LEFT JOIN place_lookup pr ON (roads_local.stater = pr.st_code AND roads_local.placer = pr.pl_code)
+            LEFT JOIN county_lookup col ON (roads_local.statel = col.st_code AND roads_local.countyl = col.co_code)
+            LEFT JOIN county_lookup cor ON (roads_local.stater = cor.st_code AND roads_local.countyr = cor.co_code)
+            LEFT JOIN countysub_lookup csl ON (roads_local.statel = csl.st_code AND roads_local.countyl = csl.co_code AND roads_local.cousubl = csl.cs_code)
+            LEFT JOIN countysub_lookup csr ON (roads_local.stater = csr.st_code AND roads_local.countyr = csr.co_code AND roads_local.cousubr = csr.cs_code)
+            LEFT JOIN zip_lookup_base zipl ON (roads_local.zipl = zipl.zip)
+            LEFT JOIN zip_lookup_base zipr ON (roads_local.zipr = zipr.zip)
+            WHERE includes_address(parsed.address, roads_local.fraddl, roads_local.toaddl,
+                roads_local.fraddr, roads_local.toaddr)
+            ORDER BY subquery.rating;
+        RETURN result;
+    ELSE
+        OPEN result FOR
+            SELECT
+                roads_local.fedirp as fedirp,
+                roads_local.fename as fename,
+                roads_local.fetype as fetype,
+                roads_local.fedirs as fedirs,
+                CASE WHEN (parsed.address % 2) = roads_local.fraddl
+                       OR (parsed.address % 2) = roads_local.toaddl
+                    THEN coalesce(pl.name,zipl.city,csl.name,col.name)
+                    ELSE coalesce(pr.name,zipr.city,csr.name,cor.name) END as place,
+                CASE WHEN (parsed.address % 2) = roads_local.fraddl
+                       OR (parsed.address % 2) = roads_local.toaddl
+                    THEN sl.abbrev ELSE sr.abbrev END as state,
+                CASE WHEN (parsed.address % 2) = roads_local.fraddl
+                       OR (parsed.address % 2) = roads_local.toaddl
+                    THEN roads_local.zipl ELSE roads_local.zipr END as zip,
+                interpolate_from_address(parsed.address, roads_local.fraddl,
+                    roads_local.toaddl, roads_local.fraddr, roads_local.toaddr,
+                    roads_local.geom) as address_geom,
+                subquery.rating as rating
+            FROM (
+                SELECT *, rate_attributes(parsed.preDirAbbrev, tiger_geocode_roads.fedirp,
+                    parsed.streetName, tiger_geocode_roads.fename, parsed.streetTypeAbbrev,
+                    tiger_geocode_roads.fetype, parsed.postDirAbbrev,
+                    tiger_geocode_roads.fedirs, parsed.location,
+                    tiger_geocode_roads.place) as rating
+                FROM tiger_geocode_roads
+                WHERE parsed.location = tiger_geocode_roads.place
+                  AND soundex(parsed.streetName) = soundex(tiger_geocode_roads.fename)
+            ) AS subquery
+            JOIN roads_local ON (subquery.tlid = roads_local.tlid)
+            JOIN state_lookup sl ON (roads_local.statel = sl.st_code)
+            JOIN state_lookup sr ON (roads_local.stater = sr.st_code)
+            LEFT JOIN place_lookup pl ON (roads_local.statel = pl.st_code AND roads_local.placel = pl.pl_code)
+            LEFT JOIN place_lookup pr ON (roads_local.stater = pr.st_code AND roads_local.placer = pr.pl_code)
+            LEFT JOIN county_lookup col ON (roads_local.statel = col.st_code AND roads_local.countyl = col.co_code)
+            LEFT JOIN county_lookup cor ON (roads_local.stater = cor.st_code AND roads_local.countyr = cor.co_code)
+            LEFT JOIN countysub_lookup csl ON (roads_local.statel = csl.st_code AND roads_local.countyl = csl.co_code AND roads_local.cousubl = csl.cs_code)
+            LEFT JOIN countysub_lookup csr ON (roads_local.stater = csr.st_code AND roads_local.countyr = csr.co_code AND roads_local.cousubr = csr.cs_code)
+            LEFT JOIN zip_lookup_base zipl ON (roads_local.zipl = zipl.zip)
+            LEFT JOIN zip_lookup_base zipr ON (roads_local.zipr = zipr.zip)
+            WHERE includes_address(parsed.address, roads_local.fraddl, roads_local.toaddl,
+                roads_local.fraddr, roads_local.toaddr)
+            ORDER BY subquery.rating;
+        RETURN result;
+    END IF;
+END;
+$_$ LANGUAGE plpgsql;
Added: trunk/extras/tiger_geocoder/geocode/geocode_address_place_fuzzy.sql
===================================================================
--- trunk/extras/tiger_geocoder/geocode/geocode_address_place_fuzzy.sql 2007-07-03 21:22:29 UTC (rev 2641)
+++ trunk/extras/tiger_geocoder/geocode/geocode_address_place_fuzzy.sql 2007-07-03 21:30:34 UTC (rev 2642)
@@ -0,0 +1,155 @@
+-- geocode_address_place_fuzzy(result, parsed)
+-- Fuzzy place-based street lookup.  Candidate roads are the rows of
+-- tiger_geocode_roads whose place and street name have the same soundex code
+-- as parsed.location and parsed.streetName; when parsed.stateAbbrev is not
+-- NULL the candidates are also restricted to that state.  Candidates are
+-- joined to roads_local and kept only if includes_address() accepts
+-- parsed.address for the segment's address ranges.
+--
+-- The supplied cursor is opened over the surviving segments, ordered by
+-- ascending rating, with the columns fedirp, fename, fetype, fedirs, place,
+-- state, zip, address_geom and rating.
+--
+-- Returns the opened cursor, or NULL if no road name matches or no matching
+-- road contains the address.
+CREATE OR REPLACE FUNCTION geocode_address_place_fuzzy(
+  result REFCURSOR,
+  parsed NORM_ADDY
+) RETURNS REFCURSOR
+AS $_$
+DECLARE
+  -- Holds count(*) results.  Declared as a number so the "= 0" tests below
+  -- compare integers instead of depending on a text-to-integer conversion.
+  tempInt INTEGER;
+BEGIN
+  -- Check to see if the road name can be matched.
+  IF parsed.stateAbbrev IS NOT NULL THEN
+    SELECT INTO tempInt count(*)
+      FROM tiger_geocode_roads
+      WHERE soundex(parsed.location) = soundex(tiger_geocode_roads.place)
+        AND soundex(parsed.streetName) = soundex(tiger_geocode_roads.fename)
+        AND parsed.stateAbbrev = tiger_geocode_roads.state;
+  ELSE
+    SELECT INTO tempInt count(*)
+      FROM tiger_geocode_roads
+      WHERE soundex(parsed.location) = soundex(tiger_geocode_roads.place)
+        AND soundex(parsed.streetName) = soundex(tiger_geocode_roads.fename);
+  END IF;
+
+  IF tempInt = 0 THEN
+    RETURN NULL;
+  END IF;
+
+  -- The road name matches, now we check to see if the addresses match.
+  -- Only the number of rows matters here, so no rating is computed.
+  IF parsed.stateAbbrev IS NOT NULL THEN
+    SELECT INTO tempInt count(*)
+      FROM tiger_geocode_roads
+      JOIN roads_local ON (tiger_geocode_roads.tlid = roads_local.tlid)
+      WHERE soundex(parsed.location) = soundex(tiger_geocode_roads.place)
+        AND soundex(parsed.streetName) = soundex(tiger_geocode_roads.fename)
+        AND parsed.stateAbbrev = tiger_geocode_roads.state
+        AND includes_address(parsed.address, roads_local.fraddl, roads_local.toaddl,
+              roads_local.fraddr, roads_local.toaddr);
+  ELSE
+    SELECT INTO tempInt count(*)
+      FROM tiger_geocode_roads
+      JOIN roads_local ON (tiger_geocode_roads.tlid = roads_local.tlid)
+      WHERE soundex(parsed.location) = soundex(tiger_geocode_roads.place)
+        AND soundex(parsed.streetName) = soundex(tiger_geocode_roads.fename)
+        AND includes_address(parsed.address, roads_local.fraddl, roads_local.toaddl,
+              roads_local.fraddr, roads_local.toaddr);
+  END IF;
+
+  IF tempInt = 0 THEN
+    RETURN NULL;
+  END IF;
+
+  -- NOTE(review): the CASE expressions below compare the parity of the house
+  -- number (0 or 1) with the raw from/to address rather than with that
+  -- address's parity.  This looks unintended, but the column types are not
+  -- visible here, so the comparison is left as written -- confirm and fix.
+  IF parsed.stateAbbrev IS NOT NULL THEN
+    OPEN result FOR
+      SELECT
+        roads_local.fedirp as fedirp,
+        roads_local.fename as fename,
+        roads_local.fetype as fetype,
+        roads_local.fedirs as fedirs,
+        -- Place name preference: place, then zip city, county subdivision, county.
+        CASE WHEN (parsed.address % 2) = roads_local.fraddl
+               OR (parsed.address % 2) = roads_local.toaddl
+          THEN coalesce(pl.name,zipl.city,csl.name,col.name)
+          ELSE coalesce(pr.name,zipr.city,csr.name,cor.name) END as place,
+        CASE WHEN (parsed.address % 2) = roads_local.fraddl
+               OR (parsed.address % 2) = roads_local.toaddl
+          THEN sl.abbrev ELSE sr.abbrev END as state,
+        CASE WHEN (parsed.address % 2) = roads_local.fraddl
+               OR (parsed.address % 2) = roads_local.toaddl
+          THEN roads_local.zipl ELSE roads_local.zipr END as zip,
+        interpolate_from_address(parsed.address, roads_local.fraddl,
+          roads_local.toaddl, roads_local.fraddr, roads_local.toaddr,
+          roads_local.geom) as address_geom,
+        subquery.rating as rating
+      FROM (
+        SELECT tiger_geocode_roads.tlid,
+               rate_attributes(parsed.preDirAbbrev, tiger_geocode_roads.fedirp,
+                 parsed.streetName, tiger_geocode_roads.fename, parsed.streetTypeAbbrev,
+                 tiger_geocode_roads.fetype, parsed.postDirAbbrev,
+                 tiger_geocode_roads.fedirs, parsed.location,
+                 tiger_geocode_roads.place) as rating
+        FROM tiger_geocode_roads
+        WHERE soundex(parsed.location) = soundex(tiger_geocode_roads.place)
+          AND soundex(parsed.streetName) = soundex(tiger_geocode_roads.fename)
+          AND parsed.stateAbbrev = tiger_geocode_roads.state
+        ) AS subquery
+        JOIN roads_local ON (subquery.tlid = roads_local.tlid)
+        JOIN state_lookup sl ON (roads_local.statel = sl.st_code)
+        JOIN state_lookup sr ON (roads_local.stater = sr.st_code)
+        LEFT JOIN place_lookup pl ON (roads_local.statel = pl.st_code AND roads_local.placel = pl.pl_code)
+        LEFT JOIN place_lookup pr ON (roads_local.stater = pr.st_code AND roads_local.placer = pr.pl_code)
+        LEFT JOIN county_lookup col ON (roads_local.statel = col.st_code AND roads_local.countyl = col.co_code)
+        LEFT JOIN county_lookup cor ON (roads_local.stater = cor.st_code AND roads_local.countyr = cor.co_code)
+        LEFT JOIN countysub_lookup csl ON (roads_local.statel = csl.st_code AND roads_local.countyl = csl.co_code AND roads_local.cousubl = csl.cs_code)
+        LEFT JOIN countysub_lookup csr ON (roads_local.stater = csr.st_code AND roads_local.countyr = csr.co_code AND roads_local.cousubr = csr.cs_code)
+        LEFT JOIN zip_lookup_base zipl ON (roads_local.zipl = zipl.zip)
+        LEFT JOIN zip_lookup_base zipr ON (roads_local.zipr = zipr.zip)
+      WHERE includes_address(parsed.address, roads_local.fraddl, roads_local.toaddl,
+              roads_local.fraddr, roads_local.toaddr)
+      ORDER BY subquery.rating;
+    RETURN result;
+  ELSE
+    OPEN result FOR
+      SELECT
+        roads_local.fedirp as fedirp,
+        roads_local.fename as fename,
+        roads_local.fetype as fetype,
+        roads_local.fedirs as fedirs,
+        CASE WHEN (parsed.address % 2) = roads_local.fraddl
+               OR (parsed.address % 2) = roads_local.toaddl
+          THEN coalesce(pl.name,zipl.city,csl.name,col.name)
+          ELSE coalesce(pr.name,zipr.city,csr.name,cor.name) END as place,
+        CASE WHEN (parsed.address % 2) = roads_local.fraddl
+               OR (parsed.address % 2) = roads_local.toaddl
+          THEN sl.abbrev ELSE sr.abbrev END as state,
+        CASE WHEN (parsed.address % 2) = roads_local.fraddl
+               OR (parsed.address % 2) = roads_local.toaddl
+          THEN roads_local.zipl ELSE roads_local.zipr END as zip,
+        interpolate_from_address(parsed.address, roads_local.fraddl,
+          roads_local.toaddl, roads_local.fraddr, roads_local.toaddr,
+          roads_local.geom) as address_geom,
+        subquery.rating as rating
+      FROM (
+        SELECT tiger_geocode_roads.tlid,
+               rate_attributes(parsed.preDirAbbrev, tiger_geocode_roads.fedirp,
+                 parsed.streetName, tiger_geocode_roads.fename, parsed.streetTypeAbbrev,
+                 tiger_geocode_roads.fetype, parsed.postDirAbbrev,
+                 tiger_geocode_roads.fedirs, parsed.location,
+                 tiger_geocode_roads.place) as rating
+        FROM tiger_geocode_roads
+        WHERE soundex(parsed.location) = soundex(tiger_geocode_roads.place)
+          AND soundex(parsed.streetName) = soundex(tiger_geocode_roads.fename)
+        ) AS subquery
+        JOIN roads_local ON (subquery.tlid = roads_local.tlid)
+        JOIN state_lookup sl ON (roads_local.statel = sl.st_code)
+        JOIN state_lookup sr ON (roads_local.stater = sr.st_code)
+        LEFT JOIN place_lookup pl ON (roads_local.statel = pl.st_code AND roads_local.placel = pl.pl_code)
+        LEFT JOIN place_lookup pr ON (roads_local.stater = pr.st_code AND roads_local.placer = pr.pl_code)
+        LEFT JOIN county_lookup col ON (roads_local.statel = col.st_code AND roads_local.countyl = col.co_code)
+        LEFT JOIN county_lookup cor ON (roads_local.stater = cor.st_code AND roads_local.countyr = cor.co_code)
+        LEFT JOIN countysub_lookup csl ON (roads_local.statel = csl.st_code AND roads_local.countyl = csl.co_code AND roads_local.cousubl = csl.cs_code)
+        LEFT JOIN countysub_lookup csr ON (roads_local.stater = csr.st_code AND roads_local.countyr = csr.co_code AND roads_local.cousubr = csr.cs_code)
+        LEFT JOIN zip_lookup_base zipl ON (roads_local.zipl = zipl.zip)
+        LEFT JOIN zip_lookup_base zipr ON (roads_local.zipr = zipr.zip)
+      WHERE includes_address(parsed.address, roads_local.fraddl, roads_local.toaddl,
+              roads_local.fraddr, roads_local.toaddr)
+      ORDER BY subquery.rating;
+    RETURN result;
+  END IF;
+END;
+$_$ LANGUAGE plpgsql;
Added: trunk/extras/tiger_geocoder/geocode/geocode_address_state.sql
===================================================================
--- trunk/extras/tiger_geocoder/geocode/geocode_address_state.sql 2007-07-03 21:22:29 UTC (rev 2641)
+++ trunk/extras/tiger_geocoder/geocode/geocode_address_state.sql 2007-07-03 21:30:34 UTC (rev 2642)
@@ -0,0 +1,81 @@
+-- geocode_address_state(result, parsed)
+-- State-wide street lookup.  Candidate roads are the rows of
+-- tiger_geocode_roads in state parsed.stateAbbrev whose street name has the
+-- same soundex code as parsed.streetName.  Candidates are joined to
+-- roads_local and kept only if includes_address() accepts parsed.address for
+-- the segment's address ranges.
+--
+-- The supplied cursor is opened over the surviving segments, ordered by
+-- ascending rating, with the columns fedirp, fename, fetype, fedirs, place,
+-- state, zip, address_geom and rating.
+--
+-- Returns the opened cursor, or NULL if no road name matches or no matching
+-- road contains the address.
+CREATE OR REPLACE FUNCTION geocode_address_state(
+  result REFCURSOR,
+  parsed NORM_ADDY
+) RETURNS REFCURSOR
+AS $_$
+DECLARE
+  -- Holds count(*) results.  Declared as a number so the "= 0" tests below
+  -- compare integers instead of depending on a text-to-integer conversion.
+  tempInt INTEGER;
+BEGIN
+  -- Check to see if the road name can be matched.
+  SELECT INTO tempInt count(*)
+    FROM tiger_geocode_roads
+    WHERE soundex(parsed.streetName) = soundex(tiger_geocode_roads.fename)
+      AND parsed.stateAbbrev = tiger_geocode_roads.state;
+
+  IF tempInt = 0 THEN
+    RETURN NULL;
+  END IF;
+
+  -- The road name matches, now we check to see if the addresses match.
+  -- Only the number of rows matters here, so no rating is computed.
+  SELECT INTO tempInt count(*)
+    FROM tiger_geocode_roads
+    JOIN roads_local ON (tiger_geocode_roads.tlid = roads_local.tlid)
+    WHERE soundex(parsed.streetName) = soundex(tiger_geocode_roads.fename)
+      AND parsed.stateAbbrev = tiger_geocode_roads.state
+      AND includes_address(parsed.address, roads_local.fraddl, roads_local.toaddl,
+            roads_local.fraddr, roads_local.toaddr);
+
+  IF tempInt = 0 THEN
+    RETURN NULL;
+  END IF;
+
+  -- NOTE(review): the CASE expressions below compare the parity of the house
+  -- number (0 or 1) with the raw from/to address rather than with that
+  -- address's parity.  This looks unintended, but the column types are not
+  -- visible here, so the comparison is left as written -- confirm and fix.
+  OPEN result FOR
+    SELECT
+      roads_local.fedirp as fedirp,
+      roads_local.fename as fename,
+      roads_local.fetype as fetype,
+      roads_local.fedirs as fedirs,
+      -- Place name preference: place, then zip city, county subdivision, county.
+      CASE WHEN (parsed.address % 2) = roads_local.fraddl
+             OR (parsed.address % 2) = roads_local.toaddl
+        THEN coalesce(pl.name,zipl.city,csl.name,col.name)
+        ELSE coalesce(pr.name,zipr.city,csr.name,cor.name) END as place,
+      CASE WHEN (parsed.address % 2) = roads_local.fraddl
+             OR (parsed.address % 2) = roads_local.toaddl
+        THEN sl.abbrev ELSE sr.abbrev END as state,
+      CASE WHEN (parsed.address % 2) = roads_local.fraddl
+             OR (parsed.address % 2) = roads_local.toaddl
+        THEN roads_local.zipl ELSE roads_local.zipr END as zip,
+      interpolate_from_address(parsed.address, roads_local.fraddl,
+        roads_local.toaddl, roads_local.fraddr, roads_local.toaddr,
+        roads_local.geom) as address_geom,
+      subquery.rating as rating
+    FROM (
+      SELECT tiger_geocode_roads.tlid,
+             rate_attributes(parsed.preDirAbbrev, tiger_geocode_roads.fedirp,
+               parsed.streetName, tiger_geocode_roads.fename, parsed.streetTypeAbbrev,
+               tiger_geocode_roads.fetype, parsed.postDirAbbrev,
+               tiger_geocode_roads.fedirs) as rating
+      FROM tiger_geocode_roads
+      WHERE soundex(parsed.streetName) = soundex(tiger_geocode_roads.fename)
+        AND parsed.stateAbbrev = tiger_geocode_roads.state
+      ) AS subquery
+      JOIN roads_local ON (subquery.tlid = roads_local.tlid)
+      JOIN state_lookup sl ON (roads_local.statel = sl.st_code)
+      JOIN state_lookup sr ON (roads_local.stater = sr.st_code)
+      LEFT JOIN place_lookup pl ON (roads_local.statel = pl.st_code AND roads_local.placel = pl.pl_code)
+      LEFT JOIN place_lookup pr ON (roads_local.stater = pr.st_code AND roads_local.placer = pr.pl_code)
+      LEFT JOIN county_lookup col ON (roads_local.statel = col.st_code AND roads_local.countyl = col.co_code)
+      LEFT JOIN county_lookup cor ON (roads_local.stater = cor.st_code AND roads_local.countyr = cor.co_code)
+      LEFT JOIN countysub_lookup csl ON (roads_local.statel = csl.st_code AND roads_local.countyl = csl.co_code AND roads_local.cousubl = csl.cs_code)
+      LEFT JOIN countysub_lookup csr ON (roads_local.stater = csr.st_code AND roads_local.countyr = csr.co_code AND roads_local.cousubr = csr.cs_code)
+      LEFT JOIN zip_lookup_base zipl ON (roads_local.zipl = zipl.zip)
+      LEFT JOIN zip_lookup_base zipr ON (roads_local.zipr = zipr.zip)
+    WHERE includes_address(parsed.address, roads_local.fraddl, roads_local.toaddl,
+            roads_local.fraddr, roads_local.toaddr)
+    ORDER BY subquery.rating;
+  RETURN result;
+END;
+$_$ LANGUAGE plpgsql;
Added: trunk/extras/tiger_geocoder/geocode/geocode_address_zip.sql
===================================================================
--- trunk/extras/tiger_geocoder/geocode/geocode_address_zip.sql 2007-07-03 21:22:29 UTC (rev 2641)
+++ trunk/extras/tiger_geocoder/geocode/geocode_address_zip.sql 2007-07-03 21:30:34 UTC (rev 2642)
@@ -0,0 +1,83 @@
+-- geocode_address_zip(result, parsed)
+-- Zip-code based street lookup.  Candidate roads are the rows of
+-- tiger_geocode_roads whose zip equals parsed.zip and whose street name has
+-- the same soundex code as parsed.streetName.  Candidates are joined to
+-- roads_local and kept only if includes_address() accepts parsed.address for
+-- the segment's address ranges.
+--
+-- The supplied cursor is opened over the surviving segments, ordered by
+-- ascending rating, with the columns fedirp, fename, fetype, fedirs, place,
+-- state, zip, address_geom and rating.
+--
+-- Returns the opened cursor, or NULL if no road name matches or no matching
+-- road contains the address.
+CREATE OR REPLACE FUNCTION geocode_address_zip(
+  result REFCURSOR,
+  parsed NORM_ADDY
+) RETURNS REFCURSOR
+AS $_$
+DECLARE
+  -- Holds count(*) results.  Declared as a number so the "= 0" tests below
+  -- compare integers instead of depending on a text-to-integer conversion.
+  tempInt INTEGER;
+BEGIN
+  -- Check to see if the road name can be matched.
+  SELECT INTO tempInt count(*)
+    FROM tiger_geocode_roads
+    WHERE parsed.zip = tiger_geocode_roads.zip
+      AND soundex(parsed.streetName) = soundex(tiger_geocode_roads.fename);
+
+  IF tempInt = 0 THEN
+    RETURN NULL;
+  END IF;
+
+  -- The road name matches, now we check to see if the addresses match.
+  -- Only the number of rows matters here, so no rating is computed.
+  SELECT INTO tempInt count(*)
+    FROM tiger_geocode_roads
+    JOIN roads_local ON (tiger_geocode_roads.tlid = roads_local.tlid)
+    WHERE parsed.zip = tiger_geocode_roads.zip
+      AND soundex(parsed.streetName) = soundex(tiger_geocode_roads.fename)
+      AND includes_address(parsed.address, roads_local.fraddl, roads_local.toaddl,
+            roads_local.fraddr, roads_local.toaddr);
+
+  IF tempInt = 0 THEN
+    RETURN NULL;
+  END IF;
+
+  -- NOTE(review): the CASE expressions below compare the parity of the house
+  -- number (0 or 1) with the raw from/to address rather than with that
+  -- address's parity.  This looks unintended, but the column types are not
+  -- visible here, so the comparison is left as written -- confirm and fix.
+  OPEN result FOR
+    SELECT
+      roads_local.fedirp as fedirp,
+      roads_local.fename as fename,
+      roads_local.fetype as fetype,
+      roads_local.fedirs as fedirs,
+      -- Place name preference: place, then zip city, county subdivision, county.
+      CASE WHEN (parsed.address % 2) = roads_local.fraddl
+             OR (parsed.address % 2) = roads_local.toaddl
+        THEN coalesce(pl.name,zipl.city,csl.name,col.name)
+        ELSE coalesce(pr.name,zipr.city,csr.name,cor.name) END as place,
+      CASE WHEN (parsed.address % 2) = roads_local.fraddl
+             OR (parsed.address % 2) = roads_local.toaddl
+        THEN sl.abbrev ELSE sr.abbrev END as state,
+      CASE WHEN (parsed.address % 2) = roads_local.fraddl
+             OR (parsed.address % 2) = roads_local.toaddl
+        THEN roads_local.zipl ELSE roads_local.zipr END as zip,
+      interpolate_from_address(parsed.address, roads_local.fraddl,
+        roads_local.toaddl, roads_local.fraddr, roads_local.toaddr,
+        roads_local.geom) as address_geom,
+      subquery.rating as rating
+    FROM (
+      SELECT tiger_geocode_roads.tlid,
+             rate_attributes(parsed.preDirAbbrev, tiger_geocode_roads.fedirp,
+               parsed.streetName, tiger_geocode_roads.fename, parsed.streetTypeAbbrev,
+               tiger_geocode_roads.fetype, parsed.postDirAbbrev,
+               tiger_geocode_roads.fedirs) as rating
+      FROM tiger_geocode_roads
+      WHERE parsed.zip = tiger_geocode_roads.zip
+        AND soundex(parsed.streetName) = soundex(tiger_geocode_roads.fename)
+      ) AS subquery
+      JOIN roads_local ON (subquery.tlid = roads_local.tlid)
+      JOIN state_lookup sl ON (roads_local.statel = sl.st_code)
+      JOIN state_lookup sr ON (roads_local.stater = sr.st_code)
+      LEFT JOIN place_lookup pl ON (roads_local.statel = pl.st_code AND roads_local.placel = pl.pl_code)
+      LEFT JOIN place_lookup pr ON (roads_local.stater = pr.st_code AND roads_local.placer = pr.pl_code)
+      LEFT JOIN county_lookup col ON (roads_local.statel = col.st_code AND roads_local.countyl = col.co_code)
+      LEFT JOIN county_lookup cor ON (roads_local.stater = cor.st_code AND roads_local.countyr = cor.co_code)
+      LEFT JOIN countysub_lookup csl ON (roads_local.statel = csl.st_code AND roads_local.countyl = csl.co_code AND roads_local.cousubl = csl.cs_code)
+      LEFT JOIN countysub_lookup csr ON (roads_local.stater = csr.st_code AND roads_local.countyr = csr.co_code AND roads_local.cousubr = csr.cs_code)
+      LEFT JOIN zip_lookup_base zipl ON (roads_local.zipl = zipl.zip)
+      LEFT JOIN zip_lookup_base zipr ON (roads_local.zipr = zipr.zip)
+    WHERE includes_address(parsed.address, roads_local.fraddl, roads_local.toaddl,
+            roads_local.fraddr, roads_local.toaddr)
+    ORDER BY subquery.rating;
+
+  RETURN result;
+END;
+$_$ LANGUAGE plpgsql;
Added: trunk/extras/tiger_geocoder/geocode/geocode_get_point.sql
===================================================================
--- trunk/extras/tiger_geocoder/geocode/geocode_get_point.sql 2007-07-03 21:22:29 UTC (rev 2641)
+++ trunk/extras/tiger_geocoder/geocode/geocode_get_point.sql 2007-07-03 21:30:34 UTC (rev 2642)
@@ -0,0 +1,10 @@
+-- geocode_get_point(address)
+-- Convenience wrapper: runs geocode(NULL, address) and returns the centroid
+-- of the geom field of the record that call yields.
+CREATE OR REPLACE FUNCTION geocode_get_point(VARCHAR) RETURNS GEOMETRY
+AS $_$
+DECLARE
+  address ALIAS FOR $1;
+  located RECORD;
+BEGIN
+  located := geocode(NULL, address);
+
+  RETURN centroid(located.geom);
+END;
+$_$ LANGUAGE plpgsql;
Added: trunk/extras/tiger_geocoder/geocode/geocode_location.sql
===================================================================
--- trunk/extras/tiger_geocoder/geocode/geocode_location.sql 2007-07-03 21:22:29 UTC (rev 2641)
+++ trunk/extras/tiger_geocoder/geocode/geocode_location.sql 2007-07-03 21:30:34 UTC (rev 2642)
@@ -0,0 +1,67 @@
+-- geocode_location(parsed)
+-- Geocodes a city/state pair without using any street information.
+-- parsed.location is matched by soundex, and parsed.stateAbbrev exactly,
+-- first against the zip code tables (zip_lookup_base joined to zt99_d00) and,
+-- if that finds nothing, against the place table pl99_d00.
+--
+-- Returns a cursor with the columns fedirp, fename, fetype, fedirs (all
+-- NULL), place, state, zip, address_geom (centroid of the matched area) and
+-- rating (always 100), or NULL when neither lookup finds a match.
+CREATE OR REPLACE FUNCTION geocode_location(
+  parsed NORM_ADDY
+) RETURNS REFCURSOR
+AS $_$
+DECLARE
+  result REFCURSOR;
+  -- Holds count(*) results.  Declared as a number so the "> 0" tests below
+  -- compare integers instead of depending on a text-to-integer conversion.
+  tempInt INTEGER;
+BEGIN
+  -- Try to match the city/state to a zipcode first
+  SELECT INTO tempInt count(*)
+    FROM zip_lookup_base zip
+    JOIN state_lookup sl ON (zip.state = sl.name)
+    JOIN zt99_d00 zl ON (lpad(zip.zip,5,'0') = zl.zcta)
+    WHERE soundex(zip.city) = soundex(parsed.location) and sl.abbrev = parsed.stateAbbrev;
+
+  -- If that worked, just use the zipcode lookup
+  IF tempInt > 0 THEN
+    OPEN result FOR
+      SELECT
+        NULL::varchar(2) as fedirp,
+        NULL::varchar(30) as fename,
+        NULL::varchar(4) as fetype,
+        NULL::varchar(2) as fedirs,
+        zip.city as place,
+        sl.abbrev as state,
+        -- NOTE(review): this reports the zip from the parsed input, not the
+        -- zip of the matched row -- confirm that is intended.
+        parsed.zip as zip,
+        centroid(wkb_geometry) as address_geom,
+        100::integer as rating
+      FROM
+        zip_lookup_base zip
+        JOIN state_lookup sl on (zip.state = sl.name)
+        JOIN zt99_d00 zl ON (lpad(zip.zip,5,'0') = zl.zcta)
+      WHERE
+        soundex(zip.city) = soundex(parsed.location) and sl.abbrev = parsed.stateAbbrev;
+
+    RETURN result;
+  END IF;
+
+  -- Try to match the city/state to a place next
+  SELECT INTO tempInt count(*)
+    FROM pl99_d00 pl
+    JOIN state_lookup sl ON (pl.state = lpad(sl.st_code,2,'0'))
+    WHERE soundex(pl.name) = soundex(parsed.location) and sl.abbrev = parsed.stateAbbrev;
+
+  -- If that worked, just use the place lookup
+  IF tempInt > 0 THEN
+    OPEN result FOR
+      SELECT
+        NULL::varchar(2) as fedirp,
+        NULL::varchar(30) as fename,
+        NULL::varchar(4) as fetype,
+        NULL::varchar(2) as fedirs,
+        pl.name as place,
+        sl.abbrev as state,
+        NULL::integer as zip,
+        centroid(wkb_geometry) as address_geom,
+        100::integer as rating
+      FROM pl99_d00 pl
+        JOIN state_lookup sl ON (pl.state = lpad(sl.st_code,2,'0'))
+      WHERE soundex(pl.name) = soundex(parsed.location) and sl.abbrev = parsed.stateAbbrev;
+
+    RETURN result;
+  END IF;
+
+  -- Neither lookup matched; the cursor was never opened.
+  RETURN NULL;
+END;
+$_$ LANGUAGE plpgsql;
Added: trunk/extras/tiger_geocoder/geocode/geocode_zip.sql
===================================================================
--- trunk/extras/tiger_geocoder/geocode/geocode_zip.sql 2007-07-03 21:22:29 UTC (rev 2641)
+++ trunk/extras/tiger_geocoder/geocode/geocode_zip.sql 2007-07-03 21:30:34 UTC (rev 2642)
@@ -0,0 +1,41 @@
+-- geocode_zip(parsed)
+-- Geocodes a bare zip code.  parsed.zip is looked up in zip_lookup_base
+-- (joined to state_lookup and to the zt99_d00 zip area table).
+--
+-- Returns a cursor with the columns fedirp, fename, fetype, fedirs (all
+-- NULL), place, state, zip, address_geom (centroid of the zip area) and
+-- rating (always 100), or NULL when the zip code is not found.
+CREATE OR REPLACE FUNCTION geocode_zip(
+  parsed NORM_ADDY
+) RETURNS REFCURSOR
+AS $_$
+DECLARE
+  result REFCURSOR;
+  -- Holds the count(*) result.  Declared as a number so the "= 0" test below
+  -- compares integers instead of depending on a text-to-integer conversion.
+  tempInt INTEGER;
+BEGIN
+  -- Check to see if the zip code can be matched.  The column is qualified
+  -- because "zip" is also the table alias.
+  SELECT INTO tempInt count(*)
+    FROM zip_lookup_base zip
+    JOIN state_lookup sl on (zip.state = sl.name)
+    JOIN zt99_d00 zl ON (lpad(zip.zip,5,'0') = zl.zcta)
+    WHERE zip.zip = parsed.zip;
+
+  IF tempInt = 0 THEN
+    RETURN NULL;
+  END IF;
+
+  OPEN result FOR
+    SELECT
+      NULL::varchar(2) as fedirp,
+      NULL::varchar(30) as fename,
+      NULL::varchar(4) as fetype,
+      NULL::varchar(2) as fedirs,
+      zip.city as place,
+      sl.abbrev as state,
+      parsed.zip as zip,
+      centroid(wkb_geometry) as address_geom,
+      100::integer as rating
+    FROM
+      zip_lookup_base zip
+      JOIN state_lookup sl on (zip.state = sl.name)
+      JOIN zt99_d00 zl ON (lpad(zip.zip,5,'0') = zl.zcta)
+    WHERE
+      zip.zip = parsed.zip;
+
+  RETURN result;
+END;
+$_$ LANGUAGE plpgsql;
Added: trunk/extras/tiger_geocoder/geocode/includes_address.sql
===================================================================
--- trunk/extras/tiger_geocoder/geocode/includes_address.sql 2007-07-03 21:22:29 UTC (rev 2641)
+++ trunk/extras/tiger_geocoder/geocode/includes_address.sql 2007-07-03 21:30:34 UTC (rev 2642)
@@ -0,0 +1,93 @@
+-- includes_address(given_address, addr1, addr2, addr3, addr4)
+-- Reports whether given_address falls on a street segment described by two
+-- address ranges.  The arguments must be grouped by side of the street:
+-- addr1/addr2 bound one side and addr3/addr4 bound the other.  Within a side
+-- the two bounds may be given in either order, and NULL bounds are skipped.
+--
+-- A side accepts the address when the address lies within that side's bounds
+-- and has the same odd/even parity as at least one of the bounds.  Returns
+-- FALSE when no side accepts it, including when all four bounds are NULL.
+-- The value -1 is used internally as the "not set yet" marker.
+CREATE OR REPLACE FUNCTION includes_address(
+  given_address INTEGER,
+  addr1 INTEGER,
+  addr2 INTEGER,
+  addr3 INTEGER,
+  addr4 INTEGER
+) RETURNS BOOLEAN
+AS $_$
+DECLARE
+  leftLow INTEGER := -1;
+  leftHigh INTEGER := -1;
+  rightLow INTEGER := -1;
+  rightHigh INTEGER := -1;
+  allLow INTEGER := -1;
+  allHigh INTEGER := -1;
+BEGIN
+  -- The first bound of the first side seeds both that side and the overall range.
+  IF addr1 IS NOT NULL THEN
+    leftLow := addr1;
+    leftHigh := addr1;
+    allLow := addr1;
+    allHigh := addr1;
+  END IF;
+
+  -- The second bound of the first side widens whichever ranges it extends.
+  IF addr2 IS NOT NULL THEN
+    IF allLow = -1 OR addr2 < allLow THEN
+      allLow := addr2;
+    END IF;
+    IF allHigh = -1 OR addr2 > allHigh THEN
+      allHigh := addr2;
+    END IF;
+    IF leftLow = -1 OR addr2 < leftLow THEN
+      leftLow := addr2;
+    END IF;
+    IF leftHigh = -1 OR addr2 > leftHigh THEN
+      leftHigh := addr2;
+    END IF;
+  END IF;
+
+  -- The first bound of the other side seeds that side unconditionally.
+  IF addr3 IS NOT NULL THEN
+    IF allLow = -1 OR addr3 < allLow THEN
+      allLow := addr3;
+    END IF;
+    IF allHigh = -1 OR addr3 > allHigh THEN
+      allHigh := addr3;
+    END IF;
+    rightLow := addr3;
+    rightHigh := addr3;
+  END IF;
+
+  IF addr4 IS NOT NULL THEN
+    IF allLow = -1 OR addr4 < allLow THEN
+      allLow := addr4;
+    END IF;
+    IF allHigh = -1 OR addr4 > allHigh THEN
+      allHigh := addr4;
+    END IF;
+    IF rightLow = -1 OR addr4 < rightLow THEN
+      rightLow := addr4;
+    END IF;
+    IF rightHigh = -1 OR addr4 > rightHigh THEN
+      rightHigh := addr4;
+    END IF;
+  END IF;
+
+  -- No addresses were non-null, return FALSE (arbitrary)
+  IF allLow = -1 OR allHigh = -1 THEN
+    RETURN FALSE;
+  END IF;
+
+  IF given_address BETWEEN allLow AND allHigh THEN
+    -- The address must be on the matching side of the road, i.e. an even
+    -- address needs a side with an even bound.
+    IF given_address BETWEEN leftLow AND leftHigh
+       AND (given_address % 2) IN (leftLow % 2, leftHigh % 2) THEN
+      RETURN TRUE;
+    END IF;
+    IF given_address BETWEEN rightLow AND rightHigh
+       AND (given_address % 2) IN (rightLow % 2, rightHigh % 2) THEN
+      RETURN TRUE;
+    END IF;
+  END IF;
+
+  -- The address is not within the range
+  RETURN FALSE;
+END;
+$_$ LANGUAGE plpgsql;
Added: trunk/extras/tiger_geocoder/geocode/interpolate_from_address.sql
===================================================================
--- trunk/extras/tiger_geocoder/geocode/interpolate_from_address.sql 2007-07-03 21:22:29 UTC (rev 2641)
+++ trunk/extras/tiger_geocoder/geocode/interpolate_from_address.sql 2007-07-03 21:30:34 UTC (rev 2642)
@@ -0,0 +1,123 @@
+-- interpolate_from_address(local_address, from_address_l, to_address_l, from_address_r, to_address_r, local_road)
+-- String-address overload.  The four range bounds arrive as text; each is
+-- converted with to_number(..., '999999') into an integer and the work is
+-- handed to the all-integer interpolate_from_address overload.
+CREATE OR REPLACE FUNCTION interpolate_from_address(INTEGER, VARCHAR, VARCHAR, VARCHAR, VARCHAR, GEOMETRY) RETURNS GEOMETRY
+AS $_$
+DECLARE
+  -- Integer locals make the call below resolve to the integer overload.
+  fromLeft INTEGER := to_number($2, '999999');
+  toLeft INTEGER := to_number($3, '999999');
+  fromRight INTEGER := to_number($4, '999999');
+  toRight INTEGER := to_number($5, '999999');
+BEGIN
+  RETURN interpolate_from_address($1, fromLeft, toLeft, fromRight, toRight, $6);
+END;
+$_$ LANGUAGE plpgsql;
+
+-- interpolate_from_address(local_address, from_address_l, to_address_l, from_address_r, to_address_r, local_road)
+-- This function returns a point along the given geometry corresponding to
+-- the given address.  The geometry must be a LINESTRING, or a
+-- MULTILINESTRING whose first component is used; any other geometry type
+-- yields NULL.  If the given address is not within the address range of the
+-- road, NULL is returned.
+-- This function requires that the addresses be grouped, such that the second
+-- and third arguments are from one side of the street, while the fourth and
+-- fifth are from the other.
+--
+-- The position is the address's fractional distance between the low and high
+-- bound of the matching side, measured from the end of the line that carries
+-- the "from" address.  A side whose two bounds are equal has no width to
+-- interpolate over; the midpoint of the line is returned for it instead of
+-- failing with a division by zero.
+CREATE OR REPLACE FUNCTION interpolate_from_address(INTEGER, INTEGER, INTEGER, INTEGER, INTEGER, GEOMETRY) RETURNS GEOMETRY
+AS $_$
+DECLARE
+  given_address INTEGER;
+  -- -1 marks a bound that has not been set yet.
+  lmaxaddr INTEGER := -1;
+  rmaxaddr INTEGER := -1;
+  lminaddr INTEGER := -1;
+  rminaddr INTEGER := -1;
+  -- TRUE when the "from" address of a side is its highest address, meaning
+  -- addresses run against the direction of the line.
+  lfrgreater BOOLEAN;
+  rfrgreater BOOLEAN;
+  frgreater BOOLEAN;
+  addrwidth INTEGER;
+  part DOUBLE PRECISION;
+  road GEOMETRY;
+BEGIN
+  IF $1 IS NULL OR $6 IS NULL THEN
+    RETURN NULL;
+  END IF;
+  given_address := $1;
+
+  IF geometrytype($6) = 'LINESTRING' THEN
+    road := $6;
+  ELSIF geometrytype($6) = 'MULTILINESTRING' THEN
+    road := geometryn($6,1);
+  ELSE
+    RETURN NULL;
+  END IF;
+
+  -- Work out the low/high bound and the direction of the first side.
+  IF $2 IS NOT NULL THEN
+    lfrgreater := TRUE;
+    lmaxaddr := $2;
+    lminaddr := $2;
+  END IF;
+
+  IF $3 IS NOT NULL THEN
+    IF $3 > lmaxaddr OR lmaxaddr = -1 THEN
+      lmaxaddr := $3;
+      lfrgreater := FALSE;
+    END IF;
+    IF $3 < lminaddr OR lminaddr = -1 THEN
+      lminaddr := $3;
+    END IF;
+  END IF;
+
+  -- Same for the other side.
+  IF $4 IS NOT NULL THEN
+    rmaxaddr := $4;
+    rminaddr := $4;
+    rfrgreater := TRUE;
+  END IF;
+
+  IF $5 IS NOT NULL THEN
+    IF $5 > rmaxaddr OR rmaxaddr = -1 THEN
+      rmaxaddr := $5;
+      rfrgreater := FALSE;
+    END IF;
+    IF $5 < rminaddr OR rminaddr = -1 THEN
+      rminaddr := $5;
+    END IF;
+  END IF;
+
+  -- A side matches when the address is within its bounds and shares the
+  -- odd/even parity of at least one bound.
+  IF given_address >= lminaddr AND given_address <= lmaxaddr THEN
+    IF (given_address % 2) = (lminaddr % 2)
+        OR (given_address % 2) = (lmaxaddr % 2) THEN
+      addrwidth := lmaxaddr - lminaddr;
+      IF addrwidth = 0 THEN
+        -- Single-address range: nothing to interpolate, use the midpoint.
+        part := 0.5;
+      ELSE
+        part := (given_address - lminaddr) / trunc(addrwidth, 1);
+      END IF;
+      frgreater := lfrgreater;
+    END IF;
+  END IF;
+
+  -- If both sides match, the second side takes precedence.
+  IF given_address >= rminaddr AND given_address <= rmaxaddr THEN
+    IF (given_address % 2) = (rminaddr % 2)
+        OR (given_address % 2) = (rmaxaddr % 2) THEN
+      addrwidth := rmaxaddr - rminaddr;
+      IF addrwidth = 0 THEN
+        part := 0.5;
+      ELSE
+        part := (given_address - rminaddr) / trunc(addrwidth, 1);
+      END IF;
+      frgreater := rfrgreater;
+    END IF;
+  END IF;
+
+  -- Neither side contains the address.
+  IF part IS NULL THEN
+    RETURN NULL;
+  END IF;
+
+  -- Addresses run against the line direction: measure from the other end.
+  IF frgreater THEN
+    part := 1 - part;
+  END IF;
+
+  RETURN line_interpolate_point(road, part);
+END;
+$_$ LANGUAGE plpgsql;
Added: trunk/extras/tiger_geocoder/geocode/rate_attributes.sql
===================================================================
--- trunk/extras/tiger_geocoder/geocode/rate_attributes.sql 2007-07-03 21:22:29 UTC (rev 2641)
+++ trunk/extras/tiger_geocoder/geocode/rate_attributes.sql 2007-07-03 21:30:34 UTC (rev 2642)
@@ -0,0 +1,58 @@
+-- rate_attributes(dirpA, dirpB, streetNameA, streetNameB, streetTypeA,
+-- streetTypeB, dirsA, dirsB, locationA, locationB)
+-- Rates how closely two streets agree, including their locations.  The
+-- rating is the case-insensitive levenshtein distance between the two
+-- locations plus the rating the eight-argument rate_attributes gives the
+-- remaining values, so that function's requirements must also be met.
+-- Both locations must be non-null; otherwise NULL is returned.
+CREATE OR REPLACE FUNCTION rate_attributes(VARCHAR, VARCHAR, VARCHAR, VARCHAR,
+  VARCHAR, VARCHAR, VARCHAR, VARCHAR, VARCHAR, VARCHAR) RETURNS INTEGER
+AS $_$
+DECLARE
+  locationScore INTEGER;
+  -- NOTE(review): declared but never applied to the location distance --
+  -- confirm whether the location score was meant to be weighted.
+  locationWeight INTEGER := 14;
+  verbose BOOLEAN := FALSE;
+BEGIN
+  IF $9 IS NULL OR $10 IS NULL THEN
+    IF verbose THEN
+      RAISE NOTICE 'rate_attributes() - Location names cannot be null!';
+    END IF;
+    RETURN NULL;
+  END IF;
+
+  locationScore := levenshtein_ignore_case($9, $10);
+  RETURN locationScore + rate_attributes($1, $2, $3, $4, $5, $6, $7, $8);
+END;
+$_$ LANGUAGE plpgsql;
+
+-- rate_attributes(dirpA, dirpB, streetNameA, streetNameB, streetTypeA,
+-- streetTypeB, dirsA, dirsB)
+-- Rates how closely two streets agree; lower is closer.  The rating is the
+-- weighted sum of the case-insensitive levenshtein distances between the
+-- prefix directions, street names, street types and suffix directions.
+-- Only the street names are required: if either is NULL the result is NULL.
+-- Any other NULL value (either A or B) is treated as an empty string.
+CREATE OR REPLACE FUNCTION rate_attributes(VARCHAR, VARCHAR, VARCHAR, VARCHAR,
+  VARCHAR, VARCHAR, VARCHAR, VARCHAR) RETURNS INTEGER
+AS $_$
+DECLARE
+  result INTEGER := 0;
+  directionWeight INTEGER := 2;
+  nameWeight INTEGER := 10;
+  typeWeight INTEGER := 5;
+  verbose BOOLEAN := FALSE;
+BEGIN
+  IF $3 IS NULL OR $4 IS NULL THEN
+    IF verbose THEN
+      RAISE NOTICE 'rate_attributes() - Street names cannot be null!';
+    END IF;
+    RETURN NULL;
+  END IF;
+
+  -- Prefix direction (dirpA vs dirpB)
+  result := result + levenshtein_ignore_case(cull_null($1), cull_null($2)) *
+    directionWeight;
+  -- Street name
+  result := result + levenshtein_ignore_case($3, $4) * nameWeight;
+  -- Street type
+  result := result + levenshtein_ignore_case(cull_null($5), cull_null($6)) *
+    typeWeight;
+  -- Suffix direction: compare dirsA with dirsB.  Comparing $7 with itself
+  -- always scored 0 and ignored suffix direction mismatches.
+  result := result + levenshtein_ignore_case(cull_null($7), cull_null($8)) *
+    directionWeight;
+  RETURN result;
+END;
+$_$ LANGUAGE plpgsql;
Added: trunk/extras/tiger_geocoder/normalize/count_words.sql
===================================================================
--- trunk/extras/tiger_geocoder/normalize/count_words.sql 2007-07-03 21:22:29 UTC (rev 2641)
+++ trunk/extras/tiger_geocoder/normalize/count_words.sql 2007-07-03 21:30:34 UTC (rev 2642)
@@ -0,0 +1,32 @@
+-- count_words(string)
+-- Determine the number of words in a string.  Words are allowed to be
+-- separated only by spaces, but multiple spaces between words are allowed.
+-- Leading and trailing spaces are ignored, so a string made up only of
+-- spaces contains no words.
+-- Returns -1 for a NULL string and 0 for an empty one.
+CREATE OR REPLACE FUNCTION count_words(VARCHAR) RETURNS INTEGER
+AS $_$
+DECLARE
+  inputLength INTEGER;
+  wordCount INTEGER := 0;
+  inWord BOOLEAN := FALSE;
+BEGIN
+  IF $1 IS NULL THEN
+    RETURN -1;
+  END IF;
+
+  inputLength := length($1);
+  -- A word is counted at its first character, i.e. at each change from
+  -- "space" (or start of string) to "non-space".
+  FOR i IN 1..inputLength LOOP
+    IF substring($1 from i for 1) = ' ' THEN
+      inWord := FALSE;
+    ELSIF NOT inWord THEN
+      wordCount := wordCount + 1;
+      inWord := TRUE;
+    END IF;
+  END LOOP;
+
+  RETURN wordCount;
+END;
+$_$ LANGUAGE plpgsql;
Added: trunk/extras/tiger_geocoder/normalize/end_soundex.sql
===================================================================
--- trunk/extras/tiger_geocoder/normalize/end_soundex.sql 2007-07-03 21:22:29 UTC (rev 2641)
+++ trunk/extras/tiger_geocoder/normalize/end_soundex.sql 2007-07-03 21:30:34 UTC (rev 2642)
@@ -0,0 +1,17 @@
+-- end_soundex(string)
+-- Runs the soundex function on the last word in the string provided.
+-- Words are allowed to be separated by space, comma, period, new-line,
+-- tab or form feed.  When no separator followed by a trailing run of
+-- letters and digits is found, the soundex of the whole string is returned.
+CREATE OR REPLACE FUNCTION end_soundex(VARCHAR) RETURNS VARCHAR
+AS $_$
+DECLARE
+  lastWord VARCHAR;
+BEGIN
+  lastWord := substring($1, E'[ ,.\n\t\f]([a-zA-Z0-9]*)$');
+  RETURN soundex(coalesce(lastWord, $1));
+END;
+$_$ LANGUAGE plpgsql;
Added: trunk/extras/tiger_geocoder/normalize/get_last_words.sql
===================================================================
--- trunk/extras/tiger_geocoder/normalize/get_last_words.sql 2007-07-03 21:22:29 UTC (rev 2641)
+++ trunk/extras/tiger_geocoder/normalize/get_last_words.sql 2007-07-03 21:30:34 UTC (rev 2642)
@@ -0,0 +1,29 @@
+-- get_last_words(inputString, count)
+-- Returns a string consisting of the last N words.  Words are allowed
+-- to be separated only by spaces, but multiple spaces between
+-- words are allowed.  Words must be alphanumeric (underscores are also
+-- accepted).
+-- If more words are requested than exist, the full input string is
+-- returned.
+CREATE OR REPLACE FUNCTION get_last_words(
+  inputString VARCHAR,
+  count INTEGER
+) RETURNS VARCHAR
+AS $_$
+DECLARE
+  nextWord VARCHAR;
+  tail VARCHAR := '';
+BEGIN
+  FOR i IN 1..count LOOP
+    -- Take the spaces-plus-word chunk sitting directly in front of the tail
+    -- collected so far; the tail is appended to the pattern to anchor it.
+    nextWord := substring(inputString from '((?: )+[a-zA-Z0-9_]*)' || tail || '$');
+
+    -- Ran out of words before reaching the requested count.
+    IF nextWord IS NULL THEN
+      RETURN inputString;
+    END IF;
+
+    tail := nextWord || tail;
+  END LOOP;
+
+  RETURN trim(both from tail);
+END;
+$_$ LANGUAGE plpgsql;
Added: trunk/extras/tiger_geocoder/normalize/location_extract.sql
===================================================================
--- trunk/extras/tiger_geocoder/normalize/location_extract.sql 2007-07-03 21:22:29 UTC (rev 2641)
+++ trunk/extras/tiger_geocoder/normalize/location_extract.sql 2007-07-03 21:30:34 UTC (rev 2642)
@@ -0,0 +1,36 @@
+-- location_extract(streetAddressString, stateAbbreviation)
+-- Extracts a location name from the end of the given string by delegating to
+-- the four specialised extractors, stopping at the first one that yields a
+-- result. The order tried is:
+--   1. location_extract_place_exact
+--   2. location_extract_countysub_exact
+--   3. location_extract_place_fuzzy
+--   4. location_extract_countysub_fuzzy
+--
+-- The value returned is whatever the successful extractor returned, or NULL
+-- when the input string is NULL or none of them found anything. The state
+-- abbreviation is passed through to each extractor unchanged.
+CREATE OR REPLACE FUNCTION location_extract(fullStreet VARCHAR, stateAbbrev VARCHAR) RETURNS VARCHAR
+AS $_$
+DECLARE
+  found VARCHAR;
+BEGIN
+  -- Nothing to search in.
+  IF fullStreet IS NULL THEN
+    RETURN NULL;
+  END IF;
+
+  -- Exact matches are attempted first, places before county subdivisions.
+  found := location_extract_place_exact(fullStreet, stateAbbrev);
+  IF found IS NOT NULL THEN
+    RETURN found;
+  END IF;
+
+  found := location_extract_countysub_exact(fullStreet, stateAbbrev);
+  IF found IS NOT NULL THEN
+    RETURN found;
+  END IF;
+
+  -- Only then fall back to the fuzzy matchers, in the same table order.
+  found := location_extract_place_fuzzy(fullStreet, stateAbbrev);
+  IF found IS NOT NULL THEN
+    RETURN found;
+  END IF;
+
+  RETURN location_extract_countysub_fuzzy(fullStreet, stateAbbrev);
+END;
+$_$ LANGUAGE plpgsql;
Added: trunk/extras/tiger_geocoder/normalize/location_extract_countysub_exact.sql
===================================================================
--- trunk/extras/tiger_geocoder/normalize/location_extract_countysub_exact.sql 2007-07-03 21:22:29 UTC (rev 2641)
+++ trunk/extras/tiger_geocoder/normalize/location_extract_countysub_exact.sql 2007-07-03 21:30:34 UTC (rev 2642)
@@ -0,0 +1,54 @@
+-- location_extract_countysub_exact(string, stateAbbrev)
+-- This function checks the countysub_lookup table to find an exact
+-- (case-insensitive) match to the location described at the end of the given
+-- string. The name must be preceded by a separator character (space, comma,
+-- period, new-line, form feed or tab) and must end the string.
+--
+-- When stateAbbrev is not NULL only rows of that state are considered.
+-- If several names match, the longest one wins. The location as found in the
+-- given string (not the spelling stored in the table) is returned, or NULL
+-- when nothing matches.
+CREATE OR REPLACE FUNCTION location_extract_countysub_exact(
+    fullStreet VARCHAR,
+    stateAbbrev VARCHAR
+) RETURNS VARCHAR
+AS $_$
+DECLARE
+  ws VARCHAR;
+  location VARCHAR;
+BEGIN
+  ws := E'[ ,.\n\f\t]';
+
+  -- A single ordered query with LIMIT 1 replaces the former COUNT(*) scan
+  -- followed by a loop that exited after its first row. When no row matches,
+  -- SELECT INTO leaves location NULL.
+  IF stateAbbrev IS NOT NULL THEN
+    SELECT INTO location substring(fullStreet, '(?i)(' || name || ')$')
+      FROM countysub_lookup
+      WHERE countysub_lookup.state = stateAbbrev
+        AND texticregexeq(fullStreet, '(?i)' || ws || name || '$')
+      ORDER BY length(name) DESC
+      LIMIT 1;
+  ELSE
+    SELECT INTO location substring(fullStreet, '(?i)(' || name || ')$')
+      FROM countysub_lookup
+      WHERE texticregexeq(fullStreet, '(?i)' || ws || name || '$')
+      ORDER BY length(name) DESC
+      LIMIT 1;
+  END IF;
+
+  RETURN location;
+END;
+$_$ LANGUAGE plpgsql;
Added: trunk/extras/tiger_geocoder/normalize/location_extract_countysub_fuzzy.sql
===================================================================
--- trunk/extras/tiger_geocoder/normalize/location_extract_countysub_fuzzy.sql 2007-07-03 21:22:29 UTC (rev 2641)
+++ trunk/extras/tiger_geocoder/normalize/location_extract_countysub_fuzzy.sql 2007-07-03 21:30:34 UTC (rev 2642)
@@ -0,0 +1,87 @@
+-- location_extract_countysub_fuzzy(string, stateAbbrev)
+-- This function checks the countysub_lookup table to find a fuzzy match to
+-- the location described at the end of the given string.
+--
+-- Candidates are the rows whose last word (end_soundex) sounds like the last
+-- word of the given string; when stateAbbrev is not NULL only rows of that
+-- state are considered. For each candidate the same number of trailing words
+-- is taken from the given string and compared word by word using soundex.
+-- Among the candidates where every word matches, the one with the smallest
+-- levenshtein_ignore_case distance (which must be below 50) wins.
+--
+-- The location as found in the given string is returned, or NULL when no
+-- candidate qualifies.
+CREATE OR REPLACE FUNCTION location_extract_countysub_fuzzy(
+    fullStreet VARCHAR,
+    stateAbbrev VARCHAR
+) RETURNS VARCHAR
+AS $_$
+DECLARE
+  ws VARCHAR;
+  lastWord VARCHAR;       -- last word of fullStreet, used to pick candidates
+  candidate VARCHAR;      -- trailing words of fullStreet compared to a row
+  location VARCHAR;       -- best candidate seen so far
+  bestDistance INTEGER;   -- distance of "location"; starts at the upper limit
+  distance INTEGER;
+  word_count INTEGER;
+  rec RECORD;
+  allWordsMatch BOOLEAN;
+  candidates REFCURSOR;
+BEGIN
+  ws := E'[ ,.\n\f\t]';
+
+  -- Fuzzy matching.
+  lastWord := substring(fullStreet, '(?i)' || ws ||
+      '([a-zA-Z0-9]+)$');
+  IF lastWord IS NULL THEN
+    lastWord := fullStreet;
+  END IF;
+
+  -- The two queries differ only in the state filter; opening a cursor lets a
+  -- single loop body serve both. No separate COUNT(*) pass is needed: with no
+  -- candidates the loop simply does not run and location stays NULL.
+  IF stateAbbrev IS NOT NULL THEN
+    OPEN candidates FOR SELECT name FROM countysub_lookup
+        WHERE countysub_lookup.state = stateAbbrev
+        AND soundex(lastWord) = end_soundex(name);
+  ELSE
+    OPEN candidates FOR SELECT name FROM countysub_lookup
+        WHERE soundex(lastWord) = end_soundex(name);
+  END IF;
+
+  -- Candidates at a distance of 50 or more are never accepted.
+  bestDistance := 50;
+
+  LOOP
+    FETCH candidates INTO rec;
+    EXIT WHEN NOT FOUND;
+
+    -- Begin a word-by-word soundex comparison against this candidate.
+    word_count := count_words(rec.name);
+    candidate := get_last_words(fullStreet, word_count);
+    allWordsMatch := TRUE;
+    FOR i IN 1..word_count LOOP
+      IF soundex(split_part(candidate, ' ', i)) !=
+          soundex(split_part(rec.name, ' ', i)) THEN
+        allWordsMatch := FALSE;
+        EXIT;  -- one mismatch is enough to reject the candidate
+      END IF;
+    END LOOP;
+
+    IF allWordsMatch THEN
+      -- The soundex matched, determine if the distance is better.
+      distance := levenshtein_ignore_case(rec.name, candidate);
+      IF distance < bestDistance THEN
+        location := candidate;
+        bestDistance := distance;
+      END IF;
+    END IF;
+  END LOOP;
+
+  CLOSE candidates;
+
+  -- If no fuzzys were found, location is still NULL.
+  RETURN location;
+END;
+$_$ LANGUAGE plpgsql;
Added: trunk/extras/tiger_geocoder/normalize/location_extract_place_exact.sql
===================================================================
--- trunk/extras/tiger_geocoder/normalize/location_extract_place_exact.sql 2007-07-03 21:22:29 UTC (rev 2641)
+++ trunk/extras/tiger_geocoder/normalize/location_extract_place_exact.sql 2007-07-03 21:30:34 UTC (rev 2642)
@@ -0,0 +1,57 @@
+-- location_extract_place_exact(string, stateAbbrev)
+-- This function checks the place_lookup table to find an exact
+-- (case-insensitive) match to the location described at the end of the given
+-- string. No fuzzy matching is attempted here.
+--
+-- When stateAbbrev is not NULL only rows of that state are considered.
+-- If several names match, the longest one wins. The location as found in the
+-- given string (not the spelling stored in the table) is returned, or NULL
+-- when nothing matches.
+--
+-- NOTE(review): the pattern only anchors the name to the end of the string,
+-- not to a word boundary, so a name can also match the tail of a longer word.
+-- location_extract_countysub_exact requires a leading separator - confirm
+-- whether the difference is intended.
+CREATE OR REPLACE FUNCTION location_extract_place_exact(
+    fullStreet VARCHAR,
+    stateAbbrev VARCHAR
+) RETURNS VARCHAR
+AS $_$
+DECLARE
+  location VARCHAR;
+BEGIN
+  -- Try for an exact match against places. Since the regex is end of string,
+  -- only the longest result is useful, so one ordered query with LIMIT 1
+  -- replaces the former COUNT(*) scan plus first-row loop. When no row
+  -- matches, SELECT INTO leaves location NULL.
+  IF stateAbbrev IS NOT NULL THEN
+    SELECT INTO location substring(fullStreet, '(?i)(' || name || ')$')
+      FROM place_lookup
+      WHERE place_lookup.state = stateAbbrev
+        AND texticregexeq(fullStreet, '(?i)' || name || '$')
+      ORDER BY length(name) DESC
+      LIMIT 1;
+  ELSE
+    SELECT INTO location substring(fullStreet, '(?i)(' || name || ')$')
+      FROM place_lookup
+      WHERE texticregexeq(fullStreet, '(?i)' || name || '$')
+      ORDER BY length(name) DESC
+      LIMIT 1;
+  END IF;
+
+  RETURN location;
+END;
+$_$ LANGUAGE plpgsql;
Added: trunk/extras/tiger_geocoder/normalize/location_extract_place_fuzzy.sql
===================================================================
--- trunk/extras/tiger_geocoder/normalize/location_extract_place_fuzzy.sql 2007-07-03 21:22:29 UTC (rev 2641)
+++ trunk/extras/tiger_geocoder/normalize/location_extract_place_fuzzy.sql 2007-07-03 21:30:34 UTC (rev 2642)
@@ -0,0 +1,86 @@
+-- location_extract_place_fuzzy(string, stateAbbrev)
+-- This function checks the place_lookup table to find a fuzzy match to the
+-- location described at the end of the given string.
+--
+-- Candidates are the rows whose last word (end_soundex) sounds like the last
+-- word of the given string; when stateAbbrev is not NULL only rows of that
+-- state are considered. For each candidate the same number of trailing words
+-- is taken from the given string and compared word by word using soundex.
+-- Among the candidates where every word matches, the one with the smallest
+-- levenshtein_ignore_case distance (which must be below 50) wins.
+--
+-- The location as found in the given string is returned, or NULL when no
+-- candidate qualifies.
+CREATE OR REPLACE FUNCTION location_extract_place_fuzzy(
+    fullStreet VARCHAR,
+    stateAbbrev VARCHAR
+) RETURNS VARCHAR
+AS $_$
+DECLARE
+  ws VARCHAR;
+  lastWord VARCHAR;       -- last word of fullStreet, used to pick candidates
+  candidate VARCHAR;      -- trailing words of fullStreet compared to a row
+  location VARCHAR;       -- best candidate seen so far
+  bestDistance INTEGER;   -- distance of "location"; starts at the upper limit
+  distance INTEGER;
+  word_count INTEGER;
+  rec RECORD;
+  allWordsMatch BOOLEAN;
+  candidates REFCURSOR;
+BEGIN
+  ws := E'[ ,.\n\f\t]';
+
+  lastWord := substring(fullStreet, '(?i)' || ws
+      || '([a-zA-Z0-9]+)$');
+  IF lastWord IS NULL THEN
+    lastWord := fullStreet;
+  END IF;
+
+  -- The two queries differ only in the state filter; opening a cursor lets a
+  -- single loop body serve both. No separate COUNT(*) pass is needed: with no
+  -- candidates the loop simply does not run and location stays NULL.
+  IF stateAbbrev IS NOT NULL THEN
+    OPEN candidates FOR SELECT name FROM place_lookup
+        WHERE place_lookup.state = stateAbbrev
+        AND soundex(lastWord) = end_soundex(name);
+  ELSE
+    OPEN candidates FOR SELECT name FROM place_lookup
+        WHERE soundex(lastWord) = end_soundex(name);
+  END IF;
+
+  -- Candidates at a distance of 50 or more are never accepted.
+  bestDistance := 50;
+
+  LOOP
+    FETCH candidates INTO rec;
+    EXIT WHEN NOT FOUND;
+
+    -- Begin a word-by-word soundex comparison against this candidate.
+    word_count := count_words(rec.name);
+    candidate := get_last_words(fullStreet, word_count);
+    allWordsMatch := TRUE;
+    FOR i IN 1..word_count LOOP
+      IF soundex(split_part(candidate, ' ', i)) !=
+          soundex(split_part(rec.name, ' ', i)) THEN
+        allWordsMatch := FALSE;
+        EXIT;  -- one mismatch is enough to reject the candidate
+      END IF;
+    END LOOP;
+
+    IF allWordsMatch THEN
+      -- The soundex matched, determine if the distance is better.
+      distance := levenshtein_ignore_case(rec.name, candidate);
+      IF distance < bestDistance THEN
+        location := candidate;
+        bestDistance := distance;
+      END IF;
+    END IF;
+  END LOOP;
+
+  CLOSE candidates;
+
+  RETURN location;
+END;
+$_$ LANGUAGE plpgsql;
Added: trunk/extras/tiger_geocoder/normalize/normalize_address.sql
===================================================================
--- trunk/extras/tiger_geocoder/normalize/normalize_address.sql 2007-07-03 21:22:29 UTC (rev 2641)
+++ trunk/extras/tiger_geocoder/normalize/normalize_address.sql 2007-07-03 21:30:34 UTC (rev 2642)
@@ -0,0 +1,493 @@
+-- normalize_address(addressString)
+-- This takes an address string and parses it into address (internal/street)
+-- street name, type, direction prefix and suffix, location, state and
+-- zip code, depending on what can be found in the string.
+--
+-- The US postal address standard is used:
+-- <Street Number> <Direction Prefix> <Street Name> <Street Type>
+-- <Direction Suffix> <Internal Address> <Location> <State> <Zip Code>
+--
+-- State is assumed to be included in the string, and MUST be matchable to
+-- something in the state_lookup table. Fuzzy matching is used if no direct
+-- match is found.
+--
+-- Two formats of zip code are acceptable: five digit, and five + 4.
+--
+-- The internal addressing indicators are looked up from the
+-- secondary_unit_lookup table. A following identifier is accepted
+-- but it must start with a digit.
+--
+-- The location is parsed from the string using other indicators, such
+-- as street type, direction suffix or internal address, if available.
+-- If these are not, the location is extracted with location_extract(),
+-- which compares against the place_lookup and countysub_lookup tables to
+-- determine what, in the original string, is intended to be the location.
+-- The result is not the name of the location from the tables, but the
+-- section of the given string that corresponds to the name from the tables.
+--
+-- Zip codes and street names are not validated.
+--
+-- Direction indicators are extracted by comparison with the direction_lookup
+-- table. Where several direction names match, the longest one is used.
+--
+-- Street addresses are assumed to be a single word, starting with a number.
+-- Address is mandatory; if no address is given, and the street is numbered,
+-- the resulting address will be the street name, and the street name
+-- will be an empty string.
+--
+-- In some cases, the street type is part of the street name.
+-- eg State Hwy 22a. As long as the word following the type starts with a
+-- number (this is usually the case) this will be caught. Some street names
+-- include a type name, and have a street type that differs. This will be
+-- handled properly, so long as both are given. If the street type is
+-- omitted, the street names included type will be parsed as the street type.
+--
+-- The result is a norm_addy record (the type is declared outside this file).
+-- The fields assigned here are: parsed, address, preDirAbbrev, streetName,
+-- streetTypeAbbrev, postDirAbbrev, internal, location, stateAbbrev and zip.
+-- parsed is FALSE only when the input is NULL.
+CREATE OR REPLACE FUNCTION normalize_address(
+    rawInput VARCHAR
+) RETURNS norm_addy
+AS $_$
+DECLARE
+  result norm_addy;
+  addressString VARCHAR;
+  zipString VARCHAR;
+  preDir VARCHAR;
+  postDir VARCHAR;
+  fullStreet VARCHAR;
+  reducedStreet VARCHAR;
+  streetType VARCHAR;
+  state VARCHAR;
+  tempString VARCHAR;
+  tempInt INTEGER;
+  rec RECORD;
+  ws VARCHAR;
+BEGIN
+  result.parsed := FALSE;
+
+  IF rawInput IS NULL THEN
+    RETURN result;
+  END IF;
+
+  -- Character class of everything treated as a word separator.
+  ws := E'[ ,.\t\n\f\r]';
+
+  -- Assume that the address begins with a digit, and extract it from
+  -- the input string.
+  addressString := substring(rawInput from '^([0-9].*?)[ ,/.]');
+
+  -- There are two formats for zip code, the normal 5 digit, and
+  -- the nine digit zip-4. It may also not exist.
+  zipString := substring(rawInput from ws || '([0-9]{5})$');
+  IF zipString IS NULL THEN
+    zipString := substring(rawInput from ws || '([0-9]{5})-[0-9]{4}$');
+    -- Check if all we got was a zipcode, of either form
+    IF zipString IS NULL THEN
+      zipString := substring(rawInput from '^([0-9]{5})$');
+      IF zipString IS NULL THEN
+        zipString := substring(rawInput from '^([0-9]{5})-[0-9]{4}$');
+      END IF;
+      -- If it was only a zipcode, then just return it.
+      IF zipString IS NOT NULL THEN
+        result.zip := to_number(zipString, '99999');
+        result.parsed := TRUE;
+        RETURN result;
+      END IF;
+    END IF;
+  END IF;
+
+  -- Strip the zip code (and any +4 part) from the end of the input.
+  IF zipString IS NOT NULL THEN
+    fullStreet := substring(rawInput from '(.*)'
+        || ws || '+' || cull_null(zipString) || '[- ]?([0-9]{4})?$');
+  ELSE
+    fullStreet := rawInput;
+  END IF;
+
+  -- FIXME: state_extract should probably be returning a record so we can
+  -- avoid having to parse the result from it.
+  tempString := state_extract(fullStreet);
+  IF tempString IS NOT NULL THEN
+    state := split_part(tempString, ':', 1);
+    result.stateAbbrev := split_part(tempString, ':', 2);
+  END IF;
+
+  -- The easiest case is if the address is comma delimited. There are some
+  -- likely cases:
+  -- street level, location, state
+  -- street level, location state
+  -- street level, location
+  -- street level, internal address, location, state
+  -- street level, internal address, location state
+  -- street level, internal address location state
+  -- street level, internal address, location
+  -- street level, internal address location
+  -- The first three are useful.
+  tempString := substring(fullStreet, '(?i),' || ws || '+(.*?)(,?' || ws ||
+      '*' || cull_null(state) || '$)');
+  IF tempString = '' THEN tempString := NULL; END IF;
+  IF tempString IS NOT NULL THEN
+    result.location := tempString;
+    IF addressString IS NOT NULL THEN
+      fullStreet := substring(fullStreet, '(?i)' || addressString || ws ||
+          '+(.*),' || ws || '+' || result.location);
+    ELSE
+      fullStreet := substring(fullStreet, '(?i)(.*),' || ws || '+' ||
+          result.location);
+    END IF;
+  END IF;
+
+  -- Pull out the full street information, defined as everything between the
+  -- address and the state. This includes the location.
+  -- This doesnt need to be done if location has already been found.
+  IF result.location IS NULL THEN
+    IF addressString IS NOT NULL THEN
+      IF state IS NOT NULL THEN
+        fullStreet := substring(fullStreet, '(?i)' || addressString ||
+            ws || '+(.*?)' || ws || '+' || state);
+      ELSE
+        fullStreet := substring(fullStreet, '(?i)' || addressString ||
+            ws || '+(.*?)');
+      END IF;
+    ELSE
+      IF state IS NOT NULL THEN
+        -- NOTE(review): this pattern starts with a non-greedy quantifier, so
+        -- it presumably stops at the first occurrence of the state text
+        -- rather than the last one - confirm against inputs whose street or
+        -- location contains the state text.
+        fullStreet := substring(fullStreet, '(?i)(.*?)' || ws ||
+            '+' || state);
+      END IF;
+      -- With neither an address nor a state there is nothing to strip, so
+      -- fullStreet is left as it is. (A bare non-greedy '(.*?)' pattern would
+      -- always match the empty string and wipe the street information.)
+    END IF;
+  END IF;
+
+  -- Determine if any internal address is included, such as apartment
+  -- or suite number.
+  SELECT INTO tempInt count(*) FROM secondary_unit_lookup
+      WHERE texticregexeq(fullStreet, '(?i)' || ws || name || '('
+          || ws || '|$)');
+  IF tempInt = 1 THEN
+    SELECT INTO result.internal substring(fullStreet, '(?i)' || ws || '('
+        || name || ws || '*#?' || ws
+        || '*(?:[0-9][-0-9a-zA-Z]*)?' || ')(?:' || ws || '|$)')
+        FROM secondary_unit_lookup
+        WHERE texticregexeq(fullStreet, '(?i)' || ws || name || '('
+            || ws || '|$)');
+  ELSIF tempInt > 1 THEN
+    -- In the event of multiple matches to a secondary unit designation, we
+    -- will assume that the last one is the true one.
+    tempInt := 0;
+    FOR rec in SELECT trim(substring(fullStreet, '(?i)' || ws || '('
+        || name || '(?:' || ws || '*#?' || ws
+        || '*(?:[0-9][-0-9a-zA-Z]*)?)' || ws || '?|$)')) as value
+        FROM secondary_unit_lookup
+        WHERE texticregexeq(fullStreet, '(?i)' || ws || name || '('
+            || ws || '|$)') LOOP
+      IF tempInt < position(rec.value in fullStreet) THEN
+        tempInt := position(rec.value in fullStreet);
+        result.internal := rec.value;
+      END IF;
+    END LOOP;
+  END IF;
+
+  IF result.location IS NULL THEN
+    -- If the internal address is given, the location is everything after it.
+    result.location := substring(fullStreet, result.internal || ws || '+(.*)$');
+  END IF;
+
+  -- Pull potential street types from the full street information
+  SELECT INTO tempInt count(*) FROM street_type_lookup
+      WHERE texticregexeq(fullStreet, '(?i)' || ws || '(' || name
+          || ')(?:' || ws || '|$)');
+  IF tempInt = 1 THEN
+    SELECT INTO rec abbrev, substring(fullStreet, '(?i)' || ws || '('
+        || name || ')(?:' || ws || '|$)') AS given FROM street_type_lookup
+        WHERE texticregexeq(fullStreet, '(?i)' || ws || '(' || name
+            || ')(?:' || ws || '|$)');
+    streetType := rec.given;
+    result.streetTypeAbbrev := rec.abbrev;
+  ELSIF tempInt > 1 THEN
+    -- Several candidates: keep the one that occurs last in the string.
+    tempInt := 0;
+    FOR rec IN SELECT abbrev, substring(fullStreet, '(?i)' || ws || '('
+        || name || ')(?:' || ws || '|$)') AS given FROM street_type_lookup
+        WHERE texticregexeq(fullStreet, '(?i)' || ws || '(' || name
+            || ')(?:' || ws || '|$)') LOOP
+      -- If we have found an internal address, make sure the type
+      -- precedes it.
+      IF result.internal IS NOT NULL THEN
+        IF position(rec.given IN fullStreet) < position(result.internal IN fullStreet) THEN
+          IF tempInt < position(rec.given IN fullStreet) THEN
+            streetType := rec.given;
+            result.streetTypeAbbrev := rec.abbrev;
+            tempInt := position(rec.given IN fullStreet);
+          END IF;
+        END IF;
+      ELSIF tempInt < position(rec.given IN fullStreet) THEN
+        streetType := rec.given;
+        result.streetTypeAbbrev := rec.abbrev;
+        tempInt := position(rec.given IN fullStreet);
+      END IF;
+    END LOOP;
+  END IF;
+
+  -- There is a little more processing required now. If the word after the
+  -- street type begins with a number, the street type should be considered
+  -- part of the name, as well as the next word. eg, State Route 225a. If
+  -- the next word starts with a char, then everything after the street type
+  -- will be considered location. If there is no street type, then I'm sad.
+  IF streetType IS NOT NULL THEN
+    tempString := substring(fullStreet, streetType || ws ||
+        E'+([0-9][^ ,.\t\r\n\f]*?)' || ws);
+    IF tempString IS NOT NULL THEN
+      IF result.location IS NULL THEN
+        result.location := substring(fullStreet, streetType || ws || '+'
+            || tempString || ws || '+(.*)$');
+      END IF;
+      reducedStreet := substring(fullStreet, '(.*)' || ws || '+'
+          || result.location || '$');
+      streetType := NULL;
+      result.streetTypeAbbrev := NULL;
+    ELSE
+      IF result.location IS NULL THEN
+        result.location := substring(fullStreet, streetType || ws || '+(.*)$');
+      END IF;
+      reducedStreet := substring(fullStreet, '^(.*)' || ws || '+'
+          || streetType);
+    END IF;
+
+    -- The pre direction should be at the beginning of the fullStreet string.
+    -- The post direction should be at the beginning of the location string
+    -- if there is no internal address
+    SELECT INTO tempString substring(reducedStreet, '(?i)(^' || name
+        || ')' || ws) FROM direction_lookup WHERE
+        texticregexeq(reducedStreet, '(?i)(^' || name || ')' || ws)
+        ORDER BY length(name) DESC;
+    IF tempString IS NOT NULL THEN
+      preDir := tempString;
+      SELECT INTO result.preDirAbbrev abbrev FROM direction_lookup
+          where texticregexeq(reducedStreet, '(?i)(^' || name || ')' || ws)
+          ORDER BY length(name) DESC;
+      result.streetName := substring(reducedStreet, '^' || preDir || ws || '(.*)');
+    ELSE
+      result.streetName := reducedStreet;
+    END IF;
+
+    IF texticregexeq(result.location, '(?i)' || result.internal || '$') THEN
+      -- If the internal address is at the end of the location, then no
+      -- location was given. We still need to look for post direction.
+      SELECT INTO rec abbrev,
+          substring(result.location, '(?i)^(' || name || ')' || ws) as value
+          FROM direction_lookup WHERE texticregexeq(result.location, '(?i)^'
+          || name || ws) ORDER BY length(name) desc;
+      IF rec.value IS NOT NULL THEN
+        postDir := rec.value;
+        result.postDirAbbrev := rec.abbrev;
+      END IF;
+      result.location := null;
+    ELSIF result.internal IS NULL THEN
+      -- If no location is given, the location string will be the post direction
+      SELECT INTO tempInt count(*) FROM direction_lookup WHERE
+          upper(result.location) = upper(name);
+      IF tempInt != 0 THEN
+        postDir := result.location;
+        SELECT INTO result.postDirAbbrev abbrev FROM direction_lookup WHERE
+            upper(postDir) = upper(name);
+        result.location := NULL;
+      ELSE
+        -- postDirection is not equal location, but may be contained in it.
+        SELECT INTO tempString substring(result.location, '(?i)(^' || name
+            || ')' || ws) FROM direction_lookup WHERE
+            texticregexeq(result.location, '(?i)(^' || name || ')' || ws)
+            ORDER BY length(name) desc;
+        IF tempString IS NOT NULL THEN
+          postDir := tempString;
+          -- Same predicate and ordering as the query above, so that the
+          -- abbreviation comes from the same (longest) direction name.
+          SELECT INTO result.postDirAbbrev abbrev FROM direction_lookup
+              where texticregexeq(result.location, '(?i)(^' || name || ')' || ws)
+              ORDER BY length(name) desc;
+          result.location := substring(result.location, '^' || postDir || ws || '+(.*)');
+        END IF;
+      END IF;
+    ELSE
+      -- internal is not null, but is not at the end of the location string
+      -- look for post direction before the internal address
+      SELECT INTO tempString substring(fullStreet, '(?i)' || streetType
+          || ws || '+(' || name || ')' || ws || '+' || result.internal)
+          FROM direction_lookup WHERE texticregexeq(fullStreet, '(?i)'
+          || ws || name || ws || '+' || result.internal) ORDER BY length(name) desc;
+      IF tempString IS NOT NULL THEN
+        postDir := tempString;
+        -- Same predicate and ordering as the query above; matching any
+        -- direction anywhere in fullStreet could pick an unrelated row.
+        SELECT INTO result.postDirAbbrev abbrev FROM direction_lookup
+            WHERE texticregexeq(fullStreet, '(?i)' || ws || name || ws
+            || '+' || result.internal) ORDER BY length(name) desc;
+      END IF;
+    END IF;
+  ELSE
+    -- No street type was found
+
+    -- If an internal address was given, then the split becomes easy, and the
+    -- street name is everything before it, without directions.
+    IF result.internal IS NOT NULL THEN
+      reducedStreet := substring(fullStreet, '(?i)^(.*?)' || ws || '+'
+          || result.internal);
+      SELECT INTO tempInt count(*) FROM direction_lookup WHERE
+          texticregexeq(reducedStreet, '(?i)' || ws || name || '$');
+      IF tempInt > 0 THEN
+        -- Both lookups are ordered the same way so that the text and the
+        -- abbreviation come from the same (longest) direction name.
+        SELECT INTO postDir substring(reducedStreet, '(?i)' || ws || '('
+            || name || ')' || '$') FROM direction_lookup
+            WHERE texticregexeq(reducedStreet, '(?i)' || ws || name || '$')
+            ORDER BY length(name) DESC;
+        SELECT INTO result.postDirAbbrev abbrev FROM direction_lookup
+            WHERE texticregexeq(reducedStreet, '(?i)' || ws || name || '$')
+            ORDER BY length(name) DESC;
+      END IF;
+      SELECT INTO tempString substring(reducedStreet, '(?i)^(' || name
+          || ')' || ws) FROM direction_lookup WHERE
+          texticregexeq(reducedStreet, '(?i)^(' || name || ')' || ws)
+          ORDER BY length(name) DESC;
+      IF tempString IS NOT NULL THEN
+        preDir := tempString;
+        SELECT INTO result.preDirAbbrev abbrev FROM direction_lookup WHERE
+            texticregexeq(reducedStreet, '(?i)(^' || name || ')' || ws)
+            ORDER BY length(name) DESC;
+        result.streetName := substring(reducedStreet, '(?i)^' || preDir || ws
+            || '+(.*?)(?:' || ws || '+' || cull_null(postDir) || '|$)');
+      ELSE
+        result.streetName := substring(reducedStreet, '(?i)^(.*?)(?:' || ws
+            || '+' || cull_null(postDir) || '|$)');
+      END IF;
+    ELSE
+
+      -- If a post direction is given, then the location is everything after,
+      -- the street name is everything before, less any pre direction.
+      SELECT INTO tempInt count(*) FROM direction_lookup
+          WHERE texticregexeq(fullStreet, '(?i)' || ws || name || '(?:'
+              || ws || '|$)');
+
+      IF tempInt = 1 THEN
+        -- A single postDir candidate was found. This makes it easier.
+        SELECT INTO postDir substring(fullStreet, '(?i)' || ws || '('
+            || name || ')(?:' || ws || '|$)') FROM direction_lookup WHERE
+            texticregexeq(fullStreet, '(?i)' || ws || name || '(?:'
+                || ws || '|$)');
+        SELECT INTO result.postDirAbbrev abbrev FROM direction_lookup
+            WHERE texticregexeq(fullStreet, '(?i)' || ws || name
+                || '(?:' || ws || '|$)');
+        IF result.location IS NULL THEN
+          result.location := substring(fullStreet, '(?i)' || ws || postDir
+              || ws || '+(.*?)$');
+        END IF;
+        reducedStreet := substring(fullStreet, '^(.*?)' || ws || '+'
+            || postDir);
+        SELECT INTO tempString substring(reducedStreet, '(?i)(^' || name
+            || ')' || ws) FROM direction_lookup WHERE
+            texticregexeq(reducedStreet, '(?i)(^' || name || ')' || ws)
+            ORDER BY length(name) DESC;
+        IF tempString IS NOT NULL THEN
+          preDir := tempString;
+          SELECT INTO result.preDirAbbrev abbrev FROM direction_lookup WHERE
+              texticregexeq(reducedStreet, '(?i)(^' || name || ')' || ws)
+              ORDER BY length(name) DESC;
+          result.streetName := substring(reducedStreet, '^' || preDir || ws
+              || '+(.*)');
+        ELSE
+          result.streetName := reducedStreet;
+        END IF;
+      ELSIF tempInt > 1 THEN
+        -- Multiple postDir candidates were found. We need to find the last
+        -- incident of a direction, but avoid getting the last word from
+        -- a two word direction. eg extracting "East" from "North East"
+        -- We do this by sorting by length, and taking the last direction
+        -- in the results that is not included in an earlier one.
+        -- This wont be a problem it preDir is North East and postDir is
+        -- East as the regex requires a space before the direction. Only
+        -- the East will return from the preDir.
+        tempInt := 0;
+        FOR rec IN SELECT abbrev, substring(fullStreet, '(?i)' || ws || '('
+            || name || ')(?:' || ws || '|$)') AS value
+            FROM direction_lookup
+            WHERE texticregexeq(fullStreet, '(?i)' || ws || name
+                || '(?:' || ws || '|$)')
+            ORDER BY length(name) desc LOOP
+          -- NOTE(review): tempInt is reset on every iteration, so the
+          -- position test below only checks that the value occurs in
+          -- fullStreet at all - confirm whether the reset is intended.
+          tempInt := 0;
+          IF tempInt < position(rec.value in fullStreet) THEN
+            IF postDir IS NULL THEN
+              tempInt := position(rec.value in fullStreet);
+              postDir := rec.value;
+              result.postDirAbbrev := rec.abbrev;
+            ELSIF NOT texticregexeq(postDir, '(?i)' || rec.value) THEN
+              tempInt := position(rec.value in fullStreet);
+              postDir := rec.value;
+              result.postDirAbbrev := rec.abbrev;
+            END IF;
+          END IF;
+        END LOOP;
+        IF result.location IS NULL THEN
+          result.location := substring(fullStreet, '(?i)' || ws || postDir || ws
+              || '+(.*?)$');
+        END IF;
+        reducedStreet := substring(fullStreet, '(?i)^(.*?)' || ws || '+'
+            || postDir);
+        SELECT INTO tempString substring(reducedStreet, '(?i)(^' || name
+            || ')' || ws) FROM direction_lookup WHERE
+            texticregexeq(reducedStreet, '(?i)(^' || name || ')' || ws)
+            ORDER BY length(name) DESC;
+        IF tempString IS NOT NULL THEN
+          preDir := tempString;
+          SELECT INTO result.preDirAbbrev abbrev FROM direction_lookup WHERE
+              texticregexeq(reducedStreet, '(?i)(^' || name || ')' || ws)
+              ORDER BY length(name) DESC;
+          result.streetName := substring(reducedStreet, '^' || preDir || ws
+              || '+(.*)');
+        ELSE
+          result.streetName := reducedStreet;
+        END IF;
+      ELSE
+
+        -- There is no street type, directional suffix or internal address
+        -- to allow distinction between street name and location.
+        IF result.location IS NULL THEN
+          result.location := location_extract(fullStreet, result.stateAbbrev);
+          -- If the location was found after a comma, remove it from
+          -- fullStreet. When no location was found, or it is not preceded by
+          -- a comma, fullStreet is kept as it is (instead of becoming NULL);
+          -- the street name extraction below copes with a location that is
+          -- still present.
+          IF result.location IS NOT NULL THEN
+            tempString := substring(fullStreet, '(?i)(.*),' || ws || '+' ||
+                result.location);
+            IF tempString IS NOT NULL THEN
+              fullStreet := tempString;
+            END IF;
+          END IF;
+        END IF;
+
+        -- Check for a direction prefix. The longest matching name is taken,
+        -- in line with the abbreviation lookup that follows.
+        SELECT INTO tempString substring(fullStreet, '(?i)(^' || name
+            || ')' || ws) FROM direction_lookup WHERE
+            texticregexeq(fullStreet, '(?i)(^' || name || ')' || ws)
+            ORDER BY length(name) DESC;
+        IF tempString IS NOT NULL THEN
+          preDir := tempString;
+          SELECT INTO result.preDirAbbrev abbrev FROM direction_lookup WHERE
+              texticregexeq(fullStreet, '(?i)(^' || name || ')' || ws)
+              ORDER BY length(name) DESC;
+          IF result.location IS NOT NULL THEN
+            -- The location may still be in the fullStreet, or may
+            -- have been removed already
+            result.streetName := substring(fullStreet, '^' || preDir || ws
+                || '+(.*?)(' || ws || '+' || result.location || '|$)');
+          ELSE
+            result.streetName := substring(fullStreet, '^' || preDir || ws
+                || '+(.*?)' || ws || '*');
+          END IF;
+        ELSE
+          IF result.location IS NOT NULL THEN
+            -- The location may still be in the fullStreet, or may
+            -- have been removed already
+            result.streetName := substring(fullStreet, '^(.*?)(' || ws
+                || '+' || result.location || '|$)');
+          ELSE
+            result.streetName := fullStreet;
+          END IF;
+        END IF;
+      END IF;
+    END IF;
+  END IF;
+
+  result.address := to_number(addressString, '99999999999');
+  result.zip := to_number(zipString, '99999');
+
+  result.parsed := TRUE;
+  RETURN result;
+END
+$_$ LANGUAGE plpgsql;
Added: trunk/extras/tiger_geocoder/normalize/state_extract.sql
===================================================================
--- trunk/extras/tiger_geocoder/normalize/state_extract.sql 2007-07-03 21:22:29 UTC (rev 2641)
+++ trunk/extras/tiger_geocoder/normalize/state_extract.sql 2007-07-03 21:30:34 UTC (rev 2642)
@@ -0,0 +1,82 @@
+-- state_extract(addressStringLessZipCode)
+-- Extracts the state from the end of the given string.
+--
+-- Trailing separators (spaces, commas, periods and other whitespace) are
+-- ignored.  The last word of what remains is then compared against the
+-- state_lookup table in up to three stages:
+--   1. case-insensitive match on the abbreviation (accepted only when
+--      exactly one distinct abbreviation matches);
+--   2. case-insensitive match on a full state name that ends with that
+--      word and that itself sits at the very end of the input;
+--   3. only when no state name ends with that word: a word-by-word
+--      soundex (fuzzy) comparison against the trailing words of the input.
+--
+-- Parameters:
+--   rawInput - address text with the state expected at its end.
+--
+-- Returns the state as written in the input and the approved state
+-- abbreviation, separated by a colon, or NULL when no state was identified
+-- (including when rawInput is NULL or its last word contains a digit).
+CREATE OR REPLACE FUNCTION state_extract(rawInput VARCHAR) RETURNS VARCHAR
+AS $_$
+DECLARE
+  ws VARCHAR;
+  trimmed VARCHAR;
+  lastWord VARCHAR;
+  tailWords VARCHAR;
+  wordCount INTEGER;
+  matchCount INTEGER;
+  abbrevHit VARCHAR;
+  nameSeen BOOLEAN;
+  allAlike BOOLEAN;
+  state VARCHAR;
+  stateAbbrev VARCHAR;
+  result VARCHAR;
+  rec RECORD;
+BEGIN
+  ws := E'[ ,.\t\n\f\r]';
+
+  -- Drop trailing separators first.  Without this, input ending in a space,
+  -- comma or period produces an empty "last word", which would match every
+  -- state name in the suffix comparison below.
+  trimmed := regexp_replace(rawInput, ws || '+$', '');
+
+  -- Separate out the last word of the state, and use it to compare to
+  -- the state lookup table to determine the entire name, as well as the
+  -- abbreviation associated with it.  The word must be preceded by a
+  -- separator and may not contain digits; otherwise lastWord is NULL and
+  -- every stage below finds nothing.
+  lastWord := substring(trimmed from ws || E'+([^ ,.\t\n\f\r0-9]*?)$');
+
+  -- Stage 1: the last word is a state abbreviation.
+  SELECT count(*), min(abbrev) INTO matchCount, abbrevHit
+    FROM (SELECT DISTINCT abbrev FROM state_lookup
+        WHERE upper(abbrev) = upper(lastWord)) AS abbrev_matches;
+
+  IF matchCount = 1 THEN
+    state := lastWord;
+    stateAbbrev := abbrevHit;
+  ELSE
+    -- Stage 2: the last word ends a full state name.  Longest names are
+    -- tried first so that 'West Virginia' is preferred over 'Virginia',
+    -- and the name must be a whole-word match at the end of the input so
+    -- that a state name appearing earlier (e.g. in a street name) is not
+    -- picked up.
+    nameSeen := FALSE;
+    FOR rec IN SELECT name, abbrev FROM state_lookup
+        WHERE upper(name) LIKE upper('%' || lastWord)
+        ORDER BY length(name) DESC LOOP
+      nameSeen := TRUE;
+      -- Returns the name exactly as it was typed in the input.
+      state := substring(trimmed,
+          '(?i)(?:^|' || ws || ')(' || rec.name || ')$');
+      IF state IS NOT NULL THEN
+        stateAbbrev := rec.abbrev;
+        EXIT;
+      END IF;
+    END LOOP;
+
+    -- Stage 3: no state name ends with the last word, so perform a fuzzy
+    -- match.  Candidates are names whose end_soundex() equals the soundex
+    -- of the last word (end_soundex is defined elsewhere; presumably the
+    -- soundex of the name's final word).  Longest names first, for the
+    -- same reason as above.
+    IF NOT nameSeen THEN
+      FOR rec IN SELECT name, abbrev FROM state_lookup
+          WHERE soundex(lastWord) = end_soundex(name)
+          ORDER BY length(name) DESC LOOP
+        wordCount := count_words(rec.name);
+        tailWords := get_last_words(trimmed, wordCount);
+        allAlike := TRUE;
+        FOR i IN 1..wordCount LOOP
+          -- IS DISTINCT FROM treats a NULL soundex (e.g. the input has
+          -- fewer words than the candidate name) as a mismatch rather
+          -- than letting the comparison silently pass.
+          IF soundex(split_part(tailWords, ' ', i)) IS DISTINCT FROM
+              soundex(split_part(rec.name, ' ', i)) THEN
+            allAlike := FALSE;
+            EXIT;
+          END IF;
+        END LOOP;
+        IF allAlike THEN
+          state := tailWords;
+          stateAbbrev := rec.abbrev;
+          EXIT;
+        END IF;
+      END LOOP;
+    END IF;
+  END IF;
+
+  IF state IS NOT NULL AND stateAbbrev IS NOT NULL THEN
+    result := state || ':' || stateAbbrev;
+  END IF;
+
+  RETURN result;
+END;
+$_$ LANGUAGE plpgsql;
More information about the postgis-commits
mailing list