[postgis-commits] svn - r2642 - in trunk/extras/tiger_geocoder: . geocode normalize

postgis-commits at postgis.refractions.net postgis-commits at postgis.refractions.net
Tue Jul 3 14:30:34 PDT 2007


Author: snowman
Date: 2007-07-03 14:30:34 -0700 (Tue, 03 Jul 2007)
New Revision: 2642

Added:
   trunk/extras/tiger_geocoder/create_geocode.sql
   trunk/extras/tiger_geocoder/geocode/
   trunk/extras/tiger_geocoder/geocode/geocode.sql
   trunk/extras/tiger_geocoder/geocode/geocode_address.sql
   trunk/extras/tiger_geocoder/geocode/geocode_address_countysub_exact.sql
   trunk/extras/tiger_geocoder/geocode/geocode_address_countysub_fuzzy.sql
   trunk/extras/tiger_geocoder/geocode/geocode_address_place_exact.sql
   trunk/extras/tiger_geocoder/geocode/geocode_address_place_fuzzy.sql
   trunk/extras/tiger_geocoder/geocode/geocode_address_state.sql
   trunk/extras/tiger_geocoder/geocode/geocode_address_zip.sql
   trunk/extras/tiger_geocoder/geocode/geocode_get_point.sql
   trunk/extras/tiger_geocoder/geocode/geocode_location.sql
   trunk/extras/tiger_geocoder/geocode/geocode_zip.sql
   trunk/extras/tiger_geocoder/geocode/includes_address.sql
   trunk/extras/tiger_geocoder/geocode/interpolate_from_address.sql
   trunk/extras/tiger_geocoder/geocode/rate_attributes.sql
   trunk/extras/tiger_geocoder/normalize/
   trunk/extras/tiger_geocoder/normalize/count_words.sql
   trunk/extras/tiger_geocoder/normalize/end_soundex.sql
   trunk/extras/tiger_geocoder/normalize/get_last_words.sql
   trunk/extras/tiger_geocoder/normalize/location_extract.sql
   trunk/extras/tiger_geocoder/normalize/location_extract_countysub_exact.sql
   trunk/extras/tiger_geocoder/normalize/location_extract_countysub_fuzzy.sql
   trunk/extras/tiger_geocoder/normalize/location_extract_place_exact.sql
   trunk/extras/tiger_geocoder/normalize/location_extract_place_fuzzy.sql
   trunk/extras/tiger_geocoder/normalize/normalize_address.sql
   trunk/extras/tiger_geocoder/normalize/state_extract.sql
Log:
Add in broken out/updated normalize/geocode functions
- create_geocode.sql:
  Main creation script for pulling in all the other
  .sql files to create all the functions and whatnot
- normalize/
  Normalization routines, includes mapping 'North' -> 'N',
  'Virginia' -> 'VA', etc, etc.
- geocode/
  Actual geocoding routines to find the point geometry of the
  address.  Includes interpolation across the linestring found
  for the location (perhaps not the best), and fallbacks to
  zip-code and city, state matches using the associated lookup
  tables for those.  Also currently returns a set rather than
  a cursor, that's up for some debate but the cursor makes it
  difficult to do things like fallback, imv.  Especially since
  references to it from another pl/pgsql function require it to
  be a specific record type across multiple calls.  That's
  currently a problem. :/


Added: trunk/extras/tiger_geocoder/create_geocode.sql
===================================================================
--- trunk/extras/tiger_geocoder/create_geocode.sql	2007-07-03 21:22:29 UTC (rev 2641)
+++ trunk/extras/tiger_geocoder/create_geocode.sql	2007-07-03 21:30:34 UTC (rev 2642)
@@ -0,0 +1,58 @@
+-- psql driver script: (re)creates the norm_addy type and geocoder functions.
+-- Tiger is where we're going to create the functions, but we need
+-- the PostGIS functions/types which are in public.
+SET search_path TO tiger,public;
+
+-- Type used to pass a normalized address between functions (CASCADE drops dependents too)
+DROP TYPE IF EXISTS norm_addy CASCADE;
+CREATE TYPE norm_addy AS (
+    address INTEGER,            -- street (house) number
+    preDirAbbrev VARCHAR,       -- direction prefix, abbreviated
+    streetName VARCHAR,
+    streetTypeAbbrev VARCHAR,
+    postDirAbbrev VARCHAR,      -- direction suffix, abbreviated
+    internal VARCHAR,           -- geocode() prints this between street and place
+    location VARCHAR,           -- matched against place and county subdivision
+    stateAbbrev VARCHAR,
+    zip INTEGER,
+    parsed BOOLEAN);            -- geocode() returns no rows when this is false
+
+-- System/general helper functions (\i paths are relative to psql's working dir)
+\i utility/utmzone.sql
+\i utility/cull_null.sql
+\i utility/nullable_levenshtein.sql
+\i utility/levenshtein_ignore_case.sql
+
+---- Address normalizer
+-- General helpers
+\i normalize/end_soundex.sql
+\i normalize/count_words.sql
+\i normalize/state_extract.sql
+\i normalize/get_last_words.sql
+-- Location extraction/normalization helpers
+\i normalize/location_extract_countysub_exact.sql
+\i normalize/location_extract_countysub_fuzzy.sql
+\i normalize/location_extract_place_exact.sql
+\i normalize/location_extract_place_fuzzy.sql
+\i normalize/location_extract.sql
+-- Normalization API: normalize_address(), called mainly by geocode().
+\i normalize/normalize_address.sql
+
+---- Geocoder functions
+-- General helpers
+\i geocode/rate_attributes.sql
+\i geocode/includes_address.sql
+\i geocode/interpolate_from_address.sql
+-- Actual lookups/geocoder helpers (each returns a cursor, or NULL on no match)
+\i geocode/geocode_address_countysub_exact.sql
+\i geocode/geocode_address_countysub_fuzzy.sql
+\i geocode/geocode_address_place_exact.sql
+\i geocode/geocode_address_place_fuzzy.sql
+\i geocode/geocode_address.sql
+\i geocode/geocode_address_state.sql
+\i geocode/geocode_address_zip.sql
+\i geocode/geocode_zip.sql
+\i geocode/geocode_location.sql
+-- Geocode API, called by the user
+\i geocode/geocode_get_point.sql
+\i geocode/geocode.sql

Added: trunk/extras/tiger_geocoder/geocode/geocode.sql
===================================================================
--- trunk/extras/tiger_geocoder/geocode/geocode.sql	2007-07-03 21:22:29 UTC (rev 2641)
+++ trunk/extras/tiger_geocoder/geocode/geocode.sql	2007-07-03 21:30:34 UTC (rev 2642)
@@ -0,0 +1,85 @@
+-- geocode(input): normalize a free-form address and return candidate matches.
+-- Emits one row per record fetched from the lookup cursor, in cursor order:
+--   NORM_ADDY  formatted address text built from the matched record
+--   GEOMOUT    the record's address_geom
+--   RATING     the record's rating
+-- Returns no rows for NULL input, an unparsed address, or no lookup result.
+CREATE OR REPLACE FUNCTION geocode(
+    input VARCHAR,
+    OUT NORM_ADDY VARCHAR,
+    OUT GEOMOUT GEOMETRY,
+    OUT RATING INTEGER
+) RETURNS SETOF RECORD
+AS $_$
+DECLARE
+  parsed norm_addy;
+  result REFCURSOR;
+  rec RECORD;
+BEGIN
+  IF input IS NULL THEN
+    RETURN;
+  END IF;
+
+  -- Pass the input string into the address normalizer
+  parsed := normalize_address(input);
+  IF NOT parsed.parsed THEN
+    RETURN;
+  END IF;
+
+  -- Go for the full monty if we've got enough info
+  IF parsed.address IS NOT NULL AND
+      parsed.streetName IS NOT NULL AND
+      (parsed.zip IS NOT NULL OR parsed.stateAbbrev IS NOT NULL) THEN
+    result := geocode_address(parsed);
+  END IF;
+
+  -- Next best is zipcode, if we've got it
+  IF result IS NULL AND parsed.zip IS NOT NULL THEN
+    result := geocode_zip(parsed);
+  END IF;
+
+  -- No zip code, try state/location; need both or we'd match far too much.
+  IF result IS NULL AND parsed.stateAbbrev IS NOT NULL AND parsed.location IS NOT NULL THEN
+    result := geocode_location(parsed);
+  END IF;
+
+  IF result IS NULL THEN
+    RETURN;
+  END IF;
+
+  LOOP
+    FETCH result INTO rec;
+    EXIT WHEN NOT FOUND;
+
+    NORM_ADDY := cull_null(parsed.address::text)
+              || CASE WHEN rec.fedirp IS NOT NULL THEN ' ' ELSE '' END
+              || cull_null(rec.fedirp)
+              || CASE WHEN rec.fename IS NOT NULL THEN ' ' ELSE '' END
+              || cull_null(rec.fename)
+              || CASE WHEN rec.fetype IS NOT NULL THEN ' ' ELSE '' END
+              || cull_null(rec.fetype)
+              || CASE WHEN rec.fedirs IS NOT NULL THEN ' ' ELSE '' END
+              || cull_null(rec.fedirs)
+              || CASE WHEN
+                   parsed.address IS NOT NULL OR
+                   rec.fename IS NOT NULL
+                   THEN ', ' ELSE '' END
+              || cull_null(parsed.internal)
+              || CASE WHEN parsed.internal IS NOT NULL THEN ', ' ELSE '' END
+              || cull_null(rec.place)
+              || CASE WHEN rec.place IS NOT NULL THEN ', ' ELSE '' END
+              || cull_null(rec.state)
+              || CASE WHEN rec.state IS NOT NULL THEN ' ' ELSE '' END
+              || cull_null(lpad(rec.zip::text,5,'0'));  -- lpad() needs text; re-pads leading zeros
+
+    GEOMOUT := rec.address_geom;
+    RATING := rec.rating;
+
+    RETURN NEXT;
+  END LOOP;
+
+  -- Release the portal instead of holding it until the transaction ends.
+  CLOSE result;
+  RETURN;
+END;
+$_$ LANGUAGE plpgsql;

Added: trunk/extras/tiger_geocoder/geocode/geocode_address.sql
===================================================================
--- trunk/extras/tiger_geocoder/geocode/geocode_address.sql	2007-07-03 21:22:29 UTC (rev 2641)
+++ trunk/extras/tiger_geocoder/geocode/geocode_address.sql	2007-07-03 21:30:34 UTC (rev 2642)
@@ -0,0 +1,94 @@
+-- geocode_address(parsed)
+--
+-- Street-level lookup for an already-normalized address.
+--
+-- Parameters:
+--   parsed  normalized address; address and streetName must be non-NULL.
+--           stateAbbrev/location may be filled in here from the zip code.
+-- Returns:
+--   The cursor produced by the first lookup stage that yields one, or NULL
+--   when a required field is missing or no stage finds a match.
+--
+-- Lookup order: zip, exact place, exact county subdivision, fuzzy place,
+-- fuzzy county subdivision, state only.
+CREATE OR REPLACE FUNCTION geocode_address(
+    parsed NORM_ADDY
+) RETURNS REFCURSOR
+AS $_$
+DECLARE
+  v_result REFCURSOR;
+  v_zip_row RECORD;
+BEGIN
+  -- A house number and a street name are both required; bail out early
+  -- when either one is missing.
+  IF parsed.address IS NULL OR parsed.streetName IS NULL THEN
+    RETURN NULL;
+  END IF;
+
+  -- Zip code: when present it narrows the search the most, so it goes
+  -- first.  Zip codes are never matched fuzzily.
+  IF parsed.zip IS NOT NULL THEN
+    v_result := geocode_address_zip(v_result, parsed);
+    IF v_result IS NOT NULL THEN
+      RETURN v_result;
+    END IF;
+
+    -- The zip lookup came up empty.  If the state and/or location is still
+    -- unknown, derive it from the zip code so the location-based stages
+    -- below have something to work with.
+    IF parsed.stateAbbrev IS NULL OR parsed.location IS NULL THEN
+      SELECT INTO v_zip_row *
+        FROM zip_lookup_base JOIN state_lookup ON (state = name)
+        WHERE zip = parsed.zip;
+      IF FOUND THEN
+        parsed.stateAbbrev := coalesce(parsed.stateAbbrev, v_zip_row.abbrev);
+        parsed.location := coalesce(parsed.location, v_zip_row.city);
+      END IF;
+    END IF;
+  END IF;
+
+  -- Location: each geocoding query is attempted only when the matching
+  -- lookup-table check says the location could match, which saves running
+  -- the much larger queries for nothing.  Exact matches on place and
+  -- county subdivision are tried before the fuzzy ones.
+  IF parsed.location IS NOT NULL THEN
+    IF location_extract_place_exact(parsed.location, parsed.stateAbbrev) IS NOT NULL THEN
+      v_result := geocode_address_place_exact(v_result, parsed);
+      IF v_result IS NOT NULL THEN
+        RETURN v_result;
+      END IF;
+    END IF;
+
+    IF location_extract_countysub_exact(parsed.location, parsed.stateAbbrev) IS NOT NULL THEN
+      v_result := geocode_address_countysub_exact(v_result, parsed);
+      IF v_result IS NOT NULL THEN
+        RETURN v_result;
+      END IF;
+    END IF;
+
+    IF location_extract_place_fuzzy(parsed.location, parsed.stateAbbrev) IS NOT NULL THEN
+      v_result := geocode_address_place_fuzzy(v_result, parsed);
+      IF v_result IS NOT NULL THEN
+        RETURN v_result;
+      END IF;
+    END IF;
+
+    IF location_extract_countysub_fuzzy(parsed.location, parsed.stateAbbrev) IS NOT NULL THEN
+      v_result := geocode_address_countysub_fuzzy(v_result, parsed);
+      IF v_result IS NOT NULL THEN
+        RETURN v_result;
+      END IF;
+    END IF;
+  END IF;
+
+  -- Last resort: search on the state alone.
+  IF parsed.stateAbbrev IS NOT NULL THEN
+    v_result := geocode_address_state(v_result, parsed);
+    IF v_result IS NOT NULL THEN
+      RETURN v_result;
+    END IF;
+  END IF;
+
+  RETURN NULL;
+END;
+$_$ LANGUAGE plpgsql;

Added: trunk/extras/tiger_geocoder/geocode/geocode_address_countysub_exact.sql
===================================================================
--- trunk/extras/tiger_geocoder/geocode/geocode_address_countysub_exact.sql	2007-07-03 21:22:29 UTC (rev 2641)
+++ trunk/extras/tiger_geocoder/geocode/geocode_address_countysub_exact.sql	2007-07-03 21:30:34 UTC (rev 2642)
@@ -0,0 +1,161 @@
+-- Address lookup limited to roads whose cousub equals parsed.location exactly
+-- (street names are compared by soundex).  Returns a cursor over the matches
+-- ordered by rating, or NULL when parsed.location is NULL or nothing matches.
+CREATE OR REPLACE FUNCTION geocode_address_countysub_exact(
+    result REFCURSOR,
+    parsed NORM_ADDY
+) RETURNS REFCURSOR
+AS $_$
+DECLARE
+  result REFCURSOR;  -- shadows the result parameter, which is therefore unused
+  tempInt BIGINT;    -- count(*) result; a VARCHAR here cannot be compared to 0
+BEGIN
+  IF parsed.location IS NULL THEN
+    RETURN NULL;
+  END IF;
+
+  -- Check to see if the road name can be matched.
+  IF parsed.stateAbbrev IS NOT NULL THEN
+    SELECT INTO tempInt count(*) FROM tiger_geocode_roads
+        WHERE parsed.location = tiger_geocode_roads.cousub
+        AND soundex(parsed.streetName) = soundex(tiger_geocode_roads.fename)
+        AND parsed.stateAbbrev = tiger_geocode_roads.state;
+  ELSE
+    SELECT INTO tempInt count(*) FROM tiger_geocode_roads
+        WHERE parsed.location = tiger_geocode_roads.cousub
+        AND soundex(parsed.streetName) = soundex(tiger_geocode_roads.fename);
+  END IF;
+
+  IF tempInt = 0 THEN
+    RETURN NULL;
+  END IF;
+
+  -- The road name matches, now we check to see if the addresses match
+  IF parsed.stateAbbrev IS NOT NULL THEN
+    SELECT INTO tempInt count(*)
+    FROM (
+      SELECT *, rate_attributes(parsed.preDirAbbrev, tiger_geocode_roads.fedirp,
+        parsed.streetName, tiger_geocode_roads.fename, parsed.streetTypeAbbrev,
+        tiger_geocode_roads.fetype, parsed.postDirAbbrev,
+        tiger_geocode_roads.fedirs) as rating
+      FROM tiger_geocode_roads
+      WHERE parsed.location = tiger_geocode_roads.cousub
+        AND soundex(parsed.streetName) = soundex(tiger_geocode_roads.fename)
+        AND parsed.stateAbbrev = tiger_geocode_roads.state
+        ) AS subquery, roads_local
+    WHERE includes_address(parsed.address, roads_local.fraddl, roads_local.toaddl,
+        roads_local.fraddr, roads_local.toaddr)
+      AND subquery.tlid = roads_local.tlid;
+  ELSE
+    SELECT INTO tempInt count(*)
+    FROM (
+      SELECT *, rate_attributes(parsed.preDirAbbrev, tiger_geocode_roads.fedirp,
+        parsed.streetName, tiger_geocode_roads.fename, parsed.streetTypeAbbrev,
+        tiger_geocode_roads.fetype, parsed.postDirAbbrev,
+        tiger_geocode_roads.fedirs) as rating
+      FROM tiger_geocode_roads
+      WHERE parsed.location = tiger_geocode_roads.cousub
+        AND soundex(parsed.streetName) = soundex(tiger_geocode_roads.fename)
+        ) AS subquery, roads_local
+    WHERE includes_address(parsed.address, roads_local.fraddl, roads_local.toaddl,
+        roads_local.fraddr, roads_local.toaddr)
+      AND subquery.tlid = roads_local.tlid;
+  END IF;
+
+  IF tempInt = 0 THEN
+    RETURN NULL;
+  END IF;
+
+  IF parsed.stateAbbrev IS NOT NULL THEN
+    OPEN result FOR
+    SELECT
+        roads_local.fedirp as fedirp,
+        roads_local.fename as fename,
+        roads_local.fetype as fetype,
+        roads_local.fedirs as fedirs,
+        CASE WHEN (parsed.address % 2) = (roads_local.fraddl % 2)  -- same parity as the left range => left-side attributes
+          OR (parsed.address % 2) = (roads_local.toaddl % 2)
+          THEN coalesce(pl.name,zipl.city,csl.name,col.name) ELSE coalesce(pr.name,zipr.city,csr.name,cor.name) END as place,
+        CASE WHEN (parsed.address % 2) = (roads_local.fraddl % 2)
+          OR (parsed.address % 2) = (roads_local.toaddl % 2)
+          THEN sl.abbrev ELSE sr.abbrev END as state,
+        CASE WHEN (parsed.address % 2) = (roads_local.fraddl % 2)
+          OR (parsed.address % 2) = (roads_local.toaddl % 2)
+          THEN roads_local.zipl ELSE roads_local.zipr END as zip,  -- qualified: zipl/zipr are also table aliases
+        interpolate_from_address(parsed.address, roads_local.fraddl,
+            roads_local.toaddl, roads_local.fraddr, roads_local.toaddr,
+            roads_local.geom) as address_geom,
+        subquery.rating as rating
+    FROM (
+      SELECT *, rate_attributes(parsed.preDirAbbrev, tiger_geocode_roads.fedirp,
+        parsed.streetName, tiger_geocode_roads.fename, parsed.streetTypeAbbrev,
+        tiger_geocode_roads.fetype, parsed.postDirAbbrev,
+        tiger_geocode_roads.fedirs, parsed.location,
+        tiger_geocode_roads.cousub) as rating
+      FROM tiger_geocode_roads
+      WHERE parsed.location = tiger_geocode_roads.cousub
+        AND soundex(parsed.streetName) = soundex(tiger_geocode_roads.fename)
+        AND parsed.stateAbbrev = tiger_geocode_roads.state
+        ) AS subquery
+        JOIN roads_local ON (subquery.tlid = roads_local.tlid)
+        JOIN state_lookup sl ON (roads_local.statel = sl.st_code)
+        JOIN state_lookup sr ON (roads_local.stater = sr.st_code)
+        LEFT JOIN place_lookup pl ON (roads_local.statel = pl.st_code AND roads_local.placel = pl.pl_code)
+        LEFT JOIN place_lookup pr ON (roads_local.stater = pr.st_code AND roads_local.placer = pr.pl_code)
+        LEFT JOIN county_lookup col ON (roads_local.statel = col.st_code AND roads_local.countyl = col.co_code)
+        LEFT JOIN county_lookup cor ON (roads_local.stater = cor.st_code AND roads_local.countyr = cor.co_code)
+        LEFT JOIN countysub_lookup csl ON (roads_local.statel = csl.st_code AND roads_local.countyl = csl.co_code AND roads_local.cousubl = csl.cs_code)
+        LEFT JOIN countysub_lookup csr ON (roads_local.stater = csr.st_code AND roads_local.countyr = csr.co_code AND roads_local.cousubr = csr.cs_code)
+        LEFT JOIN zip_lookup_base zipl ON (roads_local.zipl = zipl.zip)
+        LEFT JOIN zip_lookup_base zipr ON (roads_local.zipr = zipr.zip)
+    WHERE includes_address(parsed.address, roads_local.fraddl, roads_local.toaddl,
+        roads_local.fraddr, roads_local.toaddr)
+    ORDER BY subquery.rating;
+  ELSE
+    OPEN result FOR
+    SELECT
+        roads_local.fedirp as fedirp,
+        roads_local.fename as fename,
+        roads_local.fetype as fetype,
+        roads_local.fedirs as fedirs,
+        CASE WHEN (parsed.address % 2) = (roads_local.fraddl % 2)
+          OR (parsed.address % 2) = (roads_local.toaddl % 2)
+          THEN coalesce(pl.name,zipl.city,csl.name,col.name) ELSE coalesce(pr.name,zipr.city,csr.name,cor.name) END as place,
+        CASE WHEN (parsed.address % 2) = (roads_local.fraddl % 2)
+          OR (parsed.address % 2) = (roads_local.toaddl % 2)
+          THEN sl.abbrev ELSE sr.abbrev END as state,
+        CASE WHEN (parsed.address % 2) = (roads_local.fraddl % 2)
+          OR (parsed.address % 2) = (roads_local.toaddl % 2)
+          THEN roads_local.zipl ELSE roads_local.zipr END as zip,
+        interpolate_from_address(parsed.address, roads_local.fraddl,
+            roads_local.toaddl, roads_local.fraddr, roads_local.toaddr,
+            roads_local.geom) as address_geom,
+        subquery.rating as rating
+    FROM (
+      SELECT *, rate_attributes(parsed.preDirAbbrev, tiger_geocode_roads.fedirp,
+        parsed.streetName, tiger_geocode_roads.fename, parsed.streetTypeAbbrev,
+        tiger_geocode_roads.fetype, parsed.postDirAbbrev,
+        tiger_geocode_roads.fedirs, parsed.location,
+        tiger_geocode_roads.cousub) as rating
+      FROM tiger_geocode_roads
+      WHERE parsed.location = tiger_geocode_roads.cousub
+        AND soundex(parsed.streetName) = soundex(tiger_geocode_roads.fename)
+        ) AS subquery
+        JOIN roads_local ON (subquery.tlid = roads_local.tlid)
+        JOIN state_lookup sl ON (roads_local.statel = sl.st_code)
+        JOIN state_lookup sr ON (roads_local.stater = sr.st_code)
+        LEFT JOIN place_lookup pl ON (roads_local.statel = pl.st_code AND roads_local.placel = pl.pl_code)
+        LEFT JOIN place_lookup pr ON (roads_local.stater = pr.st_code AND roads_local.placer = pr.pl_code)
+        LEFT JOIN county_lookup col ON (roads_local.statel = col.st_code AND roads_local.countyl = col.co_code)
+        LEFT JOIN county_lookup cor ON (roads_local.stater = cor.st_code AND roads_local.countyr = cor.co_code)
+        LEFT JOIN countysub_lookup csl ON (roads_local.statel = csl.st_code AND roads_local.countyl = csl.co_code AND roads_local.cousubl = csl.cs_code)
+        LEFT JOIN countysub_lookup csr ON (roads_local.stater = csr.st_code AND roads_local.countyr = csr.co_code AND roads_local.cousubr = csr.cs_code)
+        LEFT JOIN zip_lookup_base zipl ON (roads_local.zipl = zipl.zip)
+        LEFT JOIN zip_lookup_base zipr ON (roads_local.zipr = zipr.zip)
+    WHERE includes_address(parsed.address, roads_local.fraddl, roads_local.toaddl,
+        roads_local.fraddr, roads_local.toaddr)
+    ORDER BY subquery.rating;
+  END IF;
+  RETURN result;
+END;
+$_$ LANGUAGE plpgsql;

Added: trunk/extras/tiger_geocoder/geocode/geocode_address_countysub_fuzzy.sql
===================================================================
--- trunk/extras/tiger_geocoder/geocode/geocode_address_countysub_fuzzy.sql	2007-07-03 21:22:29 UTC (rev 2641)
+++ trunk/extras/tiger_geocoder/geocode/geocode_address_countysub_fuzzy.sql	2007-07-03 21:30:34 UTC (rev 2642)
@@ -0,0 +1,162 @@
+-- Address lookup limited to roads whose cousub sounds like parsed.location
+-- (soundex match on both the location and the street name).  Returns a
+-- cursor over the matches ordered by rating, or NULL when parsed.location
+-- is NULL or nothing matches.
+CREATE OR REPLACE FUNCTION geocode_address_countysub_fuzzy(
+    result REFCURSOR,
+    parsed NORM_ADDY
+) RETURNS REFCURSOR
+AS $_$
+DECLARE
+  result REFCURSOR;  -- shadows the result parameter, which is therefore unused
+  tempInt BIGINT;    -- count(*) result; a VARCHAR here cannot be compared to 0
+BEGIN
+  IF parsed.location IS NULL THEN
+    RETURN NULL;
+  END IF;
+
+  -- Check to see if the road name can be matched.
+  IF parsed.stateAbbrev IS NOT NULL THEN
+    SELECT INTO tempInt count(*) FROM tiger_geocode_roads
+        WHERE soundex(parsed.location) = soundex(tiger_geocode_roads.cousub)
+        AND soundex(parsed.streetName) = soundex(tiger_geocode_roads.fename)
+        AND parsed.stateAbbrev = tiger_geocode_roads.state;
+  ELSE
+    SELECT INTO tempInt count(*) FROM tiger_geocode_roads
+        WHERE soundex(parsed.location) = soundex(tiger_geocode_roads.cousub)
+        AND soundex(parsed.streetName) = soundex(tiger_geocode_roads.fename);
+  END IF;
+
+  IF tempInt = 0 THEN
+    RETURN NULL;
+  END IF;
+
+  -- The road name matches, now we check to see if the addresses match
+  IF parsed.stateAbbrev IS NOT NULL THEN
+    SELECT INTO tempInt count(*)
+    FROM (
+      SELECT *, rate_attributes(parsed.preDirAbbrev, tiger_geocode_roads.fedirp,
+        parsed.streetName, tiger_geocode_roads.fename, parsed.streetTypeAbbrev,
+        tiger_geocode_roads.fetype, parsed.postDirAbbrev,
+        tiger_geocode_roads.fedirs) as rating
+      FROM tiger_geocode_roads
+      WHERE soundex(parsed.location) = soundex(tiger_geocode_roads.cousub)
+        AND soundex(parsed.streetName) = soundex(tiger_geocode_roads.fename)
+        AND parsed.stateAbbrev = tiger_geocode_roads.state
+        ) AS subquery, roads_local
+    WHERE includes_address(parsed.address, roads_local.fraddl, roads_local.toaddl,
+        roads_local.fraddr, roads_local.toaddr)
+      AND subquery.tlid = roads_local.tlid;
+  ELSE
+    SELECT INTO tempInt count(*)
+    FROM (
+      SELECT *, rate_attributes(parsed.preDirAbbrev, tiger_geocode_roads.fedirp,
+        parsed.streetName, tiger_geocode_roads.fename, parsed.streetTypeAbbrev,
+        tiger_geocode_roads.fetype, parsed.postDirAbbrev,
+        tiger_geocode_roads.fedirs) as rating
+      FROM tiger_geocode_roads
+      WHERE soundex(parsed.location) = soundex(tiger_geocode_roads.cousub)
+        AND soundex(parsed.streetName) = soundex(tiger_geocode_roads.fename)
+        ) AS subquery, roads_local
+    WHERE includes_address(parsed.address, roads_local.fraddl, roads_local.toaddl,
+        roads_local.fraddr, roads_local.toaddr)
+      AND subquery.tlid = roads_local.tlid;
+  END IF;
+
+  IF tempInt = 0 THEN
+    RETURN NULL;
+  END IF;
+
+  IF parsed.stateAbbrev IS NOT NULL THEN
+    OPEN result FOR
+    SELECT
+        roads_local.fedirp as fedirp,
+        roads_local.fename as fename,
+        roads_local.fetype as fetype,
+        roads_local.fedirs as fedirs,
+        CASE WHEN (parsed.address % 2) = (roads_local.fraddl % 2)  -- same parity as the left range => left-side attributes
+          OR (parsed.address % 2) = (roads_local.toaddl % 2)
+          THEN coalesce(pl.name,zipl.city,csl.name,col.name) ELSE coalesce(pr.name,zipr.city,csr.name,cor.name) END as place,
+        CASE WHEN (parsed.address % 2) = (roads_local.fraddl % 2)
+          OR (parsed.address % 2) = (roads_local.toaddl % 2)
+          THEN sl.abbrev ELSE sr.abbrev END as state,
+        CASE WHEN (parsed.address % 2) = (roads_local.fraddl % 2)
+          OR (parsed.address % 2) = (roads_local.toaddl % 2)
+          THEN roads_local.zipl ELSE roads_local.zipr END as zip,  -- qualified: zipl/zipr are also table aliases
+        interpolate_from_address(parsed.address, roads_local.fraddl,
+            roads_local.toaddl, roads_local.fraddr, roads_local.toaddr,
+            roads_local.geom) as address_geom,
+        subquery.rating as rating
+    FROM (
+      SELECT *, rate_attributes(parsed.preDirAbbrev, tiger_geocode_roads.fedirp,
+        parsed.streetName, tiger_geocode_roads.fename, parsed.streetTypeAbbrev,
+        tiger_geocode_roads.fetype, parsed.postDirAbbrev,
+        tiger_geocode_roads.fedirs, parsed.location,
+        tiger_geocode_roads.cousub) as rating
+      FROM tiger_geocode_roads
+      WHERE soundex(parsed.location) = soundex(tiger_geocode_roads.cousub)
+        AND soundex(parsed.streetName) = soundex(tiger_geocode_roads.fename)
+        AND parsed.stateAbbrev = tiger_geocode_roads.state
+        ) AS subquery
+        JOIN roads_local ON (subquery.tlid = roads_local.tlid)
+        JOIN state_lookup sl ON (roads_local.statel = sl.st_code)
+        JOIN state_lookup sr ON (roads_local.stater = sr.st_code)
+        LEFT JOIN place_lookup pl ON (roads_local.statel = pl.st_code AND roads_local.placel = pl.pl_code)
+        LEFT JOIN place_lookup pr ON (roads_local.stater = pr.st_code AND roads_local.placer = pr.pl_code)
+        LEFT JOIN county_lookup col ON (roads_local.statel = col.st_code AND roads_local.countyl = col.co_code)
+        LEFT JOIN county_lookup cor ON (roads_local.stater = cor.st_code AND roads_local.countyr = cor.co_code)
+        LEFT JOIN countysub_lookup csl ON (roads_local.statel = csl.st_code AND roads_local.countyl = csl.co_code AND roads_local.cousubl = csl.cs_code)
+        LEFT JOIN countysub_lookup csr ON (roads_local.stater = csr.st_code AND roads_local.countyr = csr.co_code AND roads_local.cousubr = csr.cs_code)
+        LEFT JOIN zip_lookup_base zipl ON (roads_local.zipl = zipl.zip)
+        LEFT JOIN zip_lookup_base zipr ON (roads_local.zipr = zipr.zip)
+    WHERE includes_address(parsed.address, roads_local.fraddl, roads_local.toaddl,
+        roads_local.fraddr, roads_local.toaddr)
+    ORDER BY subquery.rating;
+  ELSE
+    OPEN result FOR
+    SELECT
+        roads_local.fedirp as fedirp,
+        roads_local.fename as fename,
+        roads_local.fetype as fetype,
+        roads_local.fedirs as fedirs,
+        CASE WHEN (parsed.address % 2) = (roads_local.fraddl % 2)
+          OR (parsed.address % 2) = (roads_local.toaddl % 2)
+          THEN coalesce(pl.name,zipl.city,csl.name,col.name) ELSE coalesce(pr.name,zipr.city,csr.name,cor.name) END as place,
+        CASE WHEN (parsed.address % 2) = (roads_local.fraddl % 2)
+          OR (parsed.address % 2) = (roads_local.toaddl % 2)
+          THEN sl.abbrev ELSE sr.abbrev END as state,
+        CASE WHEN (parsed.address % 2) = (roads_local.fraddl % 2)
+          OR (parsed.address % 2) = (roads_local.toaddl % 2)
+          THEN roads_local.zipl ELSE roads_local.zipr END as zip,
+        interpolate_from_address(parsed.address, roads_local.fraddl,
+            roads_local.toaddl, roads_local.fraddr, roads_local.toaddr,
+            roads_local.geom) as address_geom,
+        subquery.rating as rating
+    FROM (
+      SELECT *, rate_attributes(parsed.preDirAbbrev, tiger_geocode_roads.fedirp,
+        parsed.streetName, tiger_geocode_roads.fename, parsed.streetTypeAbbrev,
+        tiger_geocode_roads.fetype, parsed.postDirAbbrev,
+        tiger_geocode_roads.fedirs, parsed.location,
+        tiger_geocode_roads.cousub) as rating
+      FROM tiger_geocode_roads
+      WHERE soundex(parsed.location) = soundex(tiger_geocode_roads.cousub)
+        AND soundex(parsed.streetName) = soundex(tiger_geocode_roads.fename)
+        ) AS subquery
+        JOIN roads_local ON (subquery.tlid = roads_local.tlid)
+        JOIN state_lookup sl ON (roads_local.statel = sl.st_code)
+        JOIN state_lookup sr ON (roads_local.stater = sr.st_code)
+        LEFT JOIN place_lookup pl ON (roads_local.statel = pl.st_code AND roads_local.placel = pl.pl_code)
+        LEFT JOIN place_lookup pr ON (roads_local.stater = pr.st_code AND roads_local.placer = pr.pl_code)
+        LEFT JOIN county_lookup col ON (roads_local.statel = col.st_code AND roads_local.countyl = col.co_code)
+        LEFT JOIN county_lookup cor ON (roads_local.stater = cor.st_code AND roads_local.countyr = cor.co_code)
+        LEFT JOIN countysub_lookup csl ON (roads_local.statel = csl.st_code AND roads_local.countyl = csl.co_code AND roads_local.cousubl = csl.cs_code)
+        LEFT JOIN countysub_lookup csr ON (roads_local.stater = csr.st_code AND roads_local.countyr = csr.co_code AND roads_local.cousubr = csr.cs_code)
+        LEFT JOIN zip_lookup_base zipl ON (roads_local.zipl = zipl.zip)
+        LEFT JOIN zip_lookup_base zipr ON (roads_local.zipr = zipr.zip)
+    WHERE includes_address(parsed.address, roads_local.fraddl, roads_local.toaddl,
+        roads_local.fraddr, roads_local.toaddr)
+    ORDER BY subquery.rating;
+  END IF;
+  RETURN result;
+END;
+$_$ LANGUAGE plpgsql;

Added: trunk/extras/tiger_geocoder/geocode/geocode_address_place_exact.sql
===================================================================
--- trunk/extras/tiger_geocoder/geocode/geocode_address_place_exact.sql	2007-07-03 21:22:29 UTC (rev 2641)
+++ trunk/extras/tiger_geocoder/geocode/geocode_address_place_exact.sql	2007-07-03 21:30:34 UTC (rev 2642)
@@ -0,0 +1,156 @@
+-- geocode_address_place_exact(result, parsed)
+-- Opens the cursor "result" over roads whose place equals parsed.location,
+-- whose name soundex-matches parsed.streetName, whose state equals
+-- parsed.stateAbbrev (when one is given) and whose address range includes
+-- parsed.address.  Rows are ordered by rating.  Returns the opened cursor, or
+-- NULL (without opening it) when no road or no address range matches.
+CREATE OR REPLACE FUNCTION geocode_address_place_exact(
+    result REFCURSOR,
+    parsed NORM_ADDY
+) RETURNS REFCURSOR AS $_$
+DECLARE
+  tempInt INTEGER;  -- match count; numeric so that it can be compared with 0
+BEGIN
+  -- Check to see if the road name can be matched.
+  IF parsed.stateAbbrev IS NOT NULL THEN
+    SELECT INTO tempInt count(*) FROM tiger_geocode_roads
+        WHERE parsed.location = tiger_geocode_roads.place
+        AND soundex(parsed.streetName) = soundex(tiger_geocode_roads.fename)
+        AND parsed.stateAbbrev = tiger_geocode_roads.state;
+  ELSE
+    SELECT INTO tempInt count(*) FROM tiger_geocode_roads
+        WHERE parsed.location = tiger_geocode_roads.place
+        AND soundex(parsed.streetName) = soundex(tiger_geocode_roads.fename);
+  END IF;
+
+  IF tempInt = 0 THEN
+    RETURN NULL;
+  END IF;
+
+  -- The road name matches, now we check to see if the addresses match
+  IF parsed.stateAbbrev IS NOT NULL THEN
+    SELECT INTO tempInt count(*)
+    FROM (
+      SELECT *, rate_attributes(parsed.preDirAbbrev, tiger_geocode_roads.fedirp,
+        parsed.streetName, tiger_geocode_roads.fename, parsed.streetTypeAbbrev,
+        tiger_geocode_roads.fetype, parsed.postDirAbbrev,
+        tiger_geocode_roads.fedirs) as rating
+      FROM tiger_geocode_roads
+      WHERE parsed.location = tiger_geocode_roads.place
+        AND soundex(parsed.streetName) = soundex(tiger_geocode_roads.fename)
+        AND parsed.stateAbbrev = tiger_geocode_roads.state
+        ) AS subquery
+        JOIN roads_local ON (subquery.tlid = roads_local.tlid)
+    WHERE includes_address(parsed.address, roads_local.fraddl, roads_local.toaddl, roads_local.fraddr, roads_local.toaddr);
+  ELSE
+    SELECT INTO tempInt count(*)
+    FROM (
+      SELECT *, rate_attributes(parsed.preDirAbbrev, tiger_geocode_roads.fedirp,
+        parsed.streetName, tiger_geocode_roads.fename, parsed.streetTypeAbbrev,
+        tiger_geocode_roads.fetype, parsed.postDirAbbrev,
+        tiger_geocode_roads.fedirs) as rating
+      FROM tiger_geocode_roads
+      WHERE parsed.location = tiger_geocode_roads.place
+        AND soundex(parsed.streetName) = soundex(tiger_geocode_roads.fename)
+        ) AS subquery
+        JOIN roads_local ON (subquery.tlid = roads_local.tlid)
+    WHERE includes_address(parsed.address, roads_local.fraddl, roads_local.toaddl, roads_local.fraddr, roads_local.toaddr);
+  END IF;
+
+  IF tempInt = 0 THEN
+    RETURN NULL;
+  END IF;
+
+  -- Open the cursor.  NOTE(review): the CASEs compare the parity (0 or 1) with the range values themselves; confirm.
+  IF parsed.stateAbbrev IS NOT NULL THEN
+    OPEN result FOR
+    SELECT
+        roads_local.fedirp as fedirp,
+        roads_local.fename as fename,
+        roads_local.fetype as fetype,
+        roads_local.fedirs as fedirs,
+        CASE WHEN (parsed.address % 2) = roads_local.fraddl
+          OR (parsed.address % 2) = roads_local.toaddl
+          THEN coalesce(pl.name,zipl.city,csl.name,col.name) ELSE coalesce(pr.name,zipr.city,csr.name,cor.name) END as place,
+        CASE WHEN (parsed.address % 2) = roads_local.fraddl
+          OR (parsed.address % 2) = roads_local.toaddl
+          THEN sl.abbrev ELSE sr.abbrev END as state,
+        CASE WHEN (parsed.address % 2) = roads_local.fraddl
+          OR (parsed.address % 2) = roads_local.toaddl
+          THEN zipl ELSE zipr END as zip,
+        interpolate_from_address(parsed.address, roads_local.fraddl,
+            roads_local.toaddl, roads_local.fraddr, roads_local.toaddr,
+            roads_local.geom) as address_geom,
+        subquery.rating as rating
+    FROM (
+      SELECT *, rate_attributes(parsed.preDirAbbrev, tiger_geocode_roads.fedirp,
+        parsed.streetName, tiger_geocode_roads.fename, parsed.streetTypeAbbrev,
+        tiger_geocode_roads.fetype, parsed.postDirAbbrev,
+        tiger_geocode_roads.fedirs, parsed.location,
+        tiger_geocode_roads.place) as rating
+      FROM tiger_geocode_roads
+      WHERE parsed.location = tiger_geocode_roads.place
+        AND soundex(parsed.streetName) = soundex(tiger_geocode_roads.fename)
+        AND parsed.stateAbbrev = tiger_geocode_roads.state
+        ) AS subquery
+        JOIN roads_local ON (subquery.tlid = roads_local.tlid)
+        JOIN state_lookup sl ON (roads_local.statel = sl.st_code)
+        JOIN state_lookup sr ON (roads_local.stater = sr.st_code)
+        LEFT JOIN place_lookup pl ON (roads_local.statel = pl.st_code AND roads_local.placel = pl.pl_code)
+        LEFT JOIN place_lookup pr ON (roads_local.stater = pr.st_code AND roads_local.placer = pr.pl_code)
+        LEFT JOIN county_lookup col ON (roads_local.statel = col.st_code AND roads_local.countyl = col.co_code)
+        LEFT JOIN county_lookup cor ON (roads_local.stater = cor.st_code AND roads_local.countyr = cor.co_code)
+        LEFT JOIN countysub_lookup csl ON (roads_local.statel = csl.st_code AND roads_local.countyl = csl.co_code AND roads_local.cousubl = csl.cs_code)
+        LEFT JOIN countysub_lookup csr ON (roads_local.stater = csr.st_code AND roads_local.countyr = csr.co_code AND roads_local.cousubr = csr.cs_code)
+        LEFT JOIN zip_lookup_base zipl ON (roads_local.zipl = zipl.zip)
+        LEFT JOIN zip_lookup_base zipr ON (roads_local.zipr = zipr.zip)
+    WHERE includes_address(parsed.address, roads_local.fraddl, roads_local.toaddl, roads_local.fraddr, roads_local.toaddr)
+    ORDER BY subquery.rating;
+    RETURN result;
+  ELSE
+    OPEN result FOR
+    SELECT
+        roads_local.fedirp as fedirp,
+        roads_local.fename as fename,
+        roads_local.fetype as fetype,
+        roads_local.fedirs as fedirs,
+        CASE WHEN (parsed.address % 2) = roads_local.fraddl
+          OR (parsed.address % 2) = roads_local.toaddl
+          THEN coalesce(pl.name,zipl.city,csl.name,col.name) ELSE coalesce(pr.name,zipr.city,csr.name,cor.name) END as place,
+        CASE WHEN (parsed.address % 2) = roads_local.fraddl
+          OR (parsed.address % 2) = roads_local.toaddl
+          THEN sl.abbrev ELSE sr.abbrev END as state,
+        CASE WHEN (parsed.address % 2) = roads_local.fraddl
+          OR (parsed.address % 2) = roads_local.toaddl
+          THEN zipl ELSE zipr END as zip,
+        interpolate_from_address(parsed.address, roads_local.fraddl,
+            roads_local.toaddl, roads_local.fraddr, roads_local.toaddr,
+            roads_local.geom) as address_geom,
+        subquery.rating as rating
+    FROM (
+      SELECT *, rate_attributes(parsed.preDirAbbrev, tiger_geocode_roads.fedirp,
+        parsed.streetName, tiger_geocode_roads.fename, parsed.streetTypeAbbrev,
+        tiger_geocode_roads.fetype, parsed.postDirAbbrev,
+        tiger_geocode_roads.fedirs, parsed.location,
+        tiger_geocode_roads.place) as rating
+      FROM tiger_geocode_roads
+      WHERE parsed.location = tiger_geocode_roads.place
+        AND soundex(parsed.streetName) = soundex(tiger_geocode_roads.fename)
+        ) AS subquery
+        JOIN roads_local ON (subquery.tlid = roads_local.tlid)
+        JOIN state_lookup sl ON (roads_local.statel = sl.st_code)
+        JOIN state_lookup sr ON (roads_local.stater = sr.st_code)
+        LEFT JOIN place_lookup pl ON (roads_local.statel = pl.st_code AND roads_local.placel = pl.pl_code)
+        LEFT JOIN place_lookup pr ON (roads_local.stater = pr.st_code AND roads_local.placer = pr.pl_code)
+        LEFT JOIN county_lookup col ON (roads_local.statel = col.st_code AND roads_local.countyl = col.co_code)
+        LEFT JOIN county_lookup cor ON (roads_local.stater = cor.st_code AND roads_local.countyr = cor.co_code)
+        LEFT JOIN countysub_lookup csl ON (roads_local.statel = csl.st_code AND roads_local.countyl = csl.co_code AND roads_local.cousubl = csl.cs_code)
+        LEFT JOIN countysub_lookup csr ON (roads_local.stater = csr.st_code AND roads_local.countyr = csr.co_code AND roads_local.cousubr = csr.cs_code)
+        LEFT JOIN zip_lookup_base zipl ON (roads_local.zipl = zipl.zip)
+        LEFT JOIN zip_lookup_base zipr ON (roads_local.zipr = zipr.zip)
+    WHERE includes_address(parsed.address, roads_local.fraddl, roads_local.toaddl, roads_local.fraddr, roads_local.toaddr)
+    ORDER BY subquery.rating;
+    RETURN result;
+  END IF;
+END;
+$_$ LANGUAGE plpgsql;

Added: trunk/extras/tiger_geocoder/geocode/geocode_address_place_fuzzy.sql
===================================================================
--- trunk/extras/tiger_geocoder/geocode/geocode_address_place_fuzzy.sql	2007-07-03 21:22:29 UTC (rev 2641)
+++ trunk/extras/tiger_geocoder/geocode/geocode_address_place_fuzzy.sql	2007-07-03 21:30:34 UTC (rev 2642)
@@ -0,0 +1,155 @@
+-- geocode_address_place_fuzzy(result, parsed)
+-- Opens the cursor "result" over roads whose place and name soundex-match
+-- parsed.location and parsed.streetName, whose state equals parsed.stateAbbrev
+-- (when one is given) and whose address range includes parsed.address, ordered
+-- by rating.  Returns the cursor, or NULL (without opening it) on no match.
+CREATE OR REPLACE FUNCTION geocode_address_place_fuzzy(
+    result REFCURSOR,
+    parsed NORM_ADDY
+) RETURNS REFCURSOR AS $_$
+DECLARE
+  tempInt INTEGER;  -- match count; numeric so that it can be compared with 0
+BEGIN
+  -- Check to see if the road name can be matched.
+  IF parsed.stateAbbrev IS NOT NULL THEN
+    SELECT INTO tempInt count(*) FROM tiger_geocode_roads
+        WHERE soundex(parsed.location) = soundex(tiger_geocode_roads.place)
+        AND soundex(parsed.streetName) = soundex(tiger_geocode_roads.fename)
+        AND parsed.stateAbbrev = tiger_geocode_roads.state;
+  ELSE
+    SELECT INTO tempInt count(*) FROM tiger_geocode_roads
+        WHERE soundex(parsed.location) = soundex(tiger_geocode_roads.place)
+        AND soundex(parsed.streetName) = soundex(tiger_geocode_roads.fename);
+  END IF;
+
+  IF tempInt = 0 THEN
+    RETURN NULL;
+  END IF;
+
+  -- The road name matches, now we check to see if the addresses match
+  IF parsed.stateAbbrev IS NOT NULL THEN
+    SELECT INTO tempInt count(*)
+    FROM (
+      SELECT *, rate_attributes(parsed.preDirAbbrev, tiger_geocode_roads.fedirp,
+        parsed.streetName, tiger_geocode_roads.fename, parsed.streetTypeAbbrev,
+        tiger_geocode_roads.fetype, parsed.postDirAbbrev,
+        tiger_geocode_roads.fedirs) as rating
+      FROM tiger_geocode_roads
+      WHERE soundex(parsed.location) = soundex(tiger_geocode_roads.place)
+        AND soundex(parsed.streetName) = soundex(tiger_geocode_roads.fename)
+        AND parsed.stateAbbrev = tiger_geocode_roads.state
+        ) AS subquery
+        JOIN roads_local ON (subquery.tlid = roads_local.tlid)
+    WHERE includes_address(parsed.address, roads_local.fraddl, roads_local.toaddl, roads_local.fraddr, roads_local.toaddr);
+  ELSE
+    SELECT INTO tempInt count(*)
+    FROM (
+      SELECT *, rate_attributes(parsed.preDirAbbrev, tiger_geocode_roads.fedirp,
+        parsed.streetName, tiger_geocode_roads.fename, parsed.streetTypeAbbrev,
+        tiger_geocode_roads.fetype, parsed.postDirAbbrev,
+        tiger_geocode_roads.fedirs) as rating
+      FROM tiger_geocode_roads
+      WHERE soundex(parsed.location) = soundex(tiger_geocode_roads.place)
+        AND soundex(parsed.streetName) = soundex(tiger_geocode_roads.fename)
+        ) AS subquery
+        JOIN roads_local ON (subquery.tlid = roads_local.tlid)
+    WHERE includes_address(parsed.address, roads_local.fraddl, roads_local.toaddl, roads_local.fraddr, roads_local.toaddr);
+  END IF;
+
+  IF tempInt = 0 THEN
+    RETURN NULL;
+  END IF;
+
+  -- Open the cursor.  NOTE(review): the CASEs compare the parity (0 or 1) with the range values themselves; confirm.
+  IF parsed.stateAbbrev IS NOT NULL THEN
+    OPEN result FOR
+    SELECT
+        roads_local.fedirp as fedirp,
+        roads_local.fename as fename,
+        roads_local.fetype as fetype,
+        roads_local.fedirs as fedirs,
+        CASE WHEN (parsed.address % 2) = roads_local.fraddl
+          OR (parsed.address % 2) = roads_local.toaddl
+          THEN coalesce(pl.name,zipl.city,csl.name,col.name) ELSE coalesce(pr.name,zipr.city,csr.name,cor.name) END as place,
+        CASE WHEN (parsed.address % 2) = roads_local.fraddl
+          OR (parsed.address % 2) = roads_local.toaddl
+          THEN sl.abbrev ELSE sr.abbrev END as state,
+        CASE WHEN (parsed.address % 2) = roads_local.fraddl
+          OR (parsed.address % 2) = roads_local.toaddl
+          THEN zipl ELSE zipr END as zip,
+        interpolate_from_address(parsed.address, roads_local.fraddl,
+            roads_local.toaddl, roads_local.fraddr, roads_local.toaddr,
+            roads_local.geom) as address_geom,
+        subquery.rating as rating
+    FROM (
+      SELECT *, rate_attributes(parsed.preDirAbbrev, tiger_geocode_roads.fedirp,
+        parsed.streetName, tiger_geocode_roads.fename, parsed.streetTypeAbbrev,
+        tiger_geocode_roads.fetype, parsed.postDirAbbrev,
+        tiger_geocode_roads.fedirs, parsed.location,
+        tiger_geocode_roads.place) as rating
+      FROM tiger_geocode_roads
+      WHERE soundex(parsed.location) = soundex(tiger_geocode_roads.place)
+        AND soundex(parsed.streetName) = soundex(tiger_geocode_roads.fename)
+        AND parsed.stateAbbrev = tiger_geocode_roads.state
+        ) AS subquery
+        JOIN roads_local ON (subquery.tlid = roads_local.tlid)
+        JOIN state_lookup sl ON (roads_local.statel = sl.st_code)
+        JOIN state_lookup sr ON (roads_local.stater = sr.st_code)
+        LEFT JOIN place_lookup pl ON (roads_local.statel = pl.st_code AND roads_local.placel = pl.pl_code)
+        LEFT JOIN place_lookup pr ON (roads_local.stater = pr.st_code AND roads_local.placer = pr.pl_code)
+        LEFT JOIN county_lookup col ON (roads_local.statel = col.st_code AND roads_local.countyl = col.co_code)
+        LEFT JOIN county_lookup cor ON (roads_local.stater = cor.st_code AND roads_local.countyr = cor.co_code)
+        LEFT JOIN countysub_lookup csl ON (roads_local.statel = csl.st_code AND roads_local.countyl = csl.co_code AND roads_local.cousubl = csl.cs_code)
+        LEFT JOIN countysub_lookup csr ON (roads_local.stater = csr.st_code AND roads_local.countyr = csr.co_code AND roads_local.cousubr = csr.cs_code)
+        LEFT JOIN zip_lookup_base zipl ON (roads_local.zipl = zipl.zip)
+        LEFT JOIN zip_lookup_base zipr ON (roads_local.zipr = zipr.zip)
+    WHERE includes_address(parsed.address, roads_local.fraddl, roads_local.toaddl, roads_local.fraddr, roads_local.toaddr)
+    ORDER BY subquery.rating;
+    RETURN result;
+  ELSE
+    OPEN result FOR
+    SELECT
+        roads_local.fedirp as fedirp,
+        roads_local.fename as fename,
+        roads_local.fetype as fetype,
+        roads_local.fedirs as fedirs,
+        CASE WHEN (parsed.address % 2) = roads_local.fraddl
+          OR (parsed.address % 2) = roads_local.toaddl
+          THEN coalesce(pl.name,zipl.city,csl.name,col.name) ELSE coalesce(pr.name,zipr.city,csr.name,cor.name) END as place,
+        CASE WHEN (parsed.address % 2) = roads_local.fraddl
+          OR (parsed.address % 2) = roads_local.toaddl
+          THEN sl.abbrev ELSE sr.abbrev END as state,
+        CASE WHEN (parsed.address % 2) = roads_local.fraddl
+          OR (parsed.address % 2) = roads_local.toaddl
+          THEN zipl ELSE zipr END as zip,
+        interpolate_from_address(parsed.address, roads_local.fraddl,
+            roads_local.toaddl, roads_local.fraddr, roads_local.toaddr,
+            roads_local.geom) as address_geom,
+        subquery.rating as rating
+    FROM (
+      SELECT *, rate_attributes(parsed.preDirAbbrev, tiger_geocode_roads.fedirp,
+        parsed.streetName, tiger_geocode_roads.fename, parsed.streetTypeAbbrev,
+        tiger_geocode_roads.fetype, parsed.postDirAbbrev,
+        tiger_geocode_roads.fedirs, parsed.location,
+        tiger_geocode_roads.place) as rating
+      FROM tiger_geocode_roads
+      WHERE soundex(parsed.location) = soundex(tiger_geocode_roads.place)
+        AND soundex(parsed.streetName) = soundex(tiger_geocode_roads.fename)
+        ) AS subquery
+        JOIN roads_local ON (subquery.tlid = roads_local.tlid)
+        JOIN state_lookup sl ON (roads_local.statel = sl.st_code)
+        JOIN state_lookup sr ON (roads_local.stater = sr.st_code)
+        LEFT JOIN place_lookup pl ON (roads_local.statel = pl.st_code AND roads_local.placel = pl.pl_code)
+        LEFT JOIN place_lookup pr ON (roads_local.stater = pr.st_code AND roads_local.placer = pr.pl_code)
+        LEFT JOIN county_lookup col ON (roads_local.statel = col.st_code AND roads_local.countyl = col.co_code)
+        LEFT JOIN county_lookup cor ON (roads_local.stater = cor.st_code AND roads_local.countyr = cor.co_code)
+        LEFT JOIN countysub_lookup csl ON (roads_local.statel = csl.st_code AND roads_local.countyl = csl.co_code AND roads_local.cousubl = csl.cs_code)
+        LEFT JOIN countysub_lookup csr ON (roads_local.stater = csr.st_code AND roads_local.countyr = csr.co_code AND roads_local.cousubr = csr.cs_code)
+        LEFT JOIN zip_lookup_base zipl ON (roads_local.zipl = zipl.zip)
+        LEFT JOIN zip_lookup_base zipr ON (roads_local.zipr = zipr.zip)
+    WHERE includes_address(parsed.address, roads_local.fraddl, roads_local.toaddl, roads_local.fraddr, roads_local.toaddr)
+    ORDER BY subquery.rating;
+    RETURN result;
+  END IF;
+END;
+$_$ LANGUAGE plpgsql;

Added: trunk/extras/tiger_geocoder/geocode/geocode_address_state.sql
===================================================================
--- trunk/extras/tiger_geocoder/geocode/geocode_address_state.sql	2007-07-03 21:22:29 UTC (rev 2641)
+++ trunk/extras/tiger_geocoder/geocode/geocode_address_state.sql	2007-07-03 21:30:34 UTC (rev 2642)
@@ -0,0 +1,81 @@
+-- geocode_address_state(result, parsed): opens the cursor "result" over roads in
+-- state parsed.stateAbbrev whose name soundex-matches parsed.streetName and whose
+-- address range includes parsed.address, ordered by rating; NULL on no match.
+CREATE OR REPLACE FUNCTION geocode_address_state(
+    result REFCURSOR,
+    parsed NORM_ADDY
+) RETURNS REFCURSOR AS $_$
+DECLARE
+  tempInt INTEGER;  -- match count; numeric so that it can be compared with 0
+BEGIN
+  -- Check to see if the road name can be matched.
+  SELECT INTO tempInt count(*) FROM tiger_geocode_roads
+      WHERE soundex(parsed.streetName) = soundex(tiger_geocode_roads.fename)
+      AND parsed.stateAbbrev = tiger_geocode_roads.state;
+
+  IF tempInt = 0 THEN
+    RETURN NULL;
+  END IF;
+
+  -- The road name matches, now we check to see if the addresses match
+  SELECT INTO tempInt count(*)
+  FROM (
+    SELECT *, rate_attributes(parsed.preDirAbbrev, tiger_geocode_roads.fedirp,
+      parsed.streetName, tiger_geocode_roads.fename, parsed.streetTypeAbbrev,
+      tiger_geocode_roads.fetype, parsed.postDirAbbrev,
+      tiger_geocode_roads.fedirs) as rating
+    FROM tiger_geocode_roads
+    WHERE soundex(parsed.streetName) = soundex(tiger_geocode_roads.fename)
+      AND parsed.stateAbbrev = tiger_geocode_roads.state
+      ) AS subquery
+      JOIN roads_local ON (subquery.tlid = roads_local.tlid)
+  WHERE includes_address(parsed.address, roads_local.fraddl, roads_local.toaddl, roads_local.fraddr, roads_local.toaddr);
+
+  IF tempInt = 0 THEN
+    RETURN NULL;
+  END IF;
+
+  -- Open the cursor.  NOTE(review): the CASEs compare the parity (0 or 1) with the range values themselves; confirm.
+  OPEN result FOR
+    SELECT
+        roads_local.fedirp as fedirp,
+        roads_local.fename as fename,
+        roads_local.fetype as fetype,
+        roads_local.fedirs as fedirs,
+        CASE WHEN (parsed.address % 2) = roads_local.fraddl
+          OR (parsed.address % 2) = roads_local.toaddl
+          THEN coalesce(pl.name,zipl.city,csl.name,col.name) ELSE coalesce(pr.name,zipr.city,csr.name,cor.name) END as place,
+        CASE WHEN (parsed.address % 2) = roads_local.fraddl
+          OR (parsed.address % 2) = roads_local.toaddl
+          THEN sl.abbrev ELSE sr.abbrev END as state,
+        CASE WHEN (parsed.address % 2) = roads_local.fraddl
+          OR (parsed.address % 2) = roads_local.toaddl
+          THEN zipl ELSE zipr END as zip,
+        interpolate_from_address(parsed.address, roads_local.fraddl, roads_local.toaddl,
+            roads_local.fraddr, roads_local.toaddr, roads_local.geom) as address_geom,
+        subquery.rating as rating
+  FROM (
+    SELECT *, rate_attributes(parsed.preDirAbbrev, tiger_geocode_roads.fedirp,
+      parsed.streetName, tiger_geocode_roads.fename, parsed.streetTypeAbbrev,
+      tiger_geocode_roads.fetype, parsed.postDirAbbrev,
+      tiger_geocode_roads.fedirs) as rating
+    FROM tiger_geocode_roads
+    WHERE soundex(parsed.streetName) = soundex(tiger_geocode_roads.fename)
+      AND parsed.stateAbbrev = tiger_geocode_roads.state
+      ) AS subquery
+      JOIN roads_local ON (subquery.tlid = roads_local.tlid)
+      JOIN state_lookup sl ON (roads_local.statel = sl.st_code)
+      JOIN state_lookup sr ON (roads_local.stater = sr.st_code)
+      LEFT JOIN place_lookup pl ON (roads_local.statel = pl.st_code AND roads_local.placel = pl.pl_code)
+      LEFT JOIN place_lookup pr ON (roads_local.stater = pr.st_code AND roads_local.placer = pr.pl_code)
+      LEFT JOIN county_lookup col ON (roads_local.statel = col.st_code AND roads_local.countyl = col.co_code)
+      LEFT JOIN county_lookup cor ON (roads_local.stater = cor.st_code AND roads_local.countyr = cor.co_code)
+      LEFT JOIN countysub_lookup csl ON (roads_local.statel = csl.st_code AND roads_local.countyl = csl.co_code AND roads_local.cousubl = csl.cs_code)
+      LEFT JOIN countysub_lookup csr ON (roads_local.stater = csr.st_code AND roads_local.countyr = csr.co_code AND roads_local.cousubr = csr.cs_code)
+      LEFT JOIN zip_lookup_base zipl ON (roads_local.zipl = zipl.zip)
+      LEFT JOIN zip_lookup_base zipr ON (roads_local.zipr = zipr.zip)
+  WHERE includes_address(parsed.address, roads_local.fraddl, roads_local.toaddl, roads_local.fraddr, roads_local.toaddr)
+  ORDER BY subquery.rating;
+  RETURN result;
+END;
+$_$ LANGUAGE plpgsql;

Added: trunk/extras/tiger_geocoder/geocode/geocode_address_zip.sql
===================================================================
--- trunk/extras/tiger_geocoder/geocode/geocode_address_zip.sql	2007-07-03 21:22:29 UTC (rev 2641)
+++ trunk/extras/tiger_geocoder/geocode/geocode_address_zip.sql	2007-07-03 21:30:34 UTC (rev 2642)
@@ -0,0 +1,83 @@
+-- geocode_address_zip(result, parsed): opens the cursor "result" over roads with
+-- zip parsed.zip whose name soundex-matches parsed.streetName and whose address
+-- range includes parsed.address, ordered by rating.  Returns NULL on no match.
+CREATE OR REPLACE FUNCTION geocode_address_zip(
+    result REFCURSOR,
+    parsed NORM_ADDY
+) RETURNS REFCURSOR AS $_$
+DECLARE
+  tempInt INTEGER;  -- match count; numeric so that it can be compared with 0
+BEGIN
+  -- Check to see if the road name can be matched.
+  SELECT INTO tempInt count(*) FROM tiger_geocode_roads
+      WHERE parsed.zip = tiger_geocode_roads.zip
+      AND soundex(parsed.streetName) = soundex(tiger_geocode_roads.fename);
+
+  IF tempInt = 0 THEN
+    RETURN NULL;
+  END IF;
+
+  -- The road name matches, now we check to see if the addresses match
+  SELECT INTO tempInt count(*)
+  FROM (
+    SELECT *, rate_attributes(parsed.preDirAbbrev, tiger_geocode_roads.fedirp,
+      parsed.streetName, tiger_geocode_roads.fename, parsed.streetTypeAbbrev,
+      tiger_geocode_roads.fetype, parsed.postDirAbbrev,
+      tiger_geocode_roads.fedirs) as rating
+    FROM tiger_geocode_roads
+    WHERE parsed.zip = tiger_geocode_roads.zip
+      AND soundex(parsed.streetName) = soundex(tiger_geocode_roads.fename)
+      ) AS subquery
+      JOIN roads_local ON (subquery.tlid = roads_local.tlid)
+  WHERE includes_address(parsed.address, roads_local.fraddl, roads_local.toaddl, roads_local.fraddr, roads_local.toaddr);
+
+  IF tempInt = 0 THEN
+    RETURN NULL;
+  END IF;
+
+  -- Open the cursor.  NOTE(review): the CASEs compare the parity (0 or 1) with the range values themselves; confirm.
+  OPEN result FOR
+  SELECT
+      roads_local.fedirp as fedirp,
+      roads_local.fename as fename,
+      roads_local.fetype as fetype,
+      roads_local.fedirs as fedirs,
+      CASE WHEN (parsed.address % 2) = roads_local.fraddl
+        OR (parsed.address % 2) = roads_local.toaddl
+        THEN coalesce(pl.name,zipl.city,csl.name,col.name) ELSE coalesce(pr.name,zipr.city,csr.name,cor.name) END as place,
+      CASE WHEN (parsed.address % 2) = roads_local.fraddl
+        OR (parsed.address % 2) = roads_local.toaddl
+        THEN sl.abbrev ELSE sr.abbrev END as state,
+      CASE WHEN (parsed.address % 2) = roads_local.fraddl
+        OR (parsed.address % 2) = roads_local.toaddl
+        THEN zipl ELSE zipr END as zip,
+      interpolate_from_address(parsed.address, roads_local.fraddl,
+          roads_local.toaddl, roads_local.fraddr, roads_local.toaddr,
+          roads_local.geom) as address_geom,
+      subquery.rating as rating
+  FROM (
+    SELECT *, rate_attributes(parsed.preDirAbbrev, tiger_geocode_roads.fedirp,
+      parsed.streetName, tiger_geocode_roads.fename, parsed.streetTypeAbbrev,
+      tiger_geocode_roads.fetype, parsed.postDirAbbrev,
+      tiger_geocode_roads.fedirs) as rating
+    FROM tiger_geocode_roads
+    WHERE parsed.zip = tiger_geocode_roads.zip
+      AND soundex(parsed.streetName) = soundex(tiger_geocode_roads.fename)
+      ) AS subquery
+    JOIN roads_local ON (subquery.tlid = roads_local.tlid)
+    JOIN state_lookup sl ON (roads_local.statel = sl.st_code)
+    JOIN state_lookup sr ON (roads_local.stater = sr.st_code)
+    LEFT JOIN place_lookup pl ON (roads_local.statel = pl.st_code AND roads_local.placel = pl.pl_code)
+    LEFT JOIN place_lookup pr ON (roads_local.stater = pr.st_code AND roads_local.placer = pr.pl_code)
+    LEFT JOIN county_lookup col ON (roads_local.statel = col.st_code AND roads_local.countyl = col.co_code)
+    LEFT JOIN county_lookup cor ON (roads_local.stater = cor.st_code AND roads_local.countyr = cor.co_code)
+    LEFT JOIN countysub_lookup csl ON (roads_local.statel = csl.st_code AND roads_local.countyl = csl.co_code AND roads_local.cousubl = csl.cs_code)
+    LEFT JOIN countysub_lookup csr ON (roads_local.stater = csr.st_code AND roads_local.countyr = csr.co_code AND roads_local.cousubr = csr.cs_code)
+    LEFT JOIN zip_lookup_base zipl ON (roads_local.zipl = zipl.zip)
+    LEFT JOIN zip_lookup_base zipr ON (roads_local.zipr = zipr.zip)
+  WHERE includes_address(parsed.address, roads_local.fraddl, roads_local.toaddl, roads_local.fraddr, roads_local.toaddr)
+  ORDER BY subquery.rating;
+
+  RETURN result;
+END;
+$_$ LANGUAGE plpgsql;

Added: trunk/extras/tiger_geocoder/geocode/geocode_get_point.sql
===================================================================
--- trunk/extras/tiger_geocoder/geocode/geocode_get_point.sql	2007-07-03 21:22:29 UTC (rev 2641)
+++ trunk/extras/tiger_geocoder/geocode/geocode_get_point.sql	2007-07-03 21:30:34 UTC (rev 2642)
@@ -0,0 +1,10 @@
+-- Passes $1 to geocode() (with NULL as its first argument) and returns the
+-- centroid of the "geom" field of what geocode() hands back.
+CREATE OR REPLACE FUNCTION geocode_get_point(VARCHAR) RETURNS GEOMETRY AS $_$
+DECLARE
+  geocoded RECORD;
+BEGIN
+  geocoded := geocode(NULL, $1);
+  RETURN centroid(geocoded.geom);
+END;
+$_$ LANGUAGE plpgsql;

Added: trunk/extras/tiger_geocoder/geocode/geocode_location.sql
===================================================================
--- trunk/extras/tiger_geocoder/geocode/geocode_location.sql	2007-07-03 21:22:29 UTC (rev 2641)
+++ trunk/extras/tiger_geocoder/geocode/geocode_location.sql	2007-07-03 21:30:34 UTC (rev 2642)
@@ -0,0 +1,67 @@
+-- geocode_location(parsed): opens a cursor (rating fixed at 100) over the centroids
+-- of zip_lookup_base/zt99_d00 rows whose city soundex-matches parsed.location in
+-- state parsed.stateAbbrev or, failing that, of matching pl99_d00 places; else NULL.
+CREATE OR REPLACE FUNCTION geocode_location(
+    parsed NORM_ADDY
+) RETURNS REFCURSOR AS $_$
+DECLARE
+  result REFCURSOR;
+  tempInt INTEGER;  -- match count; numeric so that it can be compared with 0
+BEGIN
+  -- Try to match the city/state to a zipcode first
+  SELECT INTO tempInt count(*)
+    FROM zip_lookup_base zip
+    JOIN state_lookup sl ON (zip.state = sl.name)
+    JOIN zt99_d00 zl ON (lpad(zip.zip,5,'0') = zl.zcta)
+    WHERE soundex(zip.city) = soundex(parsed.location) and sl.abbrev = parsed.stateAbbrev;
+
+  -- If that worked, just use the zipcode lookup
+  IF tempInt > 0 THEN
+    OPEN result FOR
+    SELECT
+        NULL::varchar(2) as fedirp,
+        NULL::varchar(30) as fename,
+        NULL::varchar(4) as fetype,
+        NULL::varchar(2) as fedirs,
+        zip.city as place,
+        sl.abbrev as state,
+        parsed.zip as zip,
+        centroid(wkb_geometry) as address_geom,
+        100::integer as rating
+    FROM
+      zip_lookup_base zip
+      JOIN state_lookup sl on (zip.state = sl.name)
+      JOIN zt99_d00 zl ON (lpad(zip.zip,5,'0') = zl.zcta)
+    WHERE soundex(zip.city) = soundex(parsed.location) and sl.abbrev = parsed.stateAbbrev;
+
+    RETURN result;
+  END IF;
+
+  -- Try to match the city/state to a place next
+  SELECT INTO tempInt count(*)
+    FROM pl99_d00 pl
+    JOIN state_lookup sl ON (pl.state = lpad(sl.st_code,2,'0'))
+    WHERE soundex(pl.name) = soundex(parsed.location) and sl.abbrev = parsed.stateAbbrev;
+
+  -- If that worked, just use the place lookup
+  IF tempInt > 0 THEN
+    OPEN result FOR
+    SELECT
+        NULL::varchar(2) as fedirp,
+        NULL::varchar(30) as fename,
+        NULL::varchar(4) as fetype,
+        NULL::varchar(2) as fedirs,
+        pl.name as place,
+        sl.abbrev as state,
+        NULL::integer as zip,
+        centroid(wkb_geometry) as address_geom,
+        100::integer as rating
+    FROM pl99_d00 pl
+    JOIN state_lookup sl ON (pl.state = lpad(sl.st_code,2,'0'))
+    WHERE soundex(pl.name) = soundex(parsed.location) and sl.abbrev = parsed.stateAbbrev;
+
+    RETURN result;
+  END IF;
+  RETURN result;  -- neither lookup matched: result was never opened, so this is NULL
+END;
+$_$ LANGUAGE plpgsql;

Added: trunk/extras/tiger_geocoder/geocode/geocode_zip.sql
===================================================================
--- trunk/extras/tiger_geocoder/geocode/geocode_zip.sql	2007-07-03 21:22:29 UTC (rev 2641)
+++ trunk/extras/tiger_geocoder/geocode/geocode_zip.sql	2007-07-03 21:30:34 UTC (rev 2642)
@@ -0,0 +1,41 @@
+-- geocode_zip(parsed): opens a cursor (rating fixed at 100) over the centroids of
+-- the zip_lookup_base/zt99_d00 rows whose zip equals parsed.zip, with the city
+-- and state abbreviation of each.  Returns NULL when no such row exists.
+CREATE OR REPLACE FUNCTION geocode_zip(
+    parsed NORM_ADDY
+) RETURNS REFCURSOR AS $_$
+DECLARE
+  result REFCURSOR;
+  tempInt INTEGER;  -- match count; numeric so that it can be compared with 0
+BEGIN
+  -- Check to see if the zip code can be matched.
+  SELECT INTO tempInt count(*)
+    FROM zip_lookup_base zip
+    JOIN state_lookup sl on (zip.state = sl.name)
+    JOIN zt99_d00 zl ON (lpad(zip.zip,5,'0') = zl.zcta)
+    WHERE zip.zip = parsed.zip;
+
+  IF tempInt = 0 THEN
+    RETURN NULL;
+  END IF;
+
+  OPEN result FOR
+  SELECT
+      NULL::varchar(2) as fedirp,
+      NULL::varchar(30) as fename,
+      NULL::varchar(4) as fetype,
+      NULL::varchar(2) as fedirs,
+      zip.city as place,
+      sl.abbrev as state,
+      parsed.zip as zip,
+      centroid(wkb_geometry) as address_geom,
+      100::integer as rating
+  FROM
+    zip_lookup_base zip
+    JOIN state_lookup sl on (zip.state = sl.name)
+    JOIN zt99_d00 zl ON (lpad(zip.zip,5,'0') = zl.zcta)
+  WHERE zip.zip = parsed.zip;
+
+  RETURN result;
+END;
+$_$ LANGUAGE plpgsql;

Added: trunk/extras/tiger_geocoder/geocode/includes_address.sql
===================================================================
--- trunk/extras/tiger_geocoder/geocode/includes_address.sql	2007-07-03 21:22:29 UTC (rev 2641)
+++ trunk/extras/tiger_geocoder/geocode/includes_address.sql	2007-07-03 21:30:34 UTC (rev 2642)
@@ -0,0 +1,93 @@
+-- Reports whether given_address falls inside one of two address ranges and has
+-- the same parity as an end of that range.  The arguments must be grouped:
+-- addr1/addr2 bound one side of the street, addr3/addr4 the other side.
+CREATE OR REPLACE FUNCTION includes_address(
+    given_address INTEGER,
+    addr1 INTEGER,
+    addr2 INTEGER,
+    addr3 INTEGER,
+    addr4 INTEGER
+) RETURNS BOOLEAN
+AS $_$
+DECLARE
+  -- -1 marks a bound for which no non-NULL address has been seen yet.
+  side1_lo INTEGER := -1;
+  side1_hi INTEGER := -1;
+  side2_lo INTEGER := -1;
+  side2_hi INTEGER := -1;
+  overall_lo INTEGER := -1;
+  overall_hi INTEGER := -1;
+BEGIN
+  -- First side: addr1 seeds both bounds, addr2 may widen them.
+  IF addr1 IS NOT NULL THEN
+    side1_lo := addr1;
+    side1_hi := addr1;
+  END IF;
+  IF addr2 IS NOT NULL THEN
+    side1_lo := CASE
+      WHEN side1_lo = -1 OR addr2 < side1_lo THEN addr2
+      ELSE side1_lo
+    END;
+    side1_hi := CASE
+      WHEN side1_hi = -1 OR addr2 > side1_hi THEN addr2
+      ELSE side1_hi
+    END;
+  END IF;
+
+  -- Only the first side has been read so far, so the overall bounds are
+  -- exactly the first side's bounds at this point.
+  overall_lo := side1_lo;
+  overall_hi := side1_hi;
+
+  -- Second side: addr3 replaces both of its bounds, addr4 may widen them.
+  -- Each of the two values also widens the overall bounds.
+  IF addr3 IS NOT NULL THEN
+    overall_lo := CASE
+      WHEN overall_lo = -1 OR addr3 < overall_lo THEN addr3
+      ELSE overall_lo
+    END;
+    overall_hi := CASE
+      WHEN overall_hi = -1 OR addr3 > overall_hi THEN addr3
+      ELSE overall_hi
+    END;
+    side2_lo := addr3;
+    side2_hi := addr3;
+  END IF;
+  IF addr4 IS NOT NULL THEN
+    overall_lo := CASE
+      WHEN overall_lo = -1 OR addr4 < overall_lo THEN addr4
+      ELSE overall_lo
+    END;
+    overall_hi := CASE
+      WHEN overall_hi = -1 OR addr4 > overall_hi THEN addr4
+      ELSE overall_hi
+    END;
+    side2_hi := CASE
+      WHEN side2_hi = -1 OR addr4 > side2_hi THEN addr4
+      ELSE side2_hi
+    END;
+    side2_lo := CASE
+      WHEN side2_lo = -1 OR addr4 < side2_lo THEN addr4
+      ELSE side2_lo
+    END;
+  END IF;
+
+  -- An overall bound is still -1: nothing to match against, return FALSE (arbitrary)
+  IF overall_lo = -1 OR overall_hi = -1 THEN
+    RETURN FALSE;
+  END IF;
+  IF given_address BETWEEN overall_lo AND overall_hi THEN
+    -- A side matches when the address is in its range with the parity of an end.
+    IF given_address BETWEEN side1_lo AND side1_hi
+        AND (given_address % 2) IN (side1_lo % 2, side1_hi % 2) THEN
+      RETURN TRUE;
+    END IF;
+    IF given_address BETWEEN side2_lo AND side2_hi
+        AND (given_address % 2) IN (side2_lo % 2, side2_hi % 2) THEN
+      RETURN TRUE;
+    END IF;
+  END IF;
+  -- The address is not within the range
+  RETURN FALSE;
+END;
+$_$ LANGUAGE plpgsql;

Added: trunk/extras/tiger_geocoder/geocode/interpolate_from_address.sql
===================================================================
--- trunk/extras/tiger_geocoder/geocode/interpolate_from_address.sql	2007-07-03 21:22:29 UTC (rev 2641)
+++ trunk/extras/tiger_geocoder/geocode/interpolate_from_address.sql	2007-07-03 21:30:34 UTC (rev 2642)
@@ -0,0 +1,123 @@
+-- String-address overload of interpolate_from_address.
+-- The four range bounds arrive as text; each is parsed with
+-- to_number(..., '999999') and the call is forwarded, together with the
+-- address and the road geometry, to the all-INTEGER overload.
+--   $1      address to locate
+--   $2, $3  bounds of the first address range, as text
+--   $4, $5  bounds of the second address range, as text
+--   $6      road geometry
+-- Returns whatever the INTEGER overload returns.
+CREATE OR REPLACE FUNCTION interpolate_from_address(INTEGER, VARCHAR, VARCHAR, VARCHAR, VARCHAR, GEOMETRY) RETURNS GEOMETRY
+AS $_$
+BEGIN
+  -- The explicit INTEGER casts select the INTEGER overload rather than
+  -- recursing into this one.
+  RETURN interpolate_from_address(
+      $1,
+      CAST(to_number($2, '999999') AS INTEGER),
+      CAST(to_number($3, '999999') AS INTEGER),
+      CAST(to_number($4, '999999') AS INTEGER),
+      CAST(to_number($5, '999999') AS INTEGER),
+      $6);
+END
+$_$ LANGUAGE plpgsql;
+
+-- interpolate_from_address(local_address, from_address_l, to_address_l, from_address_r, to_address_r, local_road)
+-- Returns the point along the given road that corresponds to the given
+-- address, found by linear interpolation inside the matching address range.
+--   $1      address to locate; NULL yields NULL
+--   $2, $3  from/to addresses of one side of the street (either may be NULL)
+--   $4, $5  from/to addresses of the other side (either may be NULL)
+--   $6      road; a LINESTRING is used as is, only the first member of a
+--           MULTILINESTRING is used, any other type (or NULL) yields NULL
+-- The addresses must be grouped: the second and third arguments belong to
+-- one side of the street, the fourth and fifth to the other.
+-- A side matches when the address lies inside its range and has the same
+-- parity as one of the range bounds.  If both sides match, the second side
+-- ($4/$5) wins.  If no side matches, NULL is returned.
+-- When the matching range consists of a single address (from = to, or only
+-- one bound given) the midpoint of the road is returned; the original
+-- formula divided by zero in that case.
+CREATE OR REPLACE FUNCTION interpolate_from_address(INTEGER, INTEGER, INTEGER, INTEGER, INTEGER, GEOMETRY) RETURNS GEOMETRY
+AS $_$
+DECLARE
+  given_address INTEGER;
+  -- -1 is the "not set" marker for the four range bounds.
+  lmaxaddr INTEGER := -1;
+  rmaxaddr INTEGER := -1;
+  lminaddr INTEGER := -1;
+  rminaddr INTEGER := -1;
+  -- TRUE when the "from" address of a side is its larger bound, i.e. the
+  -- addresses decrease along the direction of the geometry.
+  lfrgreater BOOLEAN;
+  rfrgreater BOOLEAN;
+  frgreater BOOLEAN;
+  addrwidth INTEGER;
+  -- Fraction of the road length at which the address lies; stays NULL
+  -- until a side matches.
+  part DOUBLE PRECISION;
+  road GEOMETRY;
+BEGIN
+  IF $1 IS NULL OR $6 IS NULL THEN
+    RETURN NULL;
+  END IF;
+  given_address := $1;
+
+  IF geometrytype($6) = 'LINESTRING' THEN
+    road := $6;
+  ELSIF geometrytype($6) = 'MULTILINESTRING' THEN
+    road := geometryn($6,1);
+  ELSE
+    RETURN NULL;
+  END IF;
+
+  -- First side: derive min/max and the direction flag.
+  IF $2 IS NOT NULL THEN
+    lfrgreater := TRUE;
+    lmaxaddr := $2;
+    lminaddr := $2;
+  END IF;
+
+  IF $3 IS NOT NULL THEN
+    IF $3 > lmaxaddr OR lmaxaddr = -1 THEN
+      lmaxaddr := $3;
+      lfrgreater := FALSE;
+    END IF;
+    IF $3 < lminaddr OR lminaddr = -1 THEN
+      lminaddr := $3;
+    END IF;
+  END IF;
+
+  -- Second side: same derivation.
+  IF $4 IS NOT NULL THEN
+    rmaxaddr := $4;
+    rminaddr := $4;
+    rfrgreater := TRUE;
+  END IF;
+
+  IF $5 IS NOT NULL THEN
+    IF $5 > rmaxaddr OR rmaxaddr = -1 THEN
+      rmaxaddr := $5;
+      rfrgreater := FALSE;
+    END IF;
+    IF $5 < rminaddr OR rminaddr = -1 THEN
+      rminaddr := $5;
+    END IF;
+  END IF;
+
+  IF given_address >= lminaddr AND given_address <= lmaxaddr THEN
+    IF (given_address % 2) = (lminaddr % 2)
+        OR (given_address % 2) = (lmaxaddr % 2) THEN
+      addrwidth := lmaxaddr - lminaddr;
+      IF addrwidth = 0 THEN
+        -- Single-address range: nothing to interpolate, use the midpoint.
+        part := 0.5;
+      ELSE
+        -- trunc(int, 1) yields a NUMERIC, so this is not integer division.
+        part := (given_address - lminaddr) / trunc(addrwidth, 1);
+      END IF;
+      frgreater := lfrgreater;
+    END IF;
+  END IF;
+
+  IF given_address >= rminaddr AND given_address <= rmaxaddr THEN
+    IF (given_address % 2) = (rminaddr % 2)
+        OR (given_address % 2) = (rmaxaddr % 2) THEN
+      addrwidth := rmaxaddr - rminaddr;
+      IF addrwidth = 0 THEN
+        -- Single-address range: nothing to interpolate, use the midpoint.
+        part := 0.5;
+      ELSE
+        -- trunc(int, 1) yields a NUMERIC, so this is not integer division.
+        part := (given_address - rminaddr) / trunc(addrwidth, 1);
+      END IF;
+      frgreater := rfrgreater;
+    END IF;
+  END IF;
+
+  IF part IS NULL THEN
+    -- The address is on neither side of this road.
+    RETURN NULL;
+  END IF;
+
+  IF frgreater THEN
+    -- Addresses run against the direction of the geometry.
+    part := 1 - part;
+  END IF;
+
+  RETURN line_interpolate_point(road, part);
+END;
+$_$ LANGUAGE plpgsql;

Added: trunk/extras/tiger_geocoder/geocode/rate_attributes.sql
===================================================================
--- trunk/extras/tiger_geocoder/geocode/rate_attributes.sql	2007-07-03 21:22:29 UTC (rev 2641)
+++ trunk/extras/tiger_geocoder/geocode/rate_attributes.sql	2007-07-03 21:30:34 UTC (rev 2642)
@@ -0,0 +1,58 @@
+-- rate_attributes(dirpA, dirpB, streetNameA, streetNameB, streetTypeA,
+-- streetTypeB, dirsA, dirsB, locationA, locationB)
+-- Rates how well two streets match, including their locations.  The score
+-- is the case-insensitive levenshtein distance between the two locations
+-- plus the score of the eight-argument rate_attributes for the remaining
+-- values.
+-- Both locations must be non-null, otherwise NULL is returned.  The
+-- requirements of the eight-argument rate_attributes apply as well.
+CREATE OR REPLACE FUNCTION rate_attributes(VARCHAR, VARCHAR, VARCHAR, VARCHAR,
+    VARCHAR, VARCHAR, VARCHAR, VARCHAR, VARCHAR, VARCHAR) RETURNS INTEGER
+AS $_$
+DECLARE
+  locationScore INTEGER;
+  -- NOTE(review): declared but never applied to the location distance;
+  -- confirm whether the location term was meant to be weighted.
+  locationWeight INTEGER := 14;
+  -- Flip to TRUE to get a notice when the location check fails.
+  var_verbose BOOLEAN := FALSE;
+BEGIN
+  IF $9 IS NULL OR $10 IS NULL THEN
+    IF var_verbose THEN
+      RAISE NOTICE 'rate_attributes() - Location names cannot be null!';
+    END IF;
+    RETURN NULL;
+  END IF;
+
+  locationScore := levenshtein_ignore_case($9, $10);
+  RETURN locationScore + rate_attributes($1, $2, $3, $4, $5, $6, $7, $8);
+END;
+$_$ LANGUAGE plpgsql;
+
+-- rate_attributes(dirpA, dirpB, streetNameA, streetNameB, streetTypeA,
+-- streetTypeB, dirsA, dirsB)
+-- Rates how well two streets match.  The score is the weighted sum of the
+-- case-insensitive levenshtein distances of each A/B pair, so 0 is a
+-- perfect match and larger values are worse.
+-- Only the street names are required; NULL is returned when either is
+-- null.  Any other null value is passed through cull_null() before it is
+-- compared.
+CREATE OR REPLACE FUNCTION rate_attributes(VARCHAR, VARCHAR, VARCHAR, VARCHAR,
+    VARCHAR, VARCHAR, VARCHAR, VARCHAR) RETURNS INTEGER
+AS $_$
+DECLARE
+  result INTEGER := 0;
+  directionWeight INTEGER := 2;
+  nameWeight INTEGER := 10;
+  typeWeight INTEGER := 5;
+  -- Flip to TRUE to get a notice when the street name check fails.
+  var_verbose BOOLEAN := FALSE;
+BEGIN
+  IF $3 IS NULL OR $4 IS NULL THEN
+    IF var_verbose THEN
+      RAISE NOTICE 'rate_attributes() - Street names cannot be null!';
+    END IF;
+    RETURN NULL;
+  END IF;
+
+  -- Direction prefix
+  result := result + levenshtein_ignore_case(cull_null($1), cull_null($2)) *
+      directionWeight;
+  -- Street name
+  result := result + levenshtein_ignore_case($3, $4) * nameWeight;
+  -- Street type
+  result := result + levenshtein_ignore_case(cull_null($5), cull_null($6)) *
+      typeWeight;
+  -- Direction suffix: compare A ($7) against B ($8).  The original compared
+  -- $7 with itself, so this term was always 0.
+  result := result + levenshtein_ignore_case(cull_null($7), cull_null($8)) *
+      directionWeight;
+  RETURN result;
+END;
+$_$ LANGUAGE plpgsql;

Added: trunk/extras/tiger_geocoder/normalize/count_words.sql
===================================================================
--- trunk/extras/tiger_geocoder/normalize/count_words.sql	2007-07-03 21:22:29 UTC (rev 2641)
+++ trunk/extras/tiger_geocoder/normalize/count_words.sql	2007-07-03 21:30:34 UTC (rev 2642)
@@ -0,0 +1,32 @@
+-- Determine the number of words in a string.  Words are separated only
+-- by spaces; any number of spaces may appear between words, and leading
+-- or trailing spaces are ignored.
+-- Returns -1 for a NULL input and 0 for an empty or all-space string.
+CREATE OR REPLACE FUNCTION count_words(VARCHAR) RETURNS INTEGER
+AS $_$
+DECLARE
+  inputLength INTEGER;
+  wordCount INTEGER := 0;
+  inWord BOOLEAN := FALSE;
+BEGIN
+  IF $1 IS NULL THEN
+    RETURN -1;
+  END IF;
+  inputLength := length($1);
+  -- Count word starts (a non-space character that follows a space or the
+  -- start of the string) rather than space runs, so that leading and
+  -- trailing spaces do not inflate the result.
+  FOR i IN 1..inputLength LOOP
+    IF substring($1 from i for 1) = ' ' THEN
+      inWord := FALSE;
+    ELSIF NOT inWord THEN
+      wordCount := wordCount + 1;
+      inWord := TRUE;
+    END IF;
+  END LOOP;
+  RETURN wordCount;
+END;
+$_$ LANGUAGE plpgsql;

Added: trunk/extras/tiger_geocoder/normalize/end_soundex.sql
===================================================================
--- trunk/extras/tiger_geocoder/normalize/end_soundex.sql	2007-07-03 21:22:29 UTC (rev 2641)
+++ trunk/extras/tiger_geocoder/normalize/end_soundex.sql	2007-07-03 21:30:34 UTC (rev 2642)
@@ -0,0 +1,17 @@
+-- Returns the soundex code of the last word of the given string.
+-- A word boundary is a space, comma, period, new-line, tab or form feed.
+-- When no boundary followed by a trailing alphanumeric run is found, the
+-- soundex code of the whole string is returned instead.
+CREATE OR REPLACE FUNCTION end_soundex(VARCHAR) RETURNS VARCHAR
+AS $_$
+BEGIN
+  -- substring() yields NULL when the pattern does not match, in which
+  -- case COALESCE falls back to the full input.
+  RETURN soundex(
+      COALESCE(substring($1, E'[ ,.\n\t\f]([a-zA-Z0-9]*)$'), $1));
+END;
+$_$ LANGUAGE plpgsql;

Added: trunk/extras/tiger_geocoder/normalize/get_last_words.sql
===================================================================
--- trunk/extras/tiger_geocoder/normalize/get_last_words.sql	2007-07-03 21:22:29 UTC (rev 2641)
+++ trunk/extras/tiger_geocoder/normalize/get_last_words.sql	2007-07-03 21:30:34 UTC (rev 2642)
@@ -0,0 +1,29 @@
+-- Returns the last N words of the input string.  Words are separated only
+-- by spaces (any number of them) and consist of letters, digits and
+-- underscores.
+-- The result is built from the end of the string, one word per pass; each
+-- word must be preceded by at least one space.  If a pass finds no further
+-- word, i.e. more words were requested than can be extracted, the full
+-- input string is returned unchanged.
+CREATE OR REPLACE FUNCTION get_last_words(
+    inputString VARCHAR,
+    count INTEGER
+) RETURNS VARCHAR
+AS $_$
+DECLARE
+  tail VARCHAR := '';
+  tailPattern VARCHAR;
+  nextWord VARCHAR;
+BEGIN
+  FOR i IN 1..count LOOP
+    -- Capture the space-prefixed word that sits directly in front of the
+    -- tail collected so far.
+    tailPattern := '((?: )+[a-zA-Z0-9_]*)' || tail || '$';
+    nextWord := substring(inputString from tailPattern);
+
+    IF nextWord IS NULL THEN
+      RETURN inputString;
+    END IF;
+
+    tail := nextWord || tail;
+  END LOOP;
+
+  RETURN trim(both from tail);
+END;
+$_$ LANGUAGE plpgsql;

Added: trunk/extras/tiger_geocoder/normalize/location_extract.sql
===================================================================
--- trunk/extras/tiger_geocoder/normalize/location_extract.sql	2007-07-03 21:22:29 UTC (rev 2641)
+++ trunk/extras/tiger_geocoder/normalize/location_extract.sql	2007-07-03 21:30:34 UTC (rev 2642)
@@ -0,0 +1,36 @@
+-- location_extract(streetAddressString, stateAbbreviation)
+-- Extracts a location name from the end of the given string by trying, in
+-- order, and stopping at the first one that yields a result:
+--   1. location_extract_place_exact
+--   2. location_extract_countysub_exact
+--   3. location_extract_place_fuzzy
+--   4. location_extract_countysub_fuzzy
+-- Returns NULL when the input string is NULL or none of the four finds a
+-- location.
+--
+-- The value returned is whatever those functions return; they are
+-- documented as returning the matching section of the given string rather
+-- than the name stored in the lookup tables.
+CREATE OR REPLACE FUNCTION location_extract(fullStreet VARCHAR, stateAbbrev VARCHAR) RETURNS VARCHAR
+AS $_$
+BEGIN
+  IF fullStreet IS NULL THEN
+    RETURN NULL;
+  END IF;
+
+  -- COALESCE evaluates its arguments left to right and stops at the first
+  -- non-null one, so later lookups only run when the earlier ones fail.
+  RETURN COALESCE(
+      location_extract_place_exact(fullStreet, stateAbbrev),
+      location_extract_countysub_exact(fullStreet, stateAbbrev),
+      location_extract_place_fuzzy(fullStreet, stateAbbrev),
+      location_extract_countysub_fuzzy(fullStreet, stateAbbrev));
+END;
+$_$ LANGUAGE plpgsql;

Added: trunk/extras/tiger_geocoder/normalize/location_extract_countysub_exact.sql
===================================================================
--- trunk/extras/tiger_geocoder/normalize/location_extract_countysub_exact.sql	2007-07-03 21:22:29 UTC (rev 2641)
+++ trunk/extras/tiger_geocoder/normalize/location_extract_countysub_exact.sql	2007-07-03 21:30:34 UTC (rev 2642)
@@ -0,0 +1,54 @@
+-- location_extract_countysub_exact(string, stateAbbrev)
+-- Looks for a countysub_lookup name that appears, case-insensitively, at
+-- the very end of the given string and is preceded by a separator (space,
+-- comma, period, new-line, form feed or tab).  When several names match,
+-- the longest one is used.
+--   fullStreet   string whose tail is searched
+--   stateAbbrev  when not NULL, only names of that state are considered
+-- Returns the matching section of the given string (not the name stored in
+-- the table), or NULL when nothing matches.  No fuzzy matching is done.
+CREATE OR REPLACE FUNCTION location_extract_countysub_exact(
+    fullStreet VARCHAR,
+    stateAbbrev VARCHAR
+) RETURNS VARCHAR
+AS $_$
+DECLARE
+  ws VARCHAR;
+  location VARCHAR;
+BEGIN
+  ws := E'[ ,.\n\f\t]';
+
+  -- A single ordered query replaces the former count(*) pre-check plus
+  -- loop: the pre-check used a weaker pattern than the loop, so it could
+  -- never change the result and only cost an extra scan.
+  IF stateAbbrev IS NOT NULL THEN
+    SELECT INTO location substring(fullStreet, '(?i)(' || name || ')$')
+        FROM countysub_lookup
+        WHERE countysub_lookup.state = stateAbbrev
+        AND texticregexeq(fullStreet, '(?i)' || ws || name || '$')
+        ORDER BY length(name) DESC
+        LIMIT 1;
+  ELSE
+    SELECT INTO location substring(fullStreet, '(?i)(' || name || ')$')
+        FROM countysub_lookup
+        WHERE texticregexeq(fullStreet, '(?i)' || ws || name || '$')
+        ORDER BY length(name) DESC
+        LIMIT 1;
+  END IF;
+
+  -- location is NULL when no row matched.
+  RETURN location;
+END;
+$_$ LANGUAGE plpgsql;

Added: trunk/extras/tiger_geocoder/normalize/location_extract_countysub_fuzzy.sql
===================================================================
--- trunk/extras/tiger_geocoder/normalize/location_extract_countysub_fuzzy.sql	2007-07-03 21:22:29 UTC (rev 2641)
+++ trunk/extras/tiger_geocoder/normalize/location_extract_countysub_fuzzy.sql	2007-07-03 21:30:34 UTC (rev 2642)
@@ -0,0 +1,87 @@
+-- location_extract_countysub_fuzzy(string, stateAbbrev)
+-- Fuzzy search of the countysub_lookup table for the location described at
+-- the end of the given string.
+--   fullStreet   string whose tail is searched
+--   stateAbbrev  when not NULL, only names of that state are considered
+-- Candidates are the names whose last word has the same soundex code as
+-- the last word of the string.  For each candidate the same number of
+-- trailing words is taken from the string and compared word by word using
+-- soundex.  Among the candidates where every word matches, the one with
+-- the smallest case-insensitive levenshtein distance (which must be below
+-- 50) wins.
+-- Returns the matching section of the given string (not the name stored in
+-- the table), or NULL when no candidate qualifies.
+CREATE OR REPLACE FUNCTION location_extract_countysub_fuzzy(
+    fullStreet VARCHAR,
+    stateAbbrev VARCHAR
+) RETURNS VARCHAR
+AS $_$
+DECLARE
+  ws VARCHAR;
+  lastWord VARCHAR;
+  candidate VARCHAR;
+  location VARCHAR;
+  -- Best distance seen so far; the initial value is the acceptance limit.
+  bestDistance INTEGER := 50;
+  distance INTEGER;
+  word_count INTEGER;
+  matches REFCURSOR;
+  rec RECORD;
+  allWordsMatch BOOLEAN;
+BEGIN
+  ws := E'[ ,.\n\f\t]';
+
+  -- The last word of the input drives the candidate search; fall back to
+  -- the whole string when no separator-delimited last word exists.
+  lastWord := substring(fullStreet, '(?i)' || ws ||
+      '([a-zA-Z0-9]+)$');
+  IF lastWord IS NULL THEN
+    lastWord := fullStreet;
+  END IF;
+
+  -- The two queries differ only in the state filter; a cursor lets both
+  -- share one loop body.  No count(*) pre-check is needed: an empty result
+  -- simply leaves location NULL.
+  IF stateAbbrev IS NOT NULL THEN
+    OPEN matches FOR SELECT name FROM countysub_lookup
+        WHERE countysub_lookup.state = stateAbbrev
+        AND soundex(lastWord) = end_soundex(name);
+  ELSE
+    OPEN matches FOR SELECT name FROM countysub_lookup
+        WHERE soundex(lastWord) = end_soundex(name);
+  END IF;
+
+  LOOP
+    FETCH matches INTO rec;
+    EXIT WHEN NOT FOUND;
+
+    -- Take as many trailing words from the input as the candidate has.
+    word_count := count_words(rec.name);
+    candidate := get_last_words(fullStreet, word_count);
+
+    -- Word-by-word soundex comparison; stop at the first mismatch.
+    allWordsMatch := TRUE;
+    FOR i IN 1..word_count LOOP
+      IF soundex(split_part(candidate, ' ', i)) !=
+          soundex(split_part(rec.name, ' ', i)) THEN
+        allWordsMatch := FALSE;
+        EXIT;
+      END IF;
+    END LOOP;
+
+    IF allWordsMatch THEN
+      -- The soundex matched, determine if the distance is better.
+      distance := levenshtein_ignore_case(rec.name, candidate);
+      IF distance < bestDistance THEN
+        location := candidate;
+        bestDistance := distance;
+      END IF;
+    END IF;
+  END LOOP;
+  CLOSE matches;
+
+  -- If no fuzzy match was found, location is still NULL.
+  RETURN location;
+END;
+$_$ LANGUAGE plpgsql;

Added: trunk/extras/tiger_geocoder/normalize/location_extract_place_exact.sql
===================================================================
--- trunk/extras/tiger_geocoder/normalize/location_extract_place_exact.sql	2007-07-03 21:22:29 UTC (rev 2641)
+++ trunk/extras/tiger_geocoder/normalize/location_extract_place_exact.sql	2007-07-03 21:30:34 UTC (rev 2642)
@@ -0,0 +1,57 @@
+-- location_extract_place_exact(string, stateAbbrev)
+-- Looks for a place_lookup name that appears, case-insensitively, at the
+-- very end of the given string.  When several names match, the longest
+-- one is used.
+--   fullStreet   string whose tail is searched
+--   stateAbbrev  when not NULL, only names of that state are considered
+-- Returns the matching section of the given string (not the name stored in
+-- the table), or NULL when nothing matches.  No fuzzy matching is done.
+CREATE OR REPLACE FUNCTION location_extract_place_exact(
+    fullStreet VARCHAR,
+    stateAbbrev VARCHAR
+) RETURNS VARCHAR
+AS $_$
+DECLARE
+  location VARCHAR;
+BEGIN
+  -- A single ordered query replaces the former count(*) pre-check plus
+  -- loop, which evaluated the very same predicate twice.  Since the regex
+  -- is anchored at the end of the string, only the longest match is
+  -- useful.
+  IF stateAbbrev IS NOT NULL THEN
+    SELECT INTO location substring(fullStreet, '(?i)(' || name || ')$')
+        FROM place_lookup
+        WHERE place_lookup.state = stateAbbrev
+        AND texticregexeq(fullStreet, '(?i)' || name || '$')
+        ORDER BY length(name) DESC
+        LIMIT 1;
+  ELSE
+    SELECT INTO location substring(fullStreet, '(?i)(' || name || ')$')
+        FROM place_lookup
+        WHERE texticregexeq(fullStreet, '(?i)' || name || '$')
+        ORDER BY length(name) DESC
+        LIMIT 1;
+  END IF;
+
+  -- location is NULL when no row matched.
+  RETURN location;
+END;
+$_$ LANGUAGE plpgsql;

Added: trunk/extras/tiger_geocoder/normalize/location_extract_place_fuzzy.sql
===================================================================
--- trunk/extras/tiger_geocoder/normalize/location_extract_place_fuzzy.sql	2007-07-03 21:22:29 UTC (rev 2641)
+++ trunk/extras/tiger_geocoder/normalize/location_extract_place_fuzzy.sql	2007-07-03 21:30:34 UTC (rev 2642)
@@ -0,0 +1,86 @@
+-- location_extract_place_fuzzy(string, stateAbbrev)
+-- Fuzzy search of the place_lookup table for the location described at the
+-- end of the given string.
+--   fullStreet   string whose tail is searched
+--   stateAbbrev  when not NULL, only names of that state are considered
+-- Candidates are the names whose last word has the same soundex code as
+-- the last word of the string.  For each candidate the same number of
+-- trailing words is taken from the string and compared word by word using
+-- soundex.  Among the candidates where every word matches, the one with
+-- the smallest case-insensitive levenshtein distance (which must be below
+-- 50) wins.
+-- Returns the matching section of the given string (not the name stored in
+-- the table), or NULL when no candidate qualifies.
+CREATE OR REPLACE FUNCTION location_extract_place_fuzzy(
+    fullStreet VARCHAR,
+    stateAbbrev VARCHAR
+) RETURNS VARCHAR
+AS $_$
+DECLARE
+  ws VARCHAR;
+  lastWord VARCHAR;
+  candidate VARCHAR;
+  location VARCHAR;
+  -- Best distance seen so far; the initial value is the acceptance limit.
+  bestDistance INTEGER := 50;
+  distance INTEGER;
+  word_count INTEGER;
+  matches REFCURSOR;
+  rec RECORD;
+  allWordsMatch BOOLEAN;
+BEGIN
+  ws := E'[ ,.\n\f\t]';
+
+  -- The last word of the input drives the candidate search; fall back to
+  -- the whole string when no separator-delimited last word exists.
+  lastWord := substring(fullStreet, '(?i)' || ws
+      || '([a-zA-Z0-9]+)$');
+  IF lastWord IS NULL THEN
+    lastWord := fullStreet;
+  END IF;
+
+  -- The two queries differ only in the state filter; a cursor lets both
+  -- share one loop body.  No count(*) pre-check is needed: an empty result
+  -- simply leaves location NULL.
+  IF stateAbbrev IS NOT NULL THEN
+    OPEN matches FOR SELECT name FROM place_lookup
+        WHERE place_lookup.state = stateAbbrev
+        AND soundex(lastWord) = end_soundex(name);
+  ELSE
+    OPEN matches FOR SELECT name FROM place_lookup
+        WHERE soundex(lastWord) = end_soundex(name);
+  END IF;
+
+  LOOP
+    FETCH matches INTO rec;
+    EXIT WHEN NOT FOUND;
+
+    -- Take as many trailing words from the input as the candidate has.
+    word_count := count_words(rec.name);
+    candidate := get_last_words(fullStreet, word_count);
+
+    -- Word-by-word soundex comparison; stop at the first mismatch.
+    allWordsMatch := TRUE;
+    FOR i IN 1..word_count LOOP
+      IF soundex(split_part(candidate, ' ', i)) !=
+          soundex(split_part(rec.name, ' ', i)) THEN
+        allWordsMatch := FALSE;
+        EXIT;
+      END IF;
+    END LOOP;
+
+    IF allWordsMatch THEN
+      -- The soundex matched, determine if the distance is better.
+      distance := levenshtein_ignore_case(rec.name, candidate);
+      IF distance < bestDistance THEN
+        location := candidate;
+        bestDistance := distance;
+      END IF;
+    END IF;
+  END LOOP;
+  CLOSE matches;
+
+  -- If no fuzzy match was found, location is still NULL.
+  RETURN location;
+END;
+$_$ LANGUAGE plpgsql;

Added: trunk/extras/tiger_geocoder/normalize/normalize_address.sql
===================================================================
--- trunk/extras/tiger_geocoder/normalize/normalize_address.sql	2007-07-03 21:22:29 UTC (rev 2641)
+++ trunk/extras/tiger_geocoder/normalize/normalize_address.sql	2007-07-03 21:30:34 UTC (rev 2642)
@@ -0,0 +1,493 @@
+-- normalize_address(addressString)
+-- This takes an address string and parses it into address (internal/street)
+-- street name, type, direction prefix and suffix, location, state and
+-- zip code, depending on what can be found in the string.
+--
+-- The US postal address standard is used:
+-- <Street Number> <Direction Prefix> <Street Name> <Street Type>
+-- <Direction Suffix> <Internal Address> <Location> <State> <Zip Code>
+--
+-- State is assumed to be included in the string, and MUST be matchable to
+-- something in the state_lookup table.  Fuzzy matching is used if no direct
+-- match is found.
+--
+-- Two formats of zip code are acceptable: five digit, and five + 4.
+--
+-- The internal addressing indicators are looked up from the
+-- secondary_unit_lookup table.  A following identifier is accepted
+-- but it must start with a digit.
+--
+-- The location is parsed from the string using other indicators, such
+-- as street type, direction suffix or internal address, if available.
+-- If these are not, the location is extracted using comparisons against
+-- the place_lookup table, then the countysub_lookup table to determine
+-- what, in the original string, is intended to be the location.  In both
+-- cases, an exact match is first pursued, then a word-by-word fuzzy match.
+-- The result is not the name of the location from the tables, but the
+-- section of the given string that corresponds to the name from the tables.
+--
+-- Zip codes and street names are not validated.
+--
+-- Direction indicators are extracted by comparison with the direction_lookup
+-- table.
+--
+-- Street addresses are assumed to be a single word, starting with a number.
+-- Address is mandatory; if no address is given, and the street is numbered,
+-- the resulting address will be the street name, and the street name
+-- will be an empty string.
+--
+-- In some cases, the street type is part of the street name.
+-- eg State Hwy 22a.  As long as the word following the type starts with a
+-- number (this is usually the case) this will be caught.  Some street names
+-- include a type name, and have a street type that differs.  This will be
+-- handled properly, so long as both are given.  If the street type is
+-- omitted, the street names included type will be parsed as the street type.
+--
+-- The output is currently a colon separated list of values:
+-- InternalAddress:StreetAddress:DirectionPrefix:StreetName:StreetType:
+-- DirectionSuffix:Location:State:ZipCode
+-- This returns each element as entered.  It's mainly meant for debugging.
+-- There is also another option that returns:
+-- StreetAddress:DirectionPrefixAbbreviation:StreetName:StreetTypeAbbreviation:
+-- DirectionSuffixAbbreviation:Location:StateAbbreviation:ZipCode
+-- This is more standardized and better for use with a geocoder.
+CREATE OR REPLACE FUNCTION normalize_address(
+    rawInput VARCHAR
+) RETURNS norm_addy
+AS $_$
+DECLARE
+  result norm_addy;
+  addressString VARCHAR;
+  zipString VARCHAR;
+  preDir VARCHAR;
+  postDir VARCHAR;
+  fullStreet VARCHAR;
+  reducedStreet VARCHAR;
+  streetType VARCHAR;
+  state VARCHAR;
+  tempString VARCHAR;
+  tempInt INTEGER;
+  rec RECORD;
+  ws VARCHAR;
+BEGIN
+  result.parsed := FALSE;
+
+  IF rawInput IS NULL THEN
+    RETURN result;
+  END IF;
+
+  ws := E'[ ,.\t\n\f\r]';
+
+  -- Assume that the address begins with a digit, and extract it from
+  -- the input string.
+  addressString := substring(rawInput from '^([0-9].*?)[ ,/.]');
+
+  -- There are two formats for zip code, the normal 5 digit, and
+  -- the nine digit zip-4.  It may also not exist.
+  zipString := substring(rawInput from ws || '([0-9]{5})$');
+  IF zipString IS NULL THEN
+    zipString := substring(rawInput from ws || '([0-9]{5})-[0-9]{4}$');
+    -- Check if all we got was a zipcode, of either form
+    IF zipString IS NULL THEN
+      zipString := substring(rawInput from '^([0-9]{5})$');
+      IF zipString IS NULL THEN
+        zipString := substring(rawInput from '^([0-9]{5})-[0-9]{4}$');
+      END IF;
+      -- If it was only a zipcode, then just return it.
+      IF zipString IS NOT NULL THEN
+        result.zip := to_number(zipString, '99999');
+        result.parsed := TRUE;
+        RETURN result;
+      END IF;
+    END IF;
+  END IF;
+
+  IF zipString IS NOT NULL THEN
+    fullStreet := substring(rawInput from '(.*)'
+        || ws || '+' || cull_null(zipString) || '[- ]?([0-9]{4})?$');
+  ELSE
+    fullStreet := rawInput;
+  END IF;
+
+  -- FIXME: state_extract should probably be returning a record so we can
+  -- avoid having to parse the result from it.
+  tempString := state_extract(fullStreet);
+  IF tempString IS NOT NULL THEN
+    state := split_part(tempString, ':', 1);
+    result.stateAbbrev := split_part(tempString, ':', 2);
+  END IF;
+
+  -- The easiest case is if the address is comma delimited.  There are some
+  -- likely cases:
+  --   street level, location, state
+  --   street level, location state
+  --   street level, location
+  --   street level, internal address, location, state
+  --   street level, internal address, location state
+  --   street level, internal address location state
+  --   street level, internal address, location
+  --   street level, internal address location
+  -- The first three are useful.
+  tempString := substring(fullStreet, '(?i),' || ws || '+(.*?)(,?' || ws ||
+      '*' || cull_null(state) || '$)');
+  IF tempString = '' THEN tempString := NULL; END IF;
+  IF tempString IS NOT NULL THEN
+    result.location := tempString;
+    IF addressString IS NOT NULL THEN
+      fullStreet := substring(fullStreet, '(?i)' || addressString || ws ||
+          '+(.*),' || ws || '+' || result.location);
+    ELSE
+      fullStreet := substring(fullStreet, '(?i)(.*),' || ws || '+' ||
+          result.location);
+    END IF;
+  END IF;
+
+  -- Pull out the full street information, defined as everything between the
+  -- address and the state.  This includes the location.
+  -- This doesnt need to be done if location has already been found.
+  IF result.location IS NULL THEN
+    IF addressString IS NOT NULL THEN
+      IF state IS NOT NULL THEN
+        fullStreet := substring(fullStreet, '(?i)' || addressString ||
+            ws || '+(.*?)' || ws || '+' || state);
+      ELSE
+        fullStreet := substring(fullStreet, '(?i)' || addressString ||
+            ws || '+(.*?)');
+      END IF;
+    ELSE
+      IF state IS NOT NULL THEN
+        fullStreet := substring(fullStreet, '(?i)(.*?)' || ws ||
+            '+' || state);
+      ELSE
+        fullStreet := substring(fullStreet, '(?i)(.*?)');
+      END IF;
+    END IF;
+  END IF;
+
+  -- Determine if any internal address is included, such as apartment
+  -- or suite number.
+  SELECT INTO tempInt count(*) FROM secondary_unit_lookup
+      WHERE texticregexeq(fullStreet, '(?i)' || ws || name || '('
+          || ws || '|$)');
+  IF tempInt = 1 THEN
+    SELECT INTO result.internal substring(fullStreet, '(?i)' || ws || '('
+        || name ||  ws || '*#?' || ws
+        || '*(?:[0-9][-0-9a-zA-Z]*)?' || ')(?:' || ws || '|$)')
+        FROM secondary_unit_lookup
+        WHERE texticregexeq(fullStreet, '(?i)' || ws || name || '('
+        || ws || '|$)');
+    ELSIF tempInt > 1 THEN
+    -- In the event of multiple matches to a secondary unit designation, we
+    -- will assume that the last one is the true one.
+    tempInt := 0;
+    FOR rec in SELECT trim(substring(fullStreet, '(?i)' || ws || '('
+        || name || '(?:' || ws || '*#?' || ws
+        || '*(?:[0-9][-0-9a-zA-Z]*)?)' || ws || '?|$)')) as value
+        FROM secondary_unit_lookup
+        WHERE texticregexeq(fullStreet, '(?i)' || ws || name || '('
+        || ws || '|$)') LOOP
+      IF tempInt < position(rec.value in fullStreet) THEN
+        tempInt := position(rec.value in fullStreet);
+        result.internal := rec.value;
+      END IF;
+    END LOOP;
+  END IF;
+
+  IF result.location IS NULL THEN
+    -- If the internal address is given, the location is everything after it.
+    result.location := substring(fullStreet, result.internal || ws || '+(.*)$');
+  END IF;
+
+  -- Pull potential street types from the full street information
+  SELECT INTO tempInt count(*) FROM street_type_lookup
+      WHERE texticregexeq(fullStreet, '(?i)' || ws || '(' || name
+      || ')(?:' || ws || '|$)');
+  IF tempInt = 1 THEN
+    SELECT INTO rec abbrev, substring(fullStreet, '(?i)' || ws || '('
+        || name || ')(?:' || ws || '|$)') AS given FROM street_type_lookup
+        WHERE texticregexeq(fullStreet, '(?i)' || ws || '(' || name
+        || ')(?:' || ws || '|$)');
+    streetType := rec.given;
+    result.streetTypeAbbrev := rec.abbrev;
+  ELSIF tempInt > 1 THEN
+    tempInt := 0;
+    FOR rec IN SELECT abbrev, substring(fullStreet, '(?i)' || ws || '('
+        || name || ')(?:' || ws || '|$)') AS given FROM street_type_lookup
+        WHERE texticregexeq(fullStreet, '(?i)' || ws || '(' || name
+        || ')(?:' || ws || '|$)') LOOP
+      -- If we have found an internal address, make sure the type
+      -- precedes it.
+      IF result.internal IS NOT NULL THEN
+        IF position(rec.given IN fullStreet) < position(result.internal IN fullStreet) THEN
+          IF tempInt < position(rec.given IN fullStreet) THEN
+            streetType := rec.given;
+            result.streetTypeAbbrev := rec.abbrev;
+            tempInt := position(rec.given IN fullStreet);
+          END IF;
+        END IF;
+      ELSIF tempInt < position(rec.given IN fullStreet) THEN
+        streetType := rec.given;
+        result.streetTypeAbbrev := rec.abbrev;
+        tempInt := position(rec.given IN fullStreet);
+      END IF;
+    END LOOP;
+  END IF;
+
+  -- There is a little more processing required now.  If the word after the
+  -- street type begins with a number, the street type should be considered
+  -- part of the name, as well as the next word.  eg, State Route 225a.  If
+  -- the next word starts with a char, then everything after the street type
+  -- will be considered location.  If there is no street type, then I'm sad.
+  IF streetType IS NOT NULL THEN
+    tempString := substring(fullStreet, streetType || ws ||
+        E'+([0-9][^ ,.\t\r\n\f]*?)' || ws);
+    IF tempString IS NOT NULL THEN
+      IF result.location IS NULL THEN
+        result.location := substring(fullStreet, streetType || ws || '+'
+                 || tempString || ws || '+(.*)$');
+      END IF;
+      reducedStreet := substring(fullStreet, '(.*)' || ws || '+'
+                    || result.location || '$');
+      streetType := NULL;
+      result.streetTypeAbbrev := NULL;
+    ELSE
+      IF result.location IS NULL THEN
+        result.location := substring(fullStreet, streetType || ws || '+(.*)$');
+      END IF;
+      reducedStreet := substring(fullStreet, '^(.*)' || ws || '+'
+                    || streetType);
+    END IF;
+
+    -- The pre direction should be at the beginning of the fullStreet string.
+    -- The post direction should be at the beginning of the location string
+    -- if there is no internal address
+    SELECT INTO tempString substring(reducedStreet, '(?i)(^' || name
+        || ')' || ws) FROM direction_lookup WHERE
+        texticregexeq(reducedStreet, '(?i)(^' || name || ')' || ws)
+        ORDER BY length(name) DESC;
+    IF tempString IS NOT NULL THEN
+      preDir := tempString;
+      SELECT INTO result.preDirAbbrev abbrev FROM direction_lookup
+          where texticregexeq(reducedStreet, '(?i)(^' || name || ')' || ws)
+          ORDER BY length(name) DESC;
+      result.streetName := substring(reducedStreet, '^' || preDir || ws || '(.*)');
+    ELSE
+      result.streetName := reducedStreet;
+    END IF;
+
+    IF texticregexeq(result.location, '(?i)' || result.internal || '$') THEN
+      -- If the internal address is at the end of the location, then no
+      -- location was given.  We still need to look for post direction.
+      SELECT INTO rec abbrev,
+          substring(result.location, '(?i)^(' || name || ')' || ws) as value
+          FROM direction_lookup WHERE texticregexeq(result.location, '(?i)^'
+          || name || ws) ORDER BY length(name) desc;
+      IF rec.value IS NOT NULL THEN
+        postDir := rec.value;
+        result.postDirAbbrev := rec.abbrev;
+      END IF;
+      result.location := null;
+    ELSIF result.internal IS NULL THEN
+      -- If no location is given, the location string will be the post direction
+      SELECT INTO tempInt count(*) FROM direction_lookup WHERE
+          upper(result.location) = upper(name);
+      IF tempInt != 0 THEN
+        postDir := result.location;
+        SELECT INTO result.postDirAbbrev abbrev FROM direction_lookup WHERE
+            upper(postDir) = upper(name);
+        result.location := NULL;
+      ELSE
+        -- postDirection is not equal location, but may be contained in it.
+        SELECT INTO tempString substring(result.location, '(?i)(^' || name
+            || ')' || ws) FROM direction_lookup WHERE
+            texticregexeq(result.location, '(?i)(^' || name || ')' || ws)
+            ORDER BY length(name) desc;
+        IF tempString IS NOT NULL THEN
+          postDir := tempString;
+          SELECT INTO result.postDirAbbrev abbrev FROM direction_lookup
+              where texticregexeq(result.location, '(?i)(^' || name || ')' || ws);
+          result.location := substring(result.location, '^' || postDir || ws || '+(.*)');
+        END IF;
+      END IF;
+    ELSE
+      -- internal is not null, but is not at the end of the location string
+      -- look for post direction before the internal address
+      SELECT INTO tempString substring(fullStreet, '(?i)' || streetType
+          || ws || '+(' || name || ')' || ws || '+' || result.internal)
+          FROM direction_lookup WHERE texticregexeq(fullStreet, '(?i)'
+          || ws || name || ws || '+' || result.internal) ORDER BY length(name) desc;
+      IF tempString IS NOT NULL THEN
+        postDir := tempString;
+        SELECT INTO result.postDirAbbrev abbrev FROM direction_lookup
+            WHERE texticregexeq(fullStreet, '(?i)' || ws || name || ws);
+      END IF;
+    END IF;
+  ELSE
+  -- No street type was found
+
+    -- If an internal address was given, then the split becomes easy, and the
+    -- street name is everything before it, without directions.
+    IF result.internal IS NOT NULL THEN
+      reducedStreet := substring(fullStreet, '(?i)^(.*?)' || ws || '+'
+                    || result.internal);
+      SELECT INTO tempInt count(*) FROM direction_lookup WHERE
+          texticregexeq(reducedStreet, '(?i)' || ws || name || '$');
+      IF tempInt > 0 THEN
+        SELECT INTO postDir substring(reducedStreet, '(?i)' || ws || '('
+            || name || ')' || '$') FROM direction_lookup
+            WHERE texticregexeq(reducedStreet, '(?i)' || ws || name || '$');
+        SELECT INTO result.postDirAbbrev abbrev FROM direction_lookup
+            WHERE texticregexeq(reducedStreet, '(?i)' || ws || name || '$');
+      END IF;
+      SELECT INTO tempString substring(reducedStreet, '(?i)^(' || name
+          || ')' || ws) FROM direction_lookup WHERE
+          texticregexeq(reducedStreet, '(?i)^(' || name || ')' || ws)
+          ORDER BY length(name) DESC;
+      IF tempString IS NOT NULL THEN
+        preDir := tempString;
+        SELECT INTO result.preDirAbbrev abbrev FROM direction_lookup WHERE
+            texticregexeq(reducedStreet, '(?i)(^' || name || ')' || ws)
+            ORDER BY length(name) DESC;
+        result.streetName := substring(reducedStreet, '(?i)^' || preDir || ws
+                   || '+(.*?)(?:' || ws || '+' || cull_null(postDir) || '|$)');
+      ELSE
+        result.streetName := substring(reducedStreet, '(?i)^(.*?)(?:' || ws
+                   || '+' || cull_null(postDir) || '|$)');
+      END IF;
+    ELSE
+
+      -- If a post direction is given, then the location is everything after,
+      -- the street name is everything before, less any pre direction.
+      SELECT INTO tempInt count(*) FROM direction_lookup
+          WHERE texticregexeq(fullStreet, '(?i)' || ws || name || '(?:'
+              || ws || '|$)');
+
+      IF tempInt = 1 THEN
+        -- A single postDir candidate was found.  This makes it easier.
+        SELECT INTO postDir substring(fullStreet, '(?i)' || ws || '('
+            || name || ')(?:' || ws || '|$)') FROM direction_lookup WHERE
+            texticregexeq(fullStreet, '(?i)' || ws || name || '(?:'
+            || ws || '|$)');
+        SELECT INTO result.postDirAbbrev abbrev FROM direction_lookup
+            WHERE texticregexeq(fullStreet, '(?i)' || ws || name
+            || '(?:' || ws || '|$)');
+        IF result.location IS NULL THEN
+          result.location := substring(fullStreet, '(?i)' || ws || postDir
+                   || ws || '+(.*?)$');
+        END IF;
+        reducedStreet := substring(fullStreet, '^(.*?)' || ws || '+'
+                      || postDir);
+        SELECT INTO tempString substring(reducedStreet, '(?i)(^' || name
+            || ')' || ws) FROM direction_lookup WHERE
+            texticregexeq(reducedStreet, '(?i)(^' || name || ')' || ws)
+            ORDER BY length(name) DESC;
+        IF tempString IS NOT NULL THEN
+          preDir := tempString;
+          SELECT INTO result.preDirAbbrev abbrev FROM direction_lookup WHERE
+              texticregexeq(reducedStreet, '(?i)(^' || name || ')' || ws)
+              ORDER BY length(name) DESC;
+          result.streetName := substring(reducedStreet, '^' || preDir || ws
+                     || '+(.*)');
+        ELSE
+          result.streetName := reducedStreet;
+        END IF;
+      ELSIF tempInt > 1 THEN
+        -- Multiple postDir candidates were found.  We need to find the last
+        -- incident of a direction, but avoid getting the last word from
+        -- a two word direction. eg extracting "East" from "North East"
+        -- We do this by sorting by length, and taking the last direction
+        -- in the results that is not included in an earlier one.
+        -- This wont be a problem it preDir is North East and postDir is
+        -- East as the regex requires a space before the direction.  Only
+        -- the East will return from the preDir.
+        tempInt := 0;
+        FOR rec IN SELECT abbrev, substring(fullStreet, '(?i)' || ws || '('
+            || name || ')(?:' || ws || '|$)') AS value
+            FROM direction_lookup
+            WHERE texticregexeq(fullStreet, '(?i)' || ws || name
+            || '(?:' || ws || '|$)')
+            ORDER BY length(name) desc LOOP
+          tempInt := 0;
+          IF tempInt < position(rec.value in fullStreet) THEN
+            IF postDir IS NULL THEN
+              tempInt := position(rec.value in fullStreet);
+              postDir := rec.value;
+              result.postDirAbbrev := rec.abbrev;
+            ELSIF NOT texticregexeq(postDir, '(?i)' || rec.value) THEN
+              tempInt := position(rec.value in fullStreet);
+              postDir := rec.value;
+              result.postDirAbbrev := rec.abbrev;
+             END IF;
+          END IF;
+        END LOOP;
+        IF result.location IS NULL THEN
+          result.location := substring(fullStreet, '(?i)' || ws || postDir || ws
+                   || '+(.*?)$');
+        END IF;
+        reducedStreet := substring(fullStreet, '(?i)^(.*?)' || ws || '+'
+                      || postDir);
+        SELECT INTO tempString substring(reducedStreet, '(?i)(^' || name
+            || ')' || ws) FROM direction_lookup WHERE
+            texticregexeq(reducedStreet, '(?i)(^' || name || ')' || ws)
+            ORDER BY length(name) DESC;
+        IF tempString IS NOT NULL THEN
+          preDir := tempString;
+          SELECT INTO result.preDirAbbrev abbrev FROM direction_lookup WHERE
+              texticregexeq(reducedStreet, '(?i)(^' || name || ')' || ws)
+              ORDER BY length(name) DESC;
+          result.streetName := substring(reducedStreet, '^' || preDir || ws
+                     || '+(.*)');
+        ELSE
+          result.streetName := reducedStreet;
+        END IF;
+      ELSE
+
+        -- There is no street type, directional suffix or internal address
+        -- to allow distinction between street name and location.
+        IF result.location IS NULL THEN
+          result.location := location_extract(fullStreet, result.stateAbbrev);
+          -- If the location was found, remove it from fullStreet
+          fullStreet := substring(fullStreet, '(?i)(.*),' || ws || '+' ||
+              result.location);
+        END IF;
+
+        -- Check for a direction prefix.
+        SELECT INTO tempString substring(fullStreet, '(?i)(^' || name
+            || ')' || ws) FROM direction_lookup WHERE
+            texticregexeq(fullStreet, '(?i)(^' || name || ')' || ws)
+            ORDER BY length(name);
+        IF tempString IS NOT NULL THEN
+          preDir := tempString;
+          SELECT INTO result.preDirAbbrev abbrev FROM direction_lookup WHERE
+              texticregexeq(fullStreet, '(?i)(^' || name || ')' || ws)
+              ORDER BY length(name) DESC;
+          IF result.location IS NOT NULL THEN
+            -- The location may still be in the fullStreet, or may
+            -- have been removed already
+            result.streetName := substring(fullStreet, '^' || preDir || ws
+                       || '+(.*?)(' || ws || '+' || result.location || '|$)');
+          ELSE
+            result.streetName := substring(fullStreet, '^' || preDir || ws
+                       || '+(.*?)' || ws || '*');
+          END IF;
+        ELSE
+          IF result.location IS NOT NULL THEN
+            -- The location may still be in the fullStreet, or may
+            -- have been removed already
+            result.streetName := substring(fullStreet, '^(.*?)(' || ws
+                       || '+' || result.location || '|$)');
+          ELSE
+            result.streetName := fullStreet;
+          END IF;
+        END IF;
+      END IF;
+    END IF;
+  END IF;
+
+  result.address := to_number(addressString, '99999999999');
+  result.zip := to_number(zipString, '99999');
+
+  result.parsed := TRUE;
+  RETURN result;
+END
+$_$ LANGUAGE plpgsql;

Added: trunk/extras/tiger_geocoder/normalize/state_extract.sql
===================================================================
--- trunk/extras/tiger_geocoder/normalize/state_extract.sql	2007-07-03 21:22:29 UTC (rev 2641)
+++ trunk/extras/tiger_geocoder/normalize/state_extract.sql	2007-07-03 21:30:34 UTC (rev 2642)
@@ -0,0 +1,82 @@
+-- state_extract(addressStringLessZipCode)
+-- Extracts the state from the end of the given string.
+--
+-- The last word of the input is looked up in the state_lookup table: first
+-- as an abbreviation, then as the final word of a full state name and, when
+-- no name ends with that word, by a word-by-word soundex (fuzzy) comparison.
+--
+-- The result is the state as given in the input string, and the approved
+-- state abbreviation, separated by a colon.  NULL means no state was found.
+CREATE OR REPLACE FUNCTION state_extract(rawInput VARCHAR) RETURNS VARCHAR
+AS $_$
+DECLARE
+  tempInt INTEGER;
+  tempString VARCHAR;
+  state VARCHAR;
+  stateAbbrev VARCHAR;
+  result VARCHAR;
+  rec RECORD;
+  test BOOLEAN;
+  ws VARCHAR;
+BEGIN
+  ws := E'[ ,.\t\n\f\r]';
+
+  -- Separate out the last word of the input; it must follow a separator and
+  -- contain no digits.  The zip code may or may not have been removed, and
+  -- no word is extracted when the input still ends in digits.
+  tempString := substring(rawInput from ws || E'+([^ ,.\t\n\f\r0-9]*?)$');
+  -- First choice: the last word is exactly one known abbreviation.
+  SELECT INTO tempInt count(*) FROM (select distinct abbrev from state_lookup
+      WHERE upper(abbrev) = upper(tempString)) as matches;
+  IF tempInt = 1 THEN
+    state := tempString;
+    SELECT INTO stateAbbrev abbrev FROM (select distinct abbrev from
+        state_lookup WHERE upper(abbrev) = upper(tempString)) as matches;
+  ELSE
+    SELECT INTO tempInt count(*) FROM state_lookup WHERE upper(name)
+        like upper('%' || tempString);
+    IF tempInt >= 1 THEN
+      -- Some state name ends with the last word.  Try the longest names
+      -- first so that e.g. 'West Virginia' is preferred over 'Virginia', and
+      -- require the whole name to end the input so that a state name
+      -- occurring earlier (such as in a street name) is not picked up.
+      FOR rec IN SELECT name, abbrev FROM state_lookup WHERE upper(name)
+          like upper('%' || tempString) ORDER BY length(name) DESC LOOP
+        state := substring(rawInput, '(?i)(?:^|' || ws || ')(' || rec.name
+            || ')' || ws || '*$');
+        IF state IS NOT NULL THEN
+          stateAbbrev := rec.abbrev;
+          EXIT;
+        END IF;
+      END LOOP;
+    ELSE
+      -- No direct match for state, so perform fuzzy match against the names
+      -- whose end_soundex() equals the soundex of the last input word.
+      FOR rec IN SELECT name, abbrev FROM state_lookup
+          WHERE soundex(tempString) = end_soundex(name) LOOP
+        tempInt := count_words(rec.name);
+        tempString := get_last_words(rawInput, tempInt);
+        test := TRUE;
+        FOR i IN 1..tempInt LOOP
+          IF soundex(split_part(tempString, ' ', i)) !=
+             soundex(split_part(rec.name, ' ', i)) THEN
+            test := FALSE;
+            EXIT;
+          END IF;
+        END LOOP;
+        IF test THEN
+          state := tempString;
+          stateAbbrev := rec.abbrev;
+          EXIT;
+        END IF;
+      END LOOP;
+    END IF;
+  END IF;
+
+  IF state IS NOT NULL AND stateAbbrev IS NOT NULL THEN
+    result := state || ':' || stateAbbrev;
+  END IF;
+
+  RETURN result;
+END;
+$_$ LANGUAGE plpgsql;



More information about the postgis-commits mailing list