Saturday, March 12, 2011

Updating the Code-Point postcode datataset in MySql

Some time ago I imported the Ordnance Survey Code-Point postcode dataset into MySql. It looks like there’s a new version of that dataset available which includes new postcodes so I wanted to update my database. I guess I could just empty the table and re-import the data, but since it takes some time import and the data is live on the web, this wasn’t the ideal solution. Fortunately, MySql has a useful IGNORE keyword which will ignore failed inserts so any old postcodes will be ignored (since the postcode is used as the primary key on the table) whilst new ones are inserted. Of course, this assumes that the latitude and longitude of old postcodes doesn’t change, which I’m hoping is a reasonable assumption. So my new code looks like this.

using System;
using System.IO;
using DotNetCoords;
using LumenWorks.Framework.IO.Csv;
using MySql.Data.MySqlClient;

namespace ImportCodepoint
{
  class Program
  {
    static void Main(string[] args)
    {
      string[] files = Directory.GetFiles(@"C:\Users\Doogal\Downloads\codepo_gb\Code-Point Open\Data");
      foreach (string file in files)
      {
        ReadFile(file);
      }

    }

    private static void ReadFile(string file)
    {
      using (StreamReader reader = new StreamReader(file))
      {
        CsvReader csvReader = new CsvReader(reader, false);
        using (MySqlConnection conn = new MySqlConnection(
          "server=server;uid=username;pwd=password;database=database;"))
        {
          conn.Open();
          foreach (string[] data in csvReader)
          {
            string postcode = data[0];
            // some postcodes have spaces, some don't
            if (postcode.IndexOf(' ') < 0)
              postcode = data[0].Substring(0, data[0].Length - 3) + " " + data[0].Substring(data[0].Length - 3);
            // some have two spaces...
            postcode = postcode.Replace("  ", " ");
            
            double easting = double.Parse(data[10]);
            double northing = double.Parse(data[11]);

            // there are some postcodes with no location
            if ((easting != 0) && (northing != 0))
            {
              // convert easting/northing to lat/long
              OSRef osRef = new OSRef(easting, northing);
              LatLng latLng = osRef.ToLatLng();
              latLng.ToWGS84();

              using (MySqlCommand command = conn.CreateCommand())
              {
                Console.WriteLine(postcode);
                command.CommandTimeout = 60;
                command.CommandText = string.Format(
                  "INSERT IGNORE INTO Postcodes (Postcode, Latitude, Longitude) " +
                  "VALUES ('{0}', {1}, {2})",
                  postcode, latLng.Latitude, latLng.Longitude);
                int count = command.ExecuteNonQuery();
                if (count > 0)
                  Console.WriteLine("Added");
              }
            }
          }
        }
      }
    }
  }
}

No comments: