mirror of
https://github.com/dataforcanada/d4c-datapkg-statistical.git
synced 2026-06-13 14:10:55 +02:00
Add NAR 2025-07 and update download links
This commit is contained in:
@@ -0,0 +1,568 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "11c50f98-ddeb-4af0-b69a-743f915fa904",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Experimenting with processing this file. Still need to figure out how to structure this file\n",
|
||||
"In summary:\n",
|
||||
"- there are some types that should be fixed. For example: `sac_type` should not be `Integer64`, `bu_use` should be `Int8`, `civic_no` should be `Int32`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "95feb0e4-b3b6-4235-8c8d-bc3652e82b3a",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Buckaroo has been enabled as the default DataFrame viewer. To return to default dataframe visualization use `from buckaroo import disable; disable()`\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"#!/usr/bin/env python\n",
|
||||
"# coding: utf-8\n",
|
||||
"import gc\n",
|
||||
"import glob\n",
|
||||
"import os\n",
|
||||
"import sys \n",
|
||||
"\n",
|
||||
"import buckaroo\n",
|
||||
"import duckdb\n",
|
||||
"from IPython.core.interactiveshell import InteractiveShell \n",
|
||||
"import geopandas as gpd\n",
|
||||
"import pandas as pd\n",
|
||||
"from sqlalchemy import create_engine\n",
|
||||
"from sqlalchemy import text\n",
|
||||
"\n",
|
||||
"# Enable multiple outputs per cell\n",
|
||||
"InteractiveShell.ast_node_interactivity = \"all\"\n",
|
||||
"# Show all columns\n",
|
||||
"pd.set_option('display.max_columns', None)\n",
|
||||
"\n",
|
||||
"DATABASE = os.environ.get(\"POSTGRES_DB\")\n",
|
||||
"USER = os.environ.get(\"POSTGRES_USER\")\n",
|
||||
"PASSWORD = os.environ.get(\"POSTGRES_PASSWORD\")\n",
|
||||
"\n",
|
||||
"engine = create_engine(f\"postgresql://{USER}:{PASSWORD}@db:5432/{DATABASE}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "e10bc182-d0d6-4aa3-99b3-2fc396f9ce34",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"input_folder = '/data/national_address_register/extracted'"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7f170d27-14ca-488a-811d-1c9836264bb6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# 1. Process 2025-07 vintage"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "b27e0edc-59ca-4a63-8bff-1d85909aa174",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"nar_addresses_csvs = glob.glob(f'{input_folder}/2025-07/Addresses/*.csv')\n",
|
||||
"nar_locations_csvs = glob.glob(f'{input_folder}/2025-07/Locations/*.csv')\n",
|
||||
"encoding = 'utf-8'"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "3fcca833-8547-43b1-b5c7-e5f463ef0bbc",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def process_nar_locations_csvs(csvs_to_process, encoding):\n",
|
||||
" \"\"\"\n",
|
||||
" 1. Reads subset of fields for National Address Register locations\n",
|
||||
" 2. Appends all of the processed CSVs as one dataframe\n",
|
||||
" \"\"\"\n",
|
||||
" dataframes_to_concatenate = []\n",
|
||||
" for filename in csvs_to_process:\n",
|
||||
" print(f\"Processing {filename}\")\n",
|
||||
" params = {\n",
|
||||
" 'filepath_or_buffer': filename,\n",
|
||||
" 'encoding': encoding,\n",
|
||||
" 'usecols': ['LOC_GUID', \n",
|
||||
" 'BG_LATITUDE', \n",
|
||||
" 'BG_LONGITUDE'\n",
|
||||
" ]\n",
|
||||
" }\n",
|
||||
" nar_location_df = pd.read_csv(**params)\n",
|
||||
" # Lowercase columns\n",
|
||||
" nar_location_df.columns = [x.lower() for x in nar_location_df.columns]\n",
|
||||
" dataframes_to_concatenate.append(nar_location_df)\n",
|
||||
" \n",
|
||||
" print(\"Concatenating all dataframes into one\")\n",
|
||||
" nar_locations_df = pd.concat(dataframes_to_concatenate)\n",
|
||||
" \n",
|
||||
" return nar_locations_df\n",
|
||||
"\n",
|
||||
"def process_nar_addresses_csvs(csvs_to_process, encoding):\n",
|
||||
" \"\"\"\n",
|
||||
" 1. Reads subset of fields for National Address Register addresses\n",
|
||||
" 2. Appends all of the processed CSVs as one dataframe\n",
|
||||
" \"\"\"\n",
|
||||
" dataframes_to_concatenate = []\n",
|
||||
" for filename in csvs_to_process:\n",
|
||||
" print(f\"Processing {filename}\")\n",
|
||||
" params = {\n",
|
||||
" 'filepath_or_buffer': filename,\n",
|
||||
" 'encoding': encoding,\n",
|
||||
" 'usecols': ['LOC_GUID', \n",
|
||||
" 'ADDR_GUID', \n",
|
||||
" 'APT_NO_LABEL',\n",
|
||||
" 'CIVIC_NO',\n",
|
||||
" 'CIVIC_NO_SUFFIX',\n",
|
||||
" 'OFFICIAL_STREET_NAME',\n",
|
||||
" 'OFFICIAL_STREET_TYPE',\n",
|
||||
" 'OFFICIAL_STREET_DIR',\n",
|
||||
" 'MAIL_STREET_NAME',\n",
|
||||
" 'MAIL_STREET_TYPE',\n",
|
||||
" 'MAIL_STREET_DIR',\n",
|
||||
" 'MAIL_MUN_NAME',\n",
|
||||
" 'MAIL_POSTAL_CODE',\n",
|
||||
" 'BG_DLS_LSD',\n",
|
||||
" 'BG_DLS_QTR',\n",
|
||||
" 'BG_DLS_SCTN',\n",
|
||||
" 'BG_DLS_RNG',\n",
|
||||
" 'BG_DLS_MRD',\n",
|
||||
" # Removing since REPPOINT_LATITUDE and REPPOINT_LONGITUDE seem to have same purpose\n",
|
||||
" #'BG_X',\n",
|
||||
" #'BG_Y',\n",
|
||||
" 'BU_USE',\n",
|
||||
" 'BU_N_CIVIC_ADD'\n",
|
||||
" ],\n",
|
||||
" 'dtype': {\n",
|
||||
" \"CIVIC_NO\": \"Int32\", \n",
|
||||
" \"PROV_CODE\": object,\n",
|
||||
" \"BU_USE\": \"Int8\",\n",
|
||||
" \"BG_DLS_LSD\": object,\n",
|
||||
" \"BG_DLS_QTR\": object,\n",
|
||||
" \"BG_DLS_SCTN\": object,\n",
|
||||
" \"BG_DLS_TWNSHP\": object,\n",
|
||||
" \"BG_DLS_RNG\": object,\n",
|
||||
" \"BG_DLS_MRD\": object\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
" nar_address_df = pd.read_csv(**params)\n",
|
||||
" # Lowercase columns\n",
|
||||
" nar_address_df.columns = [x.lower() for x in nar_address_df.columns]\n",
|
||||
" dataframes_to_concatenate.append(nar_address_df)\n",
|
||||
" \n",
|
||||
" print(\"Concatenating all dataframes into one\")\n",
|
||||
" nar_addresses_df = pd.concat(dataframes_to_concatenate, ignore_index=True)\n",
|
||||
" \n",
|
||||
" return nar_addresses_df"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "f9bb3a4d-d913-4b9d-aa49-7ec2c4dfef60",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Processing /data/national_address_register/extracted/2025-07/Locations/Location_48_part_2.csv\n",
|
||||
"Processing /data/national_address_register/extracted/2025-07/Locations/Location_59_part_1.csv\n",
|
||||
"Processing /data/national_address_register/extracted/2025-07/Locations/Location_59_part_2.csv\n",
|
||||
"Processing /data/national_address_register/extracted/2025-07/Locations/Location_60.csv\n",
|
||||
"Processing /data/national_address_register/extracted/2025-07/Locations/Location_61.csv\n",
|
||||
"Processing /data/national_address_register/extracted/2025-07/Locations/Location_62.csv\n",
|
||||
"Processing /data/national_address_register/extracted/2025-07/Locations/Location_10.csv\n",
|
||||
"Processing /data/national_address_register/extracted/2025-07/Locations/Location_11.csv\n",
|
||||
"Processing /data/national_address_register/extracted/2025-07/Locations/Location_12.csv\n",
|
||||
"Processing /data/national_address_register/extracted/2025-07/Locations/Location_13.csv\n",
|
||||
"Processing /data/national_address_register/extracted/2025-07/Locations/Location_24_part_1.csv\n",
|
||||
"Processing /data/national_address_register/extracted/2025-07/Locations/Location_24_part_2.csv\n",
|
||||
"Processing /data/national_address_register/extracted/2025-07/Locations/Location_24_part_3.csv\n",
|
||||
"Processing /data/national_address_register/extracted/2025-07/Locations/Location_24_part_4.csv\n",
|
||||
"Processing /data/national_address_register/extracted/2025-07/Locations/Location_35_part_1.csv\n",
|
||||
"Processing /data/national_address_register/extracted/2025-07/Locations/Location_35_part_2.csv\n",
|
||||
"Processing /data/national_address_register/extracted/2025-07/Locations/Location_35_part_3.csv\n",
|
||||
"Processing /data/national_address_register/extracted/2025-07/Locations/Location_35_part_4.csv\n",
|
||||
"Processing /data/national_address_register/extracted/2025-07/Locations/Location_35_part_5.csv\n",
|
||||
"Processing /data/national_address_register/extracted/2025-07/Locations/Location_46.csv\n",
|
||||
"Processing /data/national_address_register/extracted/2025-07/Locations/Location_47.csv\n",
|
||||
"Processing /data/national_address_register/extracted/2025-07/Locations/Location_48_part_1.csv\n",
|
||||
"Concatenating all dataframes into one\n",
|
||||
"Processing /data/national_address_register/extracted/2025-07/Addresses/Address_48_part_2.csv\n",
|
||||
"Processing /data/national_address_register/extracted/2025-07/Addresses/Address_48_part_3.csv\n",
|
||||
"Processing /data/national_address_register/extracted/2025-07/Addresses/Address_59_part_1.csv\n",
|
||||
"Processing /data/national_address_register/extracted/2025-07/Addresses/Address_59_part_2.csv\n",
|
||||
"Processing /data/national_address_register/extracted/2025-07/Addresses/Address_59_part_3.csv\n",
|
||||
"Processing /data/national_address_register/extracted/2025-07/Addresses/Address_60.csv\n",
|
||||
"Processing /data/national_address_register/extracted/2025-07/Addresses/Address_61.csv\n",
|
||||
"Processing /data/national_address_register/extracted/2025-07/Addresses/Address_62.csv\n",
|
||||
"Processing /data/national_address_register/extracted/2025-07/Addresses/Address_10.csv\n",
|
||||
"Processing /data/national_address_register/extracted/2025-07/Addresses/Address_11.csv\n",
|
||||
"Processing /data/national_address_register/extracted/2025-07/Addresses/Address_12.csv\n",
|
||||
"Processing /data/national_address_register/extracted/2025-07/Addresses/Address_13.csv\n",
|
||||
"Processing /data/national_address_register/extracted/2025-07/Addresses/Address_24_part_1.csv\n",
|
||||
"Processing /data/national_address_register/extracted/2025-07/Addresses/Address_24_part_2.csv\n",
|
||||
"Processing /data/national_address_register/extracted/2025-07/Addresses/Address_24_part_3.csv\n",
|
||||
"Processing /data/national_address_register/extracted/2025-07/Addresses/Address_24_part_4.csv\n",
|
||||
"Processing /data/national_address_register/extracted/2025-07/Addresses/Address_24_part_5.csv\n",
|
||||
"Processing /data/national_address_register/extracted/2025-07/Addresses/Address_35_part_1.csv\n",
|
||||
"Processing /data/national_address_register/extracted/2025-07/Addresses/Address_35_part_2.csv\n",
|
||||
"Processing /data/national_address_register/extracted/2025-07/Addresses/Address_35_part_3.csv\n",
|
||||
"Processing /data/national_address_register/extracted/2025-07/Addresses/Address_35_part_4.csv\n",
|
||||
"Processing /data/national_address_register/extracted/2025-07/Addresses/Address_35_part_5.csv\n",
|
||||
"Processing /data/national_address_register/extracted/2025-07/Addresses/Address_35_part_6.csv\n",
|
||||
"Processing /data/national_address_register/extracted/2025-07/Addresses/Address_35_part_7.csv\n",
|
||||
"Processing /data/national_address_register/extracted/2025-07/Addresses/Address_46.csv\n",
|
||||
"Processing /data/national_address_register/extracted/2025-07/Addresses/Address_47.csv\n",
|
||||
"Processing /data/national_address_register/extracted/2025-07/Addresses/Address_48_part_1.csv\n",
|
||||
"Concatenating all dataframes into one\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"nar_locations = process_nar_locations_csvs(nar_locations_csvs, encoding)\n",
|
||||
"nar_addresses = process_nar_addresses_csvs(nar_addresses_csvs, encoding)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "a0cff7f9-7dcb-4e05-a8c9-ffdfc18613e1",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "b96d301821b74689bfd7837d87ae9305",
|
||||
"version_major": 2,
|
||||
"version_minor": 1
|
||||
},
|
||||
"text/plain": [
|
||||
"BuckarooInfiniteWidget(buckaroo_options={'sampled': ['random'], 'auto_clean': ['aggressive', 'conservative'], …"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"nar_locations.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b49f6602-9bda-410f-82a2-54a5711311b0",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# TODO\n",
|
||||
"- look into why there are locations with empty reppoint_latitude and reppoint_longitude\n",
|
||||
" - There are 84,285 records that have an empty reppoint_latitude and reppoint_longitude"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "f3600645-c350-473f-81ea-0bd9ebe70e37",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Combining nar_addresses and nar_locations\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"2762"
|
||||
]
|
||||
},
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(\"Combining nar_addresses and nar_locations\")\n",
|
||||
"nar_addresses_combined = duckdb.sql(\"\"\"\n",
|
||||
"SELECT a.addr_guid, a.apt_no_label, a.civic_no, a.civic_no_suffix, a.official_street_name, a.mail_street_name, a.official_street_type, a.mail_street_type,\n",
|
||||
" a.official_street_dir AS official_street_direction, a.mail_street_dir AS mail_street_direction, a.mail_postal_code, a.mail_mun_name AS mail_municipality_name, \n",
|
||||
" a.bu_n_civic_add, a.bu_use,\n",
|
||||
" a.bg_dls_lsd, a.bg_dls_qtr, a.bg_dls_sctn, a.bg_dls_rng, a.bg_dls_mrd,\n",
|
||||
" b.bg_latitude AS reppoint_latitude, b.bg_longitude AS reppoint_longitude\n",
|
||||
"FROM nar_addresses AS a,\n",
|
||||
" nar_locations AS b\n",
|
||||
"WHERE a.loc_guid = b.loc_guid AND b.bg_latitude IS NOT NULL\n",
|
||||
"\"\"\").df()\n",
|
||||
"\n",
|
||||
"del nar_addresses\n",
|
||||
"del nar_locations\n",
|
||||
"gc.collect()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "0d35eec6-8ab3-4e7a-a7fd-2915333e27f9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"gdf = gpd.GeoDataFrame(\n",
|
||||
" nar_addresses_combined, \n",
|
||||
" geometry=gpd.points_from_xy(nar_addresses_combined.reppoint_longitude,\n",
|
||||
" nar_addresses_combined.reppoint_latitude),\n",
|
||||
" crs=\"EPSG:4326\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"id": "614ad773-adb3-413c-8b8f-da721afc85cd",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Dropping 'reppoint_latitude', 'reppoint_longitude' from geodataframe\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(\"Dropping 'reppoint_latitude', 'reppoint_longitude' from geodataframe\")\n",
|
||||
"gdf.drop(columns=[\"reppoint_latitude\", \"reppoint_longitude\"], \n",
|
||||
" inplace=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"id": "cac024f8-6eb5-48ea-88ad-289edf505ba3",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"0"
|
||||
]
|
||||
},
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"del nar_addresses_combined\n",
|
||||
"gc.collect()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"id": "d2f81e61-1fc7-4518-ad1f-d515e16ce22c",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Loading geodataframe to PostgreSQL as bronze.nar_2025_07\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(\"Loading geodataframe to PostgreSQL as bronze.nar_2025_07\")\n",
|
||||
"gdf.to_postgis(name=\"nar_2025_07\", \n",
|
||||
" schema='bronze',\n",
|
||||
" con=engine,\n",
|
||||
" chunksize=150000)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"id": "b7fb6976-0b95-445e-b1e8-022c39bce25b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"584"
|
||||
]
|
||||
},
|
||||
"execution_count": 18,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"del(gdf)\n",
|
||||
"gc.collect()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "384cd07b-c79f-42e8-81d6-ecbe78f167b1",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Link to 2021 geographies\n",
|
||||
"There are 10 records that were not linked to 2021 geographies"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"id": "f7f56297-150b-4ad2-a5c2-2ee5e477f978",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"sql = \"\"\"\n",
|
||||
"DROP TABLE IF EXISTS silver.nar_2025_07;\n",
|
||||
"CREATE TABLE silver.nar_2025_07 AS\n",
|
||||
"SELECT DISTINCT\n",
|
||||
" b.country_dguid,\n",
|
||||
" b.country_en_name,\n",
|
||||
" b.country_fr_name,\n",
|
||||
" b.country_en_abbreviation,\n",
|
||||
" b.country_fr_abbreviation,\n",
|
||||
" b.grc_dguid,\n",
|
||||
" b.grc_en_name,\n",
|
||||
" b.grc_fr_name,\n",
|
||||
" b.pr_dguid,\n",
|
||||
" b.pr_en_name,\n",
|
||||
" b.pr_fr_name,\n",
|
||||
" b.pr_en_abbreviation,\n",
|
||||
" b.pr_fr_abbreviation,\n",
|
||||
" b.pr_iso_code,\n",
|
||||
" b.car_dguid,\n",
|
||||
" b.car_en_name,\n",
|
||||
" b.car_fr_name,\n",
|
||||
" b.er_dguid,\n",
|
||||
" b.er_name,\n",
|
||||
" b.cd_dguid,\n",
|
||||
" b.cd_name,\n",
|
||||
" b.cd_type,\n",
|
||||
" b.ccs_dguid,\n",
|
||||
" b.ccs_name,\n",
|
||||
" b.cma_dguid,\n",
|
||||
" b.cma_p_dguid,\n",
|
||||
" b.cma_name,\n",
|
||||
" b.cma_type,\n",
|
||||
" b.csd_dguid,\n",
|
||||
" b.csd_name,\n",
|
||||
" b.csd_type,\n",
|
||||
" b.sac_type,\n",
|
||||
" b.sac_code,\n",
|
||||
" b.fed_dguid,\n",
|
||||
" b.fed_name,\n",
|
||||
" b.fed_en_name,\n",
|
||||
" b.fed_fr_name,\n",
|
||||
" b.ct_dguid,\n",
|
||||
" b.ada_dguid,\n",
|
||||
" b.da_dguid,\n",
|
||||
" b.db_dguid,\n",
|
||||
" a.addr_guid,\n",
|
||||
" a.apt_no_label,\n",
|
||||
" a.civic_no,\n",
|
||||
" a.civic_no_suffix,\n",
|
||||
" a.official_street_name, \n",
|
||||
" a.mail_street_name, \n",
|
||||
" a.official_street_type,\n",
|
||||
" a.mail_street_type,\n",
|
||||
" a.official_street_direction,\n",
|
||||
" a.mail_street_direction,\n",
|
||||
" a.mail_postal_code,\n",
|
||||
" a.mail_municipality_name,\n",
|
||||
" a.bu_n_civic_add,\n",
|
||||
" a.bu_use,\n",
|
||||
" a.bg_dls_lsd,\n",
|
||||
" a.bg_dls_qtr,\n",
|
||||
" a.bg_dls_sctn,\n",
|
||||
" a.bg_dls_rng,\n",
|
||||
" a.bg_dls_mrd,\n",
|
||||
" a.geometry AS geom\n",
|
||||
"FROM bronze.nar_2025_07 AS a,\n",
|
||||
" silver.db_2021_digital AS b\n",
|
||||
"WHERE ST_Intersects(a.geometry, b.geom);\n",
|
||||
"\n",
|
||||
"-- Create spatial index\n",
|
||||
"CREATE INDEX nar_2025_07_geom_idx ON silver.nar_2025_07 USING gist (geom) WITH (\n",
|
||||
" fillfactor = 100\n",
|
||||
");\n",
|
||||
"\"\"\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"id": "ae7babb4-a3e6-4c0b-93a6-3b52a4f89a1b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"<sqlalchemy.engine.cursor.CursorResult at 0x708e93a05860>"
|
||||
]
|
||||
},
|
||||
"execution_count": 20,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"with engine.connect() as conn:\n",
|
||||
" conn.execute(text(sql))\n",
|
||||
" conn.commit()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "2fd00615-f6e4-4aeb-a6a4-78d6a7ee5378",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
Reference in New Issue
Block a user