Add NAR 2025-07 and update download links

This commit is contained in:
Diego Ripley
2025-09-22 17:06:19 +00:00
parent 37c3a286d8
commit d23018d8a6
9 changed files with 624 additions and 13 deletions
+4 -2
View File
@@ -2,12 +2,14 @@
if [ ! -d "${DATA_FOLDER}/national_address_register" ]
then
echo "Making directory ${DATA_FOLDER}/national_address_register/"
mkdir -p ${DATA_FOLDER}/national_address_register/{input,extracted,output}/{2024-12,2024-06,2023,2022}
mkdir -p ${DATA_FOLDER}/national_address_register/{input,extracted,output}/{2025-07,2024-12,2024-06,2023,2022}
fi
INPUT_FOLDER="${DATA_FOLDER}/national_address_register/input"
echo "Downloading 2024-12 vintage of national address register files"
echo "Downloading 2025-07 vintage of national address register files"
aria2c -x16 -i "${SCRIPT_DIR}/national_address_register/national_address_register_files_2025_07.txt" --dir=${INPUT_FOLDER}/2025-07 --auto-file-renaming=false
echo "Downloading 2024-12 vintage of national address register files"0
aria2c -x16 -i "${SCRIPT_DIR}/national_address_register/national_address_register_files_2024_12.txt" --dir=${INPUT_FOLDER}/2024-12 --auto-file-renaming=false
echo "Downloading 2024-06 vintage of national address register files"
aria2c -x16 -i "${SCRIPT_DIR}/national_address_register/national_address_register_files_2024_06.txt" --dir=${INPUT_FOLDER}/2024-06 --auto-file-renaming=false
+34 -5
View File
@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 3,
"execution_count": null,
"id": "b6e053ec",
"metadata": {},
"outputs": [],
@@ -12,12 +12,41 @@
"source ../.env"
]
},
{
"cell_type": "markdown",
"id": "7535120d-7e2d-4bc1-8bfc-f8240a8b0902",
"metadata": {},
"source": [
"# Export National Address Register File (2025-07 vintage)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ad20ec9f-53d2-4616-84cf-ea98e7f3e39f",
"metadata": {},
"outputs": [],
"source": [
"output_folder=\"${DATA_FOLDER}/national_address_register/output/2025-07\"\n",
"output_file=\"${output_folder}/nar_2025_07.parquet\"\n",
"echo \"Exporting silver.nar_2025_07 table to ${output_file}\" \n",
"ogr2ogr \\\n",
" -lco COMPRESSION=\"ZSTD\" \\\n",
" -lco CREATOR=\"www.dataforcanada.org\" \\\n",
" -lco WRITE_COVERING_BBOX=\"YES\" \\\n",
" -lco SORT_BY_BBOX=\"YES\" \\\n",
" -f Parquet \\\n",
" ${output_file} \\\n",
" \"PG:host=db dbname=${POSTGRES_DB} user=${POSTGRES_USER} password=${POSTGRES_PASSWORD} port=5432\" \\\n",
" \"silver.nar_2025_07\""
]
},
{
"cell_type": "markdown",
"id": "12eca225-3d05-4bb7-95fa-7b9df694f53d",
"metadata": {},
"source": [
"# 1. Export National Address Register File (2024-12 vintage)"
"# Export National Address Register File (2024-12 vintage)"
]
},
{
@@ -54,7 +83,7 @@
"id": "245cd8fb-da36-4476-8e5c-4d3665e901d7",
"metadata": {},
"source": [
"# 2. Export National Address Register File (2024-06 vintage)"
"# Export National Address Register File (2024-06 vintage)"
]
},
{
@@ -83,7 +112,7 @@
"id": "56ef7755-06ef-473c-8a1f-0129f6b1dc28",
"metadata": {},
"source": [
"# 3. Export National Address Register File (2023 vintage)"
"# Export National Address Register File (2023 vintage)"
]
},
{
@@ -112,7 +141,7 @@
"id": "f1bfd838-19aa-4de7-9ca1-61b1215d7865",
"metadata": {},
"source": [
"# 4. Export National Address Register File (2022 vintage)"
"# Export National Address Register File (2022 vintage)"
]
},
{
@@ -1,2 +1,2 @@
# 2022
https://data.dataforcanada.org/archive/statistics_canada/national_address_register/2022/2022.zip
https://data-01.dataforcanada.org/archive/statistics_canada/national_address_register/2022/2022.zip
@@ -1,2 +1,2 @@
# 2023
https://data.dataforcanada.org/archive/statistics_canada/national_address_register/2023/2023.zip
https://data-01.dataforcanada.org/archive/statistics_canada/national_address_register/2023/2023.zip
@@ -1,2 +1,2 @@
# June 2024
https://data.dataforcanada.org/archive/statistics_canada/national_address_register/2024-06/2024.zip
https://data-01.dataforcanada.org/archive/statistics_canada/national_address_register/2024-06/2024.zip
@@ -1,2 +1,2 @@
# December 2024
https://data.dataforcanada.org/archive/statistics_canada/national_address_register/2024-12/202412.zip
https://data-01.dataforcanada.org/archive/statistics_canada/national_address_register/2024-12/202412.zip
@@ -0,0 +1,2 @@
# July 2025
https://data-01.dataforcanada.org/archive/statistics_canada/national_address_register/2025-07/202507.zip
+12 -2
View File
@@ -3,6 +3,15 @@
INPUT_FOLDER="${DATA_FOLDER}/national_address_register/input"
EXTRACTED_FOLDER="${DATA_FOLDER}/national_address_register/extracted"
process_202507() {
# Process 2025-07 vintage
echo "Extracting ${INPUT_FOLDER}/2025-07/202507.zip to ${EXTRACTED_FOLDER}/2025-07"
unzip -q -n ${INPUT_FOLDER}/2025-07/202507.zip -d ${EXTRACTED_FOLDER}/2025-07
jupyter execute process_2025_07.ipynb
rclone --progress copy /data/national_address_register/output/2025-07/nar_2025_07.parquet cloudflare:/dataforcanada-data/processed/statistics_canada/national_address_register/2025-07/
}
process_202412() {
# Process 2024-12 vintage
echo "Extracting ${INPUT_FOLDER}/2024-12/202412.zip to ${EXTRACTED_FOLDER}/2024-12"
@@ -31,7 +40,8 @@ process_2022() {
# Encoding is latin-1
}
process_202412
process_202406
process_202507
#process_202412
#process_202406
#process_2023
#process_2022
@@ -0,0 +1,568 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "11c50f98-ddeb-4af0-b69a-743f915fa904",
"metadata": {},
"source": [
"# Experimenting with processing this file. Still need to figure out how to structure this file\n",
"In summary:\n",
"- there are some types that should be fixed. For example: `sac_type` should not be `Integer64`, `bu_use` should be `Int8`, `civic_no` should be `Int32`"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "95feb0e4-b3b6-4235-8c8d-bc3652e82b3a",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Buckaroo has been enabled as the default DataFrame viewer. To return to default dataframe visualization use `from buckaroo import disable; disable()`\n"
]
}
],
"source": [
"#!/usr/bin/env python\n",
"# coding: utf-8\n",
"import gc\n",
"import glob\n",
"import os\n",
"import sys \n",
"\n",
"import buckaroo\n",
"import duckdb\n",
"from IPython.core.interactiveshell import InteractiveShell \n",
"import geopandas as gpd\n",
"import pandas as pd\n",
"from sqlalchemy import create_engine\n",
"from sqlalchemy import text\n",
"\n",
"# Enable multiple outputs per cell\n",
"InteractiveShell.ast_node_interactivity = \"all\"\n",
"# Show all columns\n",
"pd.set_option('display.max_columns', None)\n",
"\n",
"DATABASE = os.environ.get(\"POSTGRES_DB\")\n",
"USER = os.environ.get(\"POSTGRES_USER\")\n",
"PASSWORD = os.environ.get(\"POSTGRES_PASSWORD\")\n",
"\n",
"engine = create_engine(f\"postgresql://{USER}:{PASSWORD}@db:5432/{DATABASE}\")"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "e10bc182-d0d6-4aa3-99b3-2fc396f9ce34",
"metadata": {},
"outputs": [],
"source": [
"input_folder = '/data/national_address_register/extracted'"
]
},
{
"cell_type": "markdown",
"id": "7f170d27-14ca-488a-811d-1c9836264bb6",
"metadata": {},
"source": [
"# 1. Process 2025-07 vintage"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "b27e0edc-59ca-4a63-8bff-1d85909aa174",
"metadata": {},
"outputs": [],
"source": [
"nar_addresses_csvs = glob.glob(f'{input_folder}/2025-07/Addresses/*.csv')\n",
"nar_locations_csvs = glob.glob(f'{input_folder}/2025-07/Locations/*.csv')\n",
"encoding = 'utf-8'"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "3fcca833-8547-43b1-b5c7-e5f463ef0bbc",
"metadata": {},
"outputs": [],
"source": [
"def process_nar_locations_csvs(csvs_to_process, encoding):\n",
" \"\"\"\n",
" 1. Reads subset of fields for National Address Register locations\n",
" 2. Appends all of the processed CSVs as one dataframe\n",
" \"\"\"\n",
" dataframes_to_concatenate = []\n",
" for filename in csvs_to_process:\n",
" print(f\"Processing {filename}\")\n",
" params = {\n",
" 'filepath_or_buffer': filename,\n",
" 'encoding': encoding,\n",
" 'usecols': ['LOC_GUID', \n",
" 'BG_LATITUDE', \n",
" 'BG_LONGITUDE'\n",
" ]\n",
" }\n",
" nar_location_df = pd.read_csv(**params)\n",
" # Lowercase columns\n",
" nar_location_df.columns = [x.lower() for x in nar_location_df.columns]\n",
" dataframes_to_concatenate.append(nar_location_df)\n",
" \n",
" print(\"Concatenating all dataframes into one\")\n",
" nar_locations_df = pd.concat(dataframes_to_concatenate)\n",
" \n",
" return nar_locations_df\n",
"\n",
"def process_nar_addresses_csvs(csvs_to_process, encoding):\n",
" \"\"\"\n",
" 1. Reads subset of fields for National Address Register addresses\n",
" 2. Appends all of the processed CSVs as one dataframe\n",
" \"\"\"\n",
" dataframes_to_concatenate = []\n",
" for filename in csvs_to_process:\n",
" print(f\"Processing {filename}\")\n",
" params = {\n",
" 'filepath_or_buffer': filename,\n",
" 'encoding': encoding,\n",
" 'usecols': ['LOC_GUID', \n",
" 'ADDR_GUID', \n",
" 'APT_NO_LABEL',\n",
" 'CIVIC_NO',\n",
" 'CIVIC_NO_SUFFIX',\n",
" 'OFFICIAL_STREET_NAME',\n",
" 'OFFICIAL_STREET_TYPE',\n",
" 'OFFICIAL_STREET_DIR',\n",
" 'MAIL_STREET_NAME',\n",
" 'MAIL_STREET_TYPE',\n",
" 'MAIL_STREET_DIR',\n",
" 'MAIL_MUN_NAME',\n",
" 'MAIL_POSTAL_CODE',\n",
" 'BG_DLS_LSD',\n",
" 'BG_DLS_QTR',\n",
" 'BG_DLS_SCTN',\n",
" 'BG_DLS_RNG',\n",
" 'BG_DLS_MRD',\n",
" # Removing since REPPOINT_LATITUDE and REPPOINT_LONGITUDE seem to have same purpose\n",
" #'BG_X',\n",
" #'BG_Y',\n",
" 'BU_USE',\n",
" 'BU_N_CIVIC_ADD'\n",
" ],\n",
" 'dtype': {\n",
" \"CIVIC_NO\": \"Int32\", \n",
" \"PROV_CODE\": object,\n",
" \"BU_USE\": \"Int8\",\n",
" \"BG_DLS_LSD\": object,\n",
" \"BG_DLS_QTR\": object,\n",
" \"BG_DLS_SCTN\": object,\n",
" \"BG_DLS_TWNSHP\": object,\n",
" \"BG_DLS_RNG\": object,\n",
" \"BG_DLS_MRD\": object\n",
" }\n",
" }\n",
" nar_address_df = pd.read_csv(**params)\n",
" # Lowercase columns\n",
" nar_address_df.columns = [x.lower() for x in nar_address_df.columns]\n",
" dataframes_to_concatenate.append(nar_address_df)\n",
" \n",
" print(\"Concatenating all dataframes into one\")\n",
" nar_addresses_df = pd.concat(dataframes_to_concatenate, ignore_index=True)\n",
" \n",
" return nar_addresses_df"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "f9bb3a4d-d913-4b9d-aa49-7ec2c4dfef60",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Processing /data/national_address_register/extracted/2025-07/Locations/Location_48_part_2.csv\n",
"Processing /data/national_address_register/extracted/2025-07/Locations/Location_59_part_1.csv\n",
"Processing /data/national_address_register/extracted/2025-07/Locations/Location_59_part_2.csv\n",
"Processing /data/national_address_register/extracted/2025-07/Locations/Location_60.csv\n",
"Processing /data/national_address_register/extracted/2025-07/Locations/Location_61.csv\n",
"Processing /data/national_address_register/extracted/2025-07/Locations/Location_62.csv\n",
"Processing /data/national_address_register/extracted/2025-07/Locations/Location_10.csv\n",
"Processing /data/national_address_register/extracted/2025-07/Locations/Location_11.csv\n",
"Processing /data/national_address_register/extracted/2025-07/Locations/Location_12.csv\n",
"Processing /data/national_address_register/extracted/2025-07/Locations/Location_13.csv\n",
"Processing /data/national_address_register/extracted/2025-07/Locations/Location_24_part_1.csv\n",
"Processing /data/national_address_register/extracted/2025-07/Locations/Location_24_part_2.csv\n",
"Processing /data/national_address_register/extracted/2025-07/Locations/Location_24_part_3.csv\n",
"Processing /data/national_address_register/extracted/2025-07/Locations/Location_24_part_4.csv\n",
"Processing /data/national_address_register/extracted/2025-07/Locations/Location_35_part_1.csv\n",
"Processing /data/national_address_register/extracted/2025-07/Locations/Location_35_part_2.csv\n",
"Processing /data/national_address_register/extracted/2025-07/Locations/Location_35_part_3.csv\n",
"Processing /data/national_address_register/extracted/2025-07/Locations/Location_35_part_4.csv\n",
"Processing /data/national_address_register/extracted/2025-07/Locations/Location_35_part_5.csv\n",
"Processing /data/national_address_register/extracted/2025-07/Locations/Location_46.csv\n",
"Processing /data/national_address_register/extracted/2025-07/Locations/Location_47.csv\n",
"Processing /data/national_address_register/extracted/2025-07/Locations/Location_48_part_1.csv\n",
"Concatenating all dataframes into one\n",
"Processing /data/national_address_register/extracted/2025-07/Addresses/Address_48_part_2.csv\n",
"Processing /data/national_address_register/extracted/2025-07/Addresses/Address_48_part_3.csv\n",
"Processing /data/national_address_register/extracted/2025-07/Addresses/Address_59_part_1.csv\n",
"Processing /data/national_address_register/extracted/2025-07/Addresses/Address_59_part_2.csv\n",
"Processing /data/national_address_register/extracted/2025-07/Addresses/Address_59_part_3.csv\n",
"Processing /data/national_address_register/extracted/2025-07/Addresses/Address_60.csv\n",
"Processing /data/national_address_register/extracted/2025-07/Addresses/Address_61.csv\n",
"Processing /data/national_address_register/extracted/2025-07/Addresses/Address_62.csv\n",
"Processing /data/national_address_register/extracted/2025-07/Addresses/Address_10.csv\n",
"Processing /data/national_address_register/extracted/2025-07/Addresses/Address_11.csv\n",
"Processing /data/national_address_register/extracted/2025-07/Addresses/Address_12.csv\n",
"Processing /data/national_address_register/extracted/2025-07/Addresses/Address_13.csv\n",
"Processing /data/national_address_register/extracted/2025-07/Addresses/Address_24_part_1.csv\n",
"Processing /data/national_address_register/extracted/2025-07/Addresses/Address_24_part_2.csv\n",
"Processing /data/national_address_register/extracted/2025-07/Addresses/Address_24_part_3.csv\n",
"Processing /data/national_address_register/extracted/2025-07/Addresses/Address_24_part_4.csv\n",
"Processing /data/national_address_register/extracted/2025-07/Addresses/Address_24_part_5.csv\n",
"Processing /data/national_address_register/extracted/2025-07/Addresses/Address_35_part_1.csv\n",
"Processing /data/national_address_register/extracted/2025-07/Addresses/Address_35_part_2.csv\n",
"Processing /data/national_address_register/extracted/2025-07/Addresses/Address_35_part_3.csv\n",
"Processing /data/national_address_register/extracted/2025-07/Addresses/Address_35_part_4.csv\n",
"Processing /data/national_address_register/extracted/2025-07/Addresses/Address_35_part_5.csv\n",
"Processing /data/national_address_register/extracted/2025-07/Addresses/Address_35_part_6.csv\n",
"Processing /data/national_address_register/extracted/2025-07/Addresses/Address_35_part_7.csv\n",
"Processing /data/national_address_register/extracted/2025-07/Addresses/Address_46.csv\n",
"Processing /data/national_address_register/extracted/2025-07/Addresses/Address_47.csv\n",
"Processing /data/national_address_register/extracted/2025-07/Addresses/Address_48_part_1.csv\n",
"Concatenating all dataframes into one\n"
]
}
],
"source": [
"nar_locations = process_nar_locations_csvs(nar_locations_csvs, encoding)\n",
"nar_addresses = process_nar_addresses_csvs(nar_addresses_csvs, encoding)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "a0cff7f9-7dcb-4e05-a8c9-ffdfc18613e1",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "b96d301821b74689bfd7837d87ae9305",
"version_major": 2,
"version_minor": 1
},
"text/plain": [
"BuckarooInfiniteWidget(buckaroo_options={'sampled': ['random'], 'auto_clean': ['aggressive', 'conservative'], …"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"nar_locations.head()"
]
},
{
"cell_type": "markdown",
"id": "b49f6602-9bda-410f-82a2-54a5711311b0",
"metadata": {},
"source": [
"# TODO\n",
"- look into why there are locations with empty reppoint_latitude and reppoint_longitude\n",
" - There are 84,285 records that have an empty reppoint_latitude and reppoint_longitude"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "f3600645-c350-473f-81ea-0bd9ebe70e37",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Combining nar_addresses and nar_locations\n"
]
},
{
"data": {
"text/plain": [
"2762"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"print(\"Combining nar_addresses and nar_locations\")\n",
"nar_addresses_combined = duckdb.sql(\"\"\"\n",
"SELECT a.addr_guid, a.apt_no_label, a.civic_no, a.civic_no_suffix, a.official_street_name, a.mail_street_name, a.official_street_type, a.mail_street_type,\n",
" a.official_street_dir AS official_street_direction, a.mail_street_dir AS mail_street_direction, a.mail_postal_code, a.mail_mun_name AS mail_municipality_name, \n",
" a.bu_n_civic_add, a.bu_use,\n",
" a.bg_dls_lsd, a.bg_dls_qtr, a.bg_dls_sctn, a.bg_dls_rng, a.bg_dls_mrd,\n",
" b.bg_latitude AS reppoint_latitude, b.bg_longitude AS reppoint_longitude\n",
"FROM nar_addresses AS a,\n",
" nar_locations AS b\n",
"WHERE a.loc_guid = b.loc_guid AND b.bg_latitude IS NOT NULL\n",
"\"\"\").df()\n",
"\n",
"del nar_addresses\n",
"del nar_locations\n",
"gc.collect()"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "0d35eec6-8ab3-4e7a-a7fd-2915333e27f9",
"metadata": {},
"outputs": [],
"source": [
"gdf = gpd.GeoDataFrame(\n",
" nar_addresses_combined, \n",
" geometry=gpd.points_from_xy(nar_addresses_combined.reppoint_longitude,\n",
" nar_addresses_combined.reppoint_latitude),\n",
" crs=\"EPSG:4326\"\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "614ad773-adb3-413c-8b8f-da721afc85cd",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Dropping 'reppoint_latitude', 'reppoint_longitude' from geodataframe\n"
]
}
],
"source": [
"print(\"Dropping 'reppoint_latitude', 'reppoint_longitude' from geodataframe\")\n",
"gdf.drop(columns=[\"reppoint_latitude\", \"reppoint_longitude\"], \n",
" inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "cac024f8-6eb5-48ea-88ad-289edf505ba3",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"del nar_addresses_combined\n",
"gc.collect()"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "d2f81e61-1fc7-4518-ad1f-d515e16ce22c",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Loading geodataframe to PostgreSQL as bronze.nar_2025_07\n"
]
}
],
"source": [
"print(\"Loading geodataframe to PostgreSQL as bronze.nar_2025_07\")\n",
"gdf.to_postgis(name=\"nar_2025_07\", \n",
" schema='bronze',\n",
" con=engine,\n",
" chunksize=150000)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "b7fb6976-0b95-445e-b1e8-022c39bce25b",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"584"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"del(gdf)\n",
"gc.collect()"
]
},
{
"cell_type": "markdown",
"id": "384cd07b-c79f-42e8-81d6-ecbe78f167b1",
"metadata": {},
"source": [
"## Link to 2021 geographies\n",
"There are 10 records that were not linked to 2021 geographies"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "f7f56297-150b-4ad2-a5c2-2ee5e477f978",
"metadata": {},
"outputs": [],
"source": [
"sql = \"\"\"\n",
"DROP TABLE IF EXISTS silver.nar_2025_07;\n",
"CREATE TABLE silver.nar_2025_07 AS\n",
"SELECT DISTINCT\n",
" b.country_dguid,\n",
" b.country_en_name,\n",
" b.country_fr_name,\n",
" b.country_en_abbreviation,\n",
" b.country_fr_abbreviation,\n",
" b.grc_dguid,\n",
" b.grc_en_name,\n",
" b.grc_fr_name,\n",
" b.pr_dguid,\n",
" b.pr_en_name,\n",
" b.pr_fr_name,\n",
" b.pr_en_abbreviation,\n",
" b.pr_fr_abbreviation,\n",
" b.pr_iso_code,\n",
" b.car_dguid,\n",
" b.car_en_name,\n",
" b.car_fr_name,\n",
" b.er_dguid,\n",
" b.er_name,\n",
" b.cd_dguid,\n",
" b.cd_name,\n",
" b.cd_type,\n",
" b.ccs_dguid,\n",
" b.ccs_name,\n",
" b.cma_dguid,\n",
" b.cma_p_dguid,\n",
" b.cma_name,\n",
" b.cma_type,\n",
" b.csd_dguid,\n",
" b.csd_name,\n",
" b.csd_type,\n",
" b.sac_type,\n",
" b.sac_code,\n",
" b.fed_dguid,\n",
" b.fed_name,\n",
" b.fed_en_name,\n",
" b.fed_fr_name,\n",
" b.ct_dguid,\n",
" b.ada_dguid,\n",
" b.da_dguid,\n",
" b.db_dguid,\n",
" a.addr_guid,\n",
" a.apt_no_label,\n",
" a.civic_no,\n",
" a.civic_no_suffix,\n",
" a.official_street_name, \n",
" a.mail_street_name, \n",
" a.official_street_type,\n",
" a.mail_street_type,\n",
" a.official_street_direction,\n",
" a.mail_street_direction,\n",
" a.mail_postal_code,\n",
" a.mail_municipality_name,\n",
" a.bu_n_civic_add,\n",
" a.bu_use,\n",
" a.bg_dls_lsd,\n",
" a.bg_dls_qtr,\n",
" a.bg_dls_sctn,\n",
" a.bg_dls_rng,\n",
" a.bg_dls_mrd,\n",
" a.geometry AS geom\n",
"FROM bronze.nar_2025_07 AS a,\n",
" silver.db_2021_digital AS b\n",
"WHERE ST_Intersects(a.geometry, b.geom);\n",
"\n",
"-- Create spatial index\n",
"CREATE INDEX nar_2025_07_geom_idx ON silver.nar_2025_07 USING gist (geom) WITH (\n",
" fillfactor = 100\n",
");\n",
"\"\"\""
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "ae7babb4-a3e6-4c0b-93a6-3b52a4f89a1b",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<sqlalchemy.engine.cursor.CursorResult at 0x708e93a05860>"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"with engine.connect() as conn:\n",
" conn.execute(text(sql))\n",
" conn.commit()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2fd00615-f6e4-4aeb-a6a4-78d6a7ee5378",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}