diff --git a/main.sh b/main.sh index a127a3d..269076e 100755 --- a/main.sh +++ b/main.sh @@ -47,4 +47,9 @@ census_of_population/process.sh ## 7.1 Download Census of Population ## census_of_agriculture/download.sh ## 7.2 Process Census of Agriculture ## -census_of_agriculture/process.sh \ No newline at end of file +census_of_agriculture/process.sh +#### 8.0 National Address Register #### +## 8.1 Download National Address Register ## +national_address_register/download.sh +## 8.2 Load National Address Register ## +national_address_register/process.sh diff --git a/national_address_register/download.sh b/national_address_register/download.sh index 9980c01..c98527e 100755 --- a/national_address_register/download.sh +++ b/national_address_register/download.sh @@ -2,8 +2,16 @@ if [ ! -d "${DATA_FOLDER}/national_address_register" ] then echo "Making directory ${DATA_FOLDER}/national_address_register/" - mkdir -p ${DATA_FOLDER}/national_address_register/{input,extracted,output,scratch} + mkdir -p ${DATA_FOLDER}/national_address_register/{input,extracted,output}/{2024-12,2024-06,2023,2022} fi -echo "Downloading national address register files" -aria2c -x16 -i "${SCRIPT_DIR}/national_address_register/national_address_register_files.txt" --dir=$DATA_FOLDER/national_address_register/input --auto-file-renaming=false +INPUT_FOLDER="${DATA_FOLDER}/national_address_register/input" + +echo "Downloading 2024-12 vintage of national address register files" +aria2c -x16 -i "${SCRIPT_DIR}/national_address_register/national_address_register_files_2024_12.txt" --dir=${INPUT_FOLDER}/2024-12 --auto-file-renaming=false +echo "Downloading 2024-06 vintage of national address register files" +aria2c -x16 -i "${SCRIPT_DIR}/national_address_register/national_address_register_files_2024_06.txt" --dir=${INPUT_FOLDER}/2024-06 --auto-file-renaming=false +echo "Downloading 2023 vintage of national address register files" +aria2c -x16 -i 
"${SCRIPT_DIR}/national_address_register/national_address_register_files_2023.txt" --dir=${INPUT_FOLDER}/2023 --auto-file-renaming=false +echo "Downloading 2022 vintage of national address register files" +aria2c -x16 -i "${SCRIPT_DIR}/national_address_register/national_address_register_files_2022.txt" --dir=${INPUT_FOLDER}/2022 --auto-file-renaming=false \ No newline at end of file diff --git a/national_address_register/export.ipynb b/national_address_register/export.ipynb new file mode 100644 index 0000000..f1b0300 --- /dev/null +++ b/national_address_register/export.ipynb @@ -0,0 +1,155 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 3, + "id": "b6e053ec", + "metadata": {}, + "outputs": [], + "source": [ + "DATA_FOLDER=/data\n", + "\n", + "source ../.env" + ] + }, + { + "cell_type": "markdown", + "id": "12eca225-3d05-4bb7-95fa-7b9df694f53d", + "metadata": {}, + "source": [ + "# 1. Export National Address Register File (2024-12 vintage)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "4c5bb532", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Exporting silver.nar_2024_12 table to /data/national_address_register/output/2024-12/nar_2024_12.parquet\n" + ] + } + ], + "source": [ + "output_folder=\"${DATA_FOLDER}/national_address_register/output/2024-12\"\n", + "output_file=\"${output_folder}/nar_2024_12.parquet\"\n", + "echo \"Exporting silver.nar_2024_12 table to ${output_file}\" \n", + "ogr2ogr \\\n", + " -lco COMPRESSION=\"ZSTD\" \\\n", + " -lco CREATOR=\"www.dataforcanada.org\" \\\n", + " -lco WRITE_COVERING_BBOX=\"YES\" \\\n", + " -lco SORT_BY_BBOX=\"YES\" \\\n", + " -f Parquet \\\n", + " ${output_file} \\\n", + " \"PG:host=db dbname=${POSTGRES_DB} user=${POSTGRES_USER} password=${POSTGRES_PASSWORD} port=5432\" \\\n", + " \"silver.nar_2024_12\"" + ] + }, + { + "cell_type": "markdown", + "id": "245cd8fb-da36-4476-8e5c-4d3665e901d7", + "metadata": {}, + "source": [ + "# 2. 
Export National Address Register File (2024-06 vintage)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f3e2a5b5-9b31-4f82-88da-414bf23ba515", + "metadata": {}, + "outputs": [], + "source": [ + "output_folder=\"${DATA_FOLDER}/national_address_register/output/2024-06\"\n", + "output_file=\"${output_folder}/nar_2024_06.parquet\"\n", + "echo \"Exporting silver.nar_2024_06 table to ${output_file}\" \n", + "ogr2ogr \\\n", + " -lco COMPRESSION=\"ZSTD\" \\\n", + " -lco CREATOR=\"www.dataforcanada.org\" \\\n", + " -lco WRITE_COVERING_BBOX=\"YES\" \\\n", + " -lco SORT_BY_BBOX=\"YES\" \\\n", + " -f Parquet \\\n", + " ${output_file} \\\n", + " \"PG:host=db dbname=${POSTGRES_DB} user=${POSTGRES_USER} password=${POSTGRES_PASSWORD} port=5432\" \\\n", + " \"silver.nar_2024_06\"" + ] + }, + { + "cell_type": "markdown", + "id": "56ef7755-06ef-473c-8a1f-0129f6b1dc28", + "metadata": {}, + "source": [ + "# 3. Export National Address Register File (2023 vintage)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "97df2904-c18d-4132-bc28-f0f454c48c87", + "metadata": {}, + "outputs": [], + "source": [ + "output_folder=\"${DATA_FOLDER}/national_address_register/output/2023\"\n", + "output_file=\"${output_folder}/nar_2023.parquet\"\n", + "echo \"Exporting silver.nar_2023 table to ${output_file}\"\n", + "ogr2ogr \\\n", + " -lco COMPRESSION=\"ZSTD\" \\\n", + " -lco CREATOR=\"www.dataforcanada.org\" \\\n", + " -lco WRITE_COVERING_BBOX=\"YES\" \\\n", + " -lco SORT_BY_BBOX=\"YES\" \\\n", + " -f Parquet \\\n", + " ${output_file} \\\n", + " \"PG:host=db dbname=${POSTGRES_DB} user=${POSTGRES_USER} password=${POSTGRES_PASSWORD} port=5432\" \\\n", + " \"silver.nar_2023\"" + ] + }, + { + "cell_type": "markdown", + "id": "f1bfd838-19aa-4de7-9ca1-61b1215d7865", + "metadata": {}, + "source": [ + "# 4. 
Export National Address Register File (2022 vintage)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d2f1210f-0d9c-45de-a019-a438ff9dfa4a", + "metadata": {}, + "outputs": [], + "source": [ + "output_folder=\"${DATA_FOLDER}/national_address_register/output/2022\"\n", + "output_file=\"${output_folder}/nar_2022.parquet\"\n", + "echo \"Exporting silver.nar_2022 table to ${output_file}\"\n", + "ogr2ogr \\\n", + " -lco COMPRESSION=\"ZSTD\" \\\n", + " -lco CREATOR=\"www.dataforcanada.org\" \\\n", + " -lco WRITE_COVERING_BBOX=\"YES\" \\\n", + " -lco SORT_BY_BBOX=\"YES\" \\\n", + " -f Parquet \\\n", + " ${output_file} \\\n", + " \"PG:host=db dbname=${POSTGRES_DB} user=${POSTGRES_USER} password=${POSTGRES_PASSWORD} port=5432\" \\\n", + " \"silver.nar_2022\"" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Bash", + "language": "bash", + "name": "bash" + }, + "language_info": { + "codemirror_mode": "shell", + "file_extension": ".sh", + "mimetype": "text/x-sh", + "name": "bash" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/national_address_register/load.sh b/national_address_register/load.sh deleted file mode 100755 index a62a13c..0000000 --- a/national_address_register/load.sh +++ /dev/null @@ -1,167 +0,0 @@ -#!/bin/bash - -import_to_postgis() { - local filepath=$1 - local table_name=$2 - local extra_parameters=${@:3} - - # Virtual file system - if [[ ${filepath: -4} = '.zip' ]]; then - local filepath="/vsizip/${filepath}" - fi - - echo "Importing ${filepath}" - ogr2ogr \ - --config PG_USE_COPY YES \ - -overwrite \ - -f "PostgreSQL" \ - "PG:host=db dbname=${POSTGRES_DB} user=${POSTGRES_USER} password=${POSTGRES_PASSWORD} port=5432" \ - -lco GEOMETRY_NAME=geom \ - -progress \ - -gt 500000 \ - -t_srs EPSG:3857 \ - -nln ${table_name} \ - ${extra_parameters} \ - ${filepath} -} - -concatenate_csvs() { - # Concatenates all of the CSVs in the directory - local input_directory=$1 - local output_file=$2 - for address_file in 
$(ls ${input_directory}/*.csv); - do - echo "Processing ${address_file}. Adding to ${output_file}" - tail -n +2 $address_file >> ${output_file} - done -} - -INPUT_FOLDER="${DATA_FOLDER}/national_address_register/input" -EXTRACTED_FOLDER="${DATA_FOLDER}/national_address_register/extracted" -SCRATCH_FOLDER="${DATA_FOLDER}/national_address_register/scratch" - -import_202412() { - # Process 202412 - # Extract files - echo "Extracting ${INPUT_FOLDER}/202412.zip" - unzip -q -n ${INPUT_FOLDER}/202412.zip -d ${EXTRACTED_FOLDER}/202412 - if [ ! -f ${SCRATCH_FOLDER}/statcan_nar_addresses_202412.csv ] - then - echo "Adding header file to ${SCRATCH_FOLDER}/statcan_nar_addresses_202412.csv" - echo "LOC_GUID,ADDR_GUID,APT_NO_LABEL,CIVIC_NO,CIVIC_NO_SUFFIX,OFFICIAL_STREET_NAME,OFFICIAL_STREET_TYPE,OFFICIAL_STREET_DIR,PROV_CODE,CSD_ENG_NAME,CSD_FRE_NAME,CSD_TYPE_ENG_CODE,CSD_TYPE_FRE_CODE,MAIL_STREET_NAME,MAIL_STREET_TYPE,MAIL_STREET_DIR,MAIL_MUN_NAME,MAIL_PROV_ABVN,MAIL_POSTAL_CODE,BG_DLS_LSD,BG_DLS_QTR,BG_DLS_SCTN,BG_DLS_TWNSHP,BG_DLS_RNG,BG_DLS_MRD,BG_X,BG_Y,BU_N_CIVIC_ADD,BU_USE" > ${SCRATCH_FOLDER}/statcan_nar_addresses_202412.csv - fi - - if [ $(head ${SCRATCH_FOLDER}/statcan_nar_addresses_202412.csv | wc -l) -ne 10 ] - then - echo "Appending Addresses CSVs to ${SCRATCH_FOLDER}/statcan_nar_addresses_202412.csv" - concatenate_csvs "${EXTRACTED_FOLDER}/202412/Addresses" "${SCRATCH_FOLDER}/statcan_nar_addresses_202412.csv" - fi - - if [ ! 
-f ${SCRATCH_FOLDER}/statcan_nar_locations_202412.csv ] - then - echo "Adding header file to ${SCRATCH_FOLDER}/statcan_nar_locations_202412.csv" - echo "LOC_GUID,CSD_CODE,FED_CODE,FED_ENG_NAME,FED_FRE_NAME,ER_CODE,ER_ENG_NAME,ER_FRE_NAME,REPPOINT_LATITUDE,REPPOINT_LONGITUDE" > ${SCRATCH_FOLDER}/statcan_nar_locations_202412.csv - fi - - if [ $(head ${SCRATCH_FOLDER}/statcan_nar_locations_202412.csv | wc -l) -ne 10 ] - then - echo "Appending Addresses CSVs to ${SCRATCH_FOLDER}/statcan_nar_locations_202412.csv" - concatenate_csvs "${EXTRACTED_FOLDER}/202412/Locations" "${SCRATCH_FOLDER}/statcan_nar_locations_202412.csv" - fi - python national_address_register/process.py ${SCRATCH_FOLDER}/statcan_nar_addresses_202412.csv ${SCRATCH_FOLDER}/statcan_nar_locations_202412.csv 202412 utf-8 -} - -import_202406() { - # Process 202406 - echo "Extracting ${INPUT_FOLDER}/2024.zip" - unzip -q -n ${INPUT_FOLDER}/2024.zip -d ${EXTRACTED_FOLDER}/202406 - if [ ! -f ${SCRATCH_FOLDER}/statcan_nar_addresses_202406.csv ] - then - echo "Adding header file to ${SCRATCH_FOLDER}/statcan_nar_addresses_202406.csv" - echo "LOC_GUID,ADDR_GUID,APT_NO_LABEL,CIVIC_NO,CIVIC_NO_SUFFIX,OFFICIAL_STREET_NAME,OFFICIAL_STREET_TYPE,OFFICIAL_STREET_DIR,PROV_CODE,CSD_ENG_NAME,CSD_FRE_NAME,CSD_TYPE_ENG_CODE,CSD_TYPE_FRE_CODE,MAIL_STREET_NAME,MAIL_STREET_TYPE,MAIL_STREET_DIR,MAIL_MUN_NAME,MAIL_PROV_ABVN,MAIL_POSTAL_CODE,BG_DLS_LSD,BG_DLS_QTR,BG_DLS_SCTN,BG_DLS_TWNSHP,BG_DLS_RNG,BG_DLS_MRD,BG_X,BG_Y,BU_N_CIVIC_ADD,BU_USE" > ${SCRATCH_FOLDER}/statcan_nar_addresses_202406.csv - fi - - if [ $(head ${SCRATCH_FOLDER}/statcan_nar_addresses_202406.csv | wc -l) -ne 10 ] - then - echo "Appending Addresses CSVs to ${SCRATCH_FOLDER}/statcan_nar_addresses_202406.csv" - concatenate_csvs "${EXTRACTED_FOLDER}/202406/Addresses" "${SCRATCH_FOLDER}/statcan_nar_addresses_202406.csv" - fi - - if [ ! 
-f ${SCRATCH_FOLDER}/statcan_nar_locations_202406.csv ] - then - echo "Adding header file to ${SCRATCH_FOLDER}/statcan_nar_locations_202406.csv" - echo "LOC_GUID,CSD_CODE,FED_CODE,FED_ENG_NAME,FED_FRE_NAME,ER_CODE,ER_ENG_NAME,ER_FRE_NAME,REPPOINT_LATITUDE,REPPOINT_LONGITUDE" > ${SCRATCH_FOLDER}/statcan_nar_locations_202406.csv - fi - - if [ $(head ${SCRATCH_FOLDER}/statcan_nar_locations_202406.csv | wc -l) -ne 10 ] - then - echo "Appending Addresses CSVs to ${SCRATCH_FOLDER}/statcan_nar_locations_202406.csv" - concatenate_csvs "${EXTRACTED_FOLDER}/202406/Locations" "${SCRATCH_FOLDER}/statcan_nar_locations_202406.csv" - fi - python national_address_register/process.py ${SCRATCH_FOLDER}/statcan_nar_addresses_202406.csv ${SCRATCH_FOLDER}/statcan_nar_locations_202406.csv 202406 utf-8 -} - -import_2023() { - # Process 2023 - echo "Extracting ${INPUT_FOLDER}/2023.zip" - unzip -q -n ${INPUT_FOLDER}/2023.zip -d ${EXTRACTED_FOLDER}/2023 - if [ ! -f ${SCRATCH_FOLDER}/statcan_nar_addresses_2023.csv ] - then - echo "Adding header file to ${SCRATCH_FOLDER}/statcan_nar_addresses_2023.csv" - echo "LOC_GUID,ADDR_GUID,APT_NO_LABEL,CIVIC_NO,CIVIC_NO_SUFFIX,OFFICIAL_STREET_NAME,OFFICIAL_STREET_TYPE,OFFICIAL_STREET_DIR,PROV_CODE,CSD_ENG_NAME,CSD_FRE_NAME,CSD_TYPE_ENG_CODE,CSD_TYPE_FRE_CODE,MAIL_STREET_NAME,MAIL_STREET_TYPE,MAIL_STEET_DIR,MAIL_MUN_NAME,MAIL_PROV_ABVN,MAIL_POSTAL_CODE,BG_DLS_LSD,BG_DLS_QTR,BG_DLS_SCTN,BG_DLS_TWNSHP,BG_DLS_RNG,BG_DLS_MRD,BG_X,BG_Y,BU_N_CIVIC_ADD,BU_USE" > ${SCRATCH_FOLDER}/statcan_nar_addresses_2023.csv - fi - - if [ $(head ${SCRATCH_FOLDER}/statcan_nar_addresses_2023.csv | wc -l) -ne 10 ] - then - echo "Appending Addresses CSVs to ${SCRATCH_FOLDER}/statcan_nar_addresses_2023.csv" - concatenate_csvs "${EXTRACTED_FOLDER}/2023/Addresses" "${SCRATCH_FOLDER}/statcan_nar_addresses_2023.csv" - fi - - if [ ! 
-f ${SCRATCH_FOLDER}/statcan_nar_locations_2023.csv ] - then - echo "Adding header file to ${SCRATCH_FOLDER}/statcan_nar_locations_2023.csv" - echo "LOC_GUID,CSD_CODE,FED_2021_CODE,FED_2021_ENG_NAME,FED_2021_FRE_NAME,ER_2021_CODE,ER_2021_ENG_NAME,ER_2021_FRE_NAME,REPPOINT_LATITUDE,REPPOINT_LONGITUDE" > ${SCRATCH_FOLDER}/statcan_nar_locations_2023.csv - fi - - if [ $(head ${SCRATCH_FOLDER}/statcan_nar_locations_2023.csv | wc -l) -ne 10 ] - then - echo "Appending Addresses CSVs to ${SCRATCH_FOLDER}/statcan_nar_locations_2023.csv" - concatenate_csvs "${EXTRACTED_FOLDER}/2022/Locations" "${SCRATCH_FOLDER}/statcan_nar_locations_2023.csv" - fi - python national_address_register/process.py ${SCRATCH_FOLDER}/statcan_nar_addresses_2023.csv ${SCRATCH_FOLDER}/statcan_nar_locations_2023.csv 2023 latin-1 -} - -import_2022() { - # Process 2022 - echo "Extracting ${INPUT_FOLDER}/2022.zip" - unzip -q -n ${INPUT_FOLDER}/2022.zip -d ${EXTRACTED_FOLDER}/2022 - if [ ! -f ${SCRATCH_FOLDER}/statcan_nar_addresses_2022.csv ] - then - echo "Adding header file to ${SCRATCH_FOLDER}/statcan_nar_addresses_2022.csv" - echo "LOC_GUID,ADDR_GUID,CIVIC_NO,CIVIC_NO_SUFFIX,APT_NO_LABEL,OFFICIAL_STREET_NAME,OFFICIAL_STREET_TYPE,OFFICIAL_STREET_DIR,PROV_CODE,CSD_ENG_NAME,CSD_FRE_NAME,CSD_TYPE_ENG_CODE,CSD_TYPE_FRE_CODE,MAIL_STREET_NAME,MAIL_STREET_TYPE,MAIL_STREET_DIR,MAIL_MUN_NAME,MAIL_POSTAL_CODE,MAIL_PROV_ABVN,BG_DLS_LSD,BG_DLS_QTR,BG_DLS_SCTN,BG_DLS_TWNSHP,BG_DLS_RNG,BG_DLS_MRD,BG_X,BG_Y,BU_N_CIVIC_ADD,BU_USE" > ${SCRATCH_FOLDER}/statcan_nar_addresses_2022.csv - fi - - if [ $(head ${SCRATCH_FOLDER}/statcan_nar_addresses_2022.csv | wc -l) -ne 10 ] - then - echo "Appending Addresses CSVs to ${SCRATCH_FOLDER}/statcan_nar_addresses_2022.csv" - concatenate_csvs "${EXTRACTED_FOLDER}/2022/Addresses" "${SCRATCH_FOLDER}/statcan_nar_addresses_2022.csv" - fi - - if [ ! 
-f ${SCRATCH_FOLDER}/statcan_nar_locations_2022.csv ] - then - echo "Adding header file to ${SCRATCH_FOLDER}/statcan_nar_locations_2022.csv" - echo "LOC_GUID,CSD_CODE,FED_2016_CODE,FED_2016_ENG_NAME,FED_2016_FRE_NAME,ER_2016_CODE,ER_2016_ENG_NAME,ER_2016_FRE_NAME,REPPOINT_LATITUDE,REPPOINT_LONGITUDE" > ${SCRATCH_FOLDER}/statcan_nar_locations_2022.csv - fi - - if [ $(head ${SCRATCH_FOLDER}/statcan_nar_locations_2022.csv | wc -l) -ne 10 ] - then - echo "Appending Addresses CSVs to ${SCRATCH_FOLDER}/statcan_nar_locations_2022.csv" - concatenate_csvs "${EXTRACTED_FOLDER}/2022/Locations" "${SCRATCH_FOLDER}/statcan_nar_locations_2022.csv" - fi - python national_address_register/process.py ${SCRATCH_FOLDER}/statcan_nar_addresses_2022.csv ${SCRATCH_FOLDER}/statcan_nar_locations_2022.csv 2022 latin-1 -} - -import_202412 -import_202406 -import_2023 -import_2022 \ No newline at end of file diff --git a/national_address_register/national_address_register_files.txt b/national_address_register/national_address_register_files.txt deleted file mode 100644 index c02b883..0000000 --- a/national_address_register/national_address_register_files.txt +++ /dev/null @@ -1,8 +0,0 @@ -# December 2024 -https://www150.statcan.gc.ca/n1/pub/46-26-0002/2022001/202412.zip -# June 2024 -https://www150.statcan.gc.ca/n1/pub/46-26-0002/2022001/2024.zip -# 2023 -https://www150.statcan.gc.ca/n1/pub/46-26-0002/2022001/2023.zip -# 2022 -https://www150.statcan.gc.ca/n1/pub/46-26-0002/2022001/2022.zip \ No newline at end of file diff --git a/national_address_register/national_address_register_files_2022.txt b/national_address_register/national_address_register_files_2022.txt new file mode 100644 index 0000000..5b6a21d --- /dev/null +++ b/national_address_register/national_address_register_files_2022.txt @@ -0,0 +1,2 @@ +# 2022 +https://data.dataforcanada.org/archive/statistics_canada/national_address_register/2022/2022.zip \ No newline at end of file diff --git 
a/national_address_register/national_address_register_files_2023.txt b/national_address_register/national_address_register_files_2023.txt new file mode 100644 index 0000000..4a8cdd5 --- /dev/null +++ b/national_address_register/national_address_register_files_2023.txt @@ -0,0 +1,2 @@ +# 2023 +https://data.dataforcanada.org/archive/statistics_canada/national_address_register/2023/2023.zip \ No newline at end of file diff --git a/national_address_register/national_address_register_files_2024_06.txt b/national_address_register/national_address_register_files_2024_06.txt new file mode 100644 index 0000000..0107ec5 --- /dev/null +++ b/national_address_register/national_address_register_files_2024_06.txt @@ -0,0 +1,2 @@ +# June 2024 +https://data.dataforcanada.org/archive/statistics_canada/national_address_register/2024-06/2024.zip \ No newline at end of file diff --git a/national_address_register/national_address_register_files_2024_12.txt b/national_address_register/national_address_register_files_2024_12.txt new file mode 100644 index 0000000..c968a62 --- /dev/null +++ b/national_address_register/national_address_register_files_2024_12.txt @@ -0,0 +1,2 @@ +# December 2024 +https://data.dataforcanada.org/archive/statistics_canada/national_address_register/2024-12/202412.zip \ No newline at end of file diff --git a/national_address_register/process.py b/national_address_register/process.py deleted file mode 100755 index 7cc744a..0000000 --- a/national_address_register/process.py +++ /dev/null @@ -1,69 +0,0 @@ -#!/usr/bin/env python -# coding: utf-8 -import os -import sys - -import geopandas as gpd -import pandas as pd -from sqlalchemy import create_engine - -statcan_nar_addresses_csv = sys.argv[1] -statcan_nar_locations_csv = sys.argv[2] -vintage = sys.argv[3] -encoding = sys.argv[4] - -print(f"Reading {statcan_nar_addresses_csv}") -statcan_nar_addresses = pd.read_csv(filepath_or_buffer=statcan_nar_addresses_csv, - dtype={ - "CIVIC_NO": "Int32", - "PROV_CODE": object, - 
"BU_USE": "Int8", - "BG_DLS_LSD": object, - "BG_DLS_QTR": object, - "BG_DLS_SCTN": object, - "BG_DLS_TWNSHP": object, - "BG_DLS_RNG": object, - "BG_DLS_MRD": object - }, - encoding=encoding) - -print(f"Reading {statcan_nar_locations_csv}") -statcan_nar_locations = pd.read_csv(filepath_or_buffer=statcan_nar_locations_csv, - usecols=["LOC_GUID", - "REPPOINT_LATITUDE", - "REPPOINT_LONGITUDE"], - encoding=encoding) - -print(f"Combining {statcan_nar_addresses_csv} and {statcan_nar_locations_csv}") -statcan_nar_addresses_combined = pd.merge(statcan_nar_addresses, - statcan_nar_locations, - on="LOC_GUID", how="inner") - -del statcan_nar_addresses -del statcan_nar_locations - -DATABASE = os.environ.get("POSTGRES_DB") -HOST = os.environ.get("WAREHOUSE_PG_HOST") -USER = os.environ.get("POSTGRES_USER") -PASSWORD = os.environ.get("POSTGRES_PASSWORD") - -engine = create_engine(f"postgresql://{USER}:{PASSWORD}@{HOST}:5432/{DATABASE}") - -print("Creating geodataframe from combined address file") -gdf = gpd.GeoDataFrame( - statcan_nar_addresses_combined, - geometry=gpd.points_from_xy(statcan_nar_addresses_combined.REPPOINT_LONGITUDE, - statcan_nar_addresses_combined.REPPOINT_LATITUDE), - crs="EPSG:4326" -) - -print("Dropping 'REPPOINT_LATITUDE', 'REPPOINT_LONGITUDE' from geodataframe") -gdf.drop(columns=["REPPOINT_LATITUDE", "REPPOINT_LONGITUDE"], - inplace=True) - -print("Converting geodataframe to EPSG:3857") -gdf.to_crs(3857, inplace=True) -print(f"Loading geodatframe to PostgreSQL as statcan_nar_addresses_combined_{vintage}") -gdf.to_postgis(name=f"statcan_nar_addresses_combined_{vintage}", - con=engine, - chunksize=150000) diff --git a/national_address_register/process.sh b/national_address_register/process.sh new file mode 100755 index 0000000..87ebbad --- /dev/null +++ b/national_address_register/process.sh @@ -0,0 +1,38 @@ +#!/bin/bash + +INPUT_FOLDER="${DATA_FOLDER}/national_address_register/input" +EXTRACTED_FOLDER="${DATA_FOLDER}/national_address_register/extracted" + 
+process_202412() { + # Process 2024-12 vintage: extract the archive, then execute the processing notebook + # Extract files + echo "Extracting ${INPUT_FOLDER}/2024-12/202412.zip to ${EXTRACTED_FOLDER}/2024-12" + unzip -q -n "${INPUT_FOLDER}/2024-12/202412.zip" -d "${EXTRACTED_FOLDER}/2024-12" + jupyter execute "$(dirname "${BASH_SOURCE[0]}")/process_2024_12.ipynb" +} + +process_202406() { + # Process 2024-06 vintage (download.sh stores the archive under input/2024-06) + echo "Extracting ${INPUT_FOLDER}/2024-06/2024.zip to ${EXTRACTED_FOLDER}/2024-06" + unzip -q -n "${INPUT_FOLDER}/2024-06/2024.zip" -d "${EXTRACTED_FOLDER}/2024-06" + # Encoding is utf-8 +} + +process_2023() { + # Process 2023 vintage (download.sh stores the archive under input/2023) + echo "Extracting ${INPUT_FOLDER}/2023/2023.zip to ${EXTRACTED_FOLDER}/2023" + unzip -q -n "${INPUT_FOLDER}/2023/2023.zip" -d "${EXTRACTED_FOLDER}/2023" + # Encoding is latin-1 +} + +process_2022() { + # Process 2022 vintage (download.sh stores the archive under input/2022) + echo "Extracting ${INPUT_FOLDER}/2022/2022.zip to ${EXTRACTED_FOLDER}/2022" + unzip -q -n "${INPUT_FOLDER}/2022/2022.zip" -d "${EXTRACTED_FOLDER}/2022" + # Encoding is latin-1 +} + +process_202412 +#process_202406 +#process_2023 +#process_2022 \ No newline at end of file diff --git a/national_address_register/process_2024_12.ipynb b/national_address_register/process_2024_12.ipynb new file mode 100644 index 0000000..9c383ff --- /dev/null +++ b/national_address_register/process_2024_12.ipynb @@ -0,0 +1,547 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "11c50f98-ddeb-4af0-b69a-743f915fa904", + "metadata": {}, + "source": [ + "# Experimenting with processing this file. Still need to figure out how to structure this file" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "95feb0e4-b3b6-4235-8c8d-bc3652e82b3a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Buckaroo has been enabled as the default DataFrame viewer. 
To return to default dataframe visualization use `from buckaroo import disable; disable()`\n" + ] + } + ], + "source": [ + "#!/usr/bin/env python\n", + "# coding: utf-8\n", + "import gc\n", + "import glob\n", + "import os\n", + "import sys \n", + "\n", + "import buckaroo\n", + "import duckdb\n", + "from IPython.core.interactiveshell import InteractiveShell \n", + "import geopandas as gpd\n", + "import pandas as pd\n", + "from sqlalchemy import create_engine\n", + "from sqlalchemy import text\n", + "\n", + "# Enable multiple outputs per cell\n", + "InteractiveShell.ast_node_interactivity = \"all\"\n", + "# Show all columns\n", + "pd.set_option('display.max_columns', None)\n", + "\n", + "DATABASE = os.environ.get(\"POSTGRES_DB\")\n", + "USER = os.environ.get(\"POSTGRES_USER\")\n", + "PASSWORD = os.environ.get(\"POSTGRES_PASSWORD\")\n", + "\n", + "engine = create_engine(f\"postgresql://{USER}:{PASSWORD}@db:5432/{DATABASE}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "e10bc182-d0d6-4aa3-99b3-2fc396f9ce34", + "metadata": {}, + "outputs": [], + "source": [ + "input_folder = '/data/national_address_register/extracted'" + ] + }, + { + "cell_type": "markdown", + "id": "7f170d27-14ca-488a-811d-1c9836264bb6", + "metadata": {}, + "source": [ + "# 1. Process 2024-12 vintage" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "b27e0edc-59ca-4a63-8bff-1d85909aa174", + "metadata": {}, + "outputs": [], + "source": [ + "nar_addresses_csvs = glob.glob(f'{input_folder}/2024-12/Addresses/*.csv')\n", + "nar_locations_csvs = glob.glob(f'{input_folder}/2024-12/Locations/*.csv')\n", + "encoding = 'utf-8'" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "3fcca833-8547-43b1-b5c7-e5f463ef0bbc", + "metadata": {}, + "outputs": [], + "source": [ + "def process_nar_locations_csvs(csvs_to_process, encoding):\n", + " \"\"\"\n", + " 1. Reads subset of fields for National Address Register locations\n", + " 2. 
Appends all of the processed CSVs as one dataframe\n", + " \"\"\"\n", + " dataframes_to_concatenate = []\n", + " for filename in csvs_to_process:\n", + " print(f\"Processing {filename}\")\n", + " params = {\n", + " 'filepath_or_buffer': filename,\n", + " 'encoding': encoding,\n", + " 'usecols': ['LOC_GUID', \n", + " 'REPPOINT_LATITUDE', \n", + " 'REPPOINT_LONGITUDE'\n", + " ]\n", + " }\n", + " nar_location_df = pd.read_csv(**params)\n", + " # Lowercase columns\n", + " nar_location_df.columns = [x.lower() for x in nar_location_df.columns]\n", + " dataframes_to_concatenate.append(nar_location_df)\n", + " \n", + " print(\"Concatenating all dataframes into one\")\n", + " nar_locations_df = pd.concat(dataframes_to_concatenate)\n", + " \n", + " return nar_locations_df\n", + "\n", + "def process_nar_addresses_csvs(csvs_to_process, encoding):\n", + " \"\"\"\n", + " 1. Reads subset of fields for National Address Register addresses\n", + " 2. Appends all of the processed CSVs as one dataframe\n", + " \"\"\"\n", + " dataframes_to_concatenate = []\n", + " for filename in csvs_to_process:\n", + " print(f\"Processing {filename}\")\n", + " params = {\n", + " 'filepath_or_buffer': filename,\n", + " 'encoding': encoding,\n", + " 'usecols': ['LOC_GUID', \n", + " 'ADDR_GUID', \n", + " 'APT_NO_LABEL',\n", + " 'CIVIC_NO',\n", + " 'CIVIC_NO_SUFFIX',\n", + " 'OFFICIAL_STREET_NAME',\n", + " 'OFFICIAL_STREET_TYPE',\n", + " 'OFFICIAL_STREET_DIR',\n", + " 'MAIL_STREET_NAME',\n", + " 'MAIL_STREET_TYPE',\n", + " 'MAIL_STREET_DIR',\n", + " 'MAIL_MUN_NAME',\n", + " 'MAIL_POSTAL_CODE',\n", + " 'BG_DLS_LSD',\n", + " 'BG_DLS_QTR',\n", + " 'BG_DLS_SCTN',\n", + " 'BG_DLS_RNG',\n", + " 'BG_DLS_MRD',\n", + " # Removing since REPPOINT_LATITUDE and REPPOINT_LONGITUDE seem to have same purpose\n", + " #'BG_X',\n", + " #'BG_Y',\n", + " 'BU_USE',\n", + " 'BU_N_CIVIC_ADD'\n", + " ],\n", + " 'dtype': {\n", + " \"CIVIC_NO\": \"Int32\", \n", + " \"PROV_CODE\": object,\n", + " \"BU_USE\": \"Int8\",\n", + " 
\"BG_DLS_LSD\": object,\n", + " \"BG_DLS_QTR\": object,\n", + " \"BG_DLS_SCTN\": object,\n", + " \"BG_DLS_TWNSHP\": object,\n", + " \"BG_DLS_RNG\": object,\n", + " \"BG_DLS_MRD\": object\n", + " }\n", + " }\n", + " nar_address_df = pd.read_csv(**params)\n", + " # Lowercase columns\n", + " nar_address_df.columns = [x.lower() for x in nar_address_df.columns]\n", + " dataframes_to_concatenate.append(nar_address_df)\n", + " \n", + " print(\"Concatenating all dataframes into one\")\n", + " nar_addresses_df = pd.concat(dataframes_to_concatenate, ignore_index=True)\n", + " \n", + " return nar_addresses_df" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "f9bb3a4d-d913-4b9d-aa49-7ec2c4dfef60", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing /data/national_address_register/extracted/2024-12/Locations/Location_10.csv\n", + "Processing /data/national_address_register/extracted/2024-12/Locations/Location_11.csv\n", + "Processing /data/national_address_register/extracted/2024-12/Locations/Location_12.csv\n", + "Processing /data/national_address_register/extracted/2024-12/Locations/Location_13.csv\n", + "Processing /data/national_address_register/extracted/2024-12/Locations/Location_24_part_1.csv\n", + "Processing /data/national_address_register/extracted/2024-12/Locations/Location_24_part_2.csv\n", + "Processing /data/national_address_register/extracted/2024-12/Locations/Location_24_part_3.csv\n", + "Processing /data/national_address_register/extracted/2024-12/Locations/Location_24_part_4.csv\n", + "Processing /data/national_address_register/extracted/2024-12/Locations/Location_35_part_1.csv\n", + "Processing /data/national_address_register/extracted/2024-12/Locations/Location_35_part_2.csv\n", + "Processing /data/national_address_register/extracted/2024-12/Locations/Location_35_part_3.csv\n", + "Processing /data/national_address_register/extracted/2024-12/Locations/Location_35_part_4.csv\n", + 
"Processing /data/national_address_register/extracted/2024-12/Locations/Location_35_part_5.csv\n", + "Processing /data/national_address_register/extracted/2024-12/Locations/Location_46.csv\n", + "Processing /data/national_address_register/extracted/2024-12/Locations/Location_47.csv\n", + "Processing /data/national_address_register/extracted/2024-12/Locations/Location_48_part_1.csv\n", + "Processing /data/national_address_register/extracted/2024-12/Locations/Location_48_part_2.csv\n", + "Processing /data/national_address_register/extracted/2024-12/Locations/Location_59_part_1.csv\n", + "Processing /data/national_address_register/extracted/2024-12/Locations/Location_59_part_2.csv\n", + "Processing /data/national_address_register/extracted/2024-12/Locations/Location_60.csv\n", + "Processing /data/national_address_register/extracted/2024-12/Locations/Location_61.csv\n", + "Processing /data/national_address_register/extracted/2024-12/Locations/Location_62.csv\n", + "Concatenating all dataframes into one\n", + "Processing /data/national_address_register/extracted/2024-12/Addresses/Address_10.csv\n", + "Processing /data/national_address_register/extracted/2024-12/Addresses/Address_11.csv\n", + "Processing /data/national_address_register/extracted/2024-12/Addresses/Address_12.csv\n", + "Processing /data/national_address_register/extracted/2024-12/Addresses/Address_13.csv\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_943/484766080.py:74: DtypeWarning: Columns (27) have mixed types. 
Specify dtype option on import or set low_memory=False.\n", + " nar_address_df = pd.read_csv(**params)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing /data/national_address_register/extracted/2024-12/Addresses/Address_24_part_1.csv\n", + "Processing /data/national_address_register/extracted/2024-12/Addresses/Address_24_part_2.csv\n", + "Processing /data/national_address_register/extracted/2024-12/Addresses/Address_24_part_3.csv\n", + "Processing /data/national_address_register/extracted/2024-12/Addresses/Address_24_part_4.csv\n", + "Processing /data/national_address_register/extracted/2024-12/Addresses/Address_24_part_5.csv\n", + "Processing /data/national_address_register/extracted/2024-12/Addresses/Address_35_part_1.csv\n", + "Processing /data/national_address_register/extracted/2024-12/Addresses/Address_35_part_2.csv\n", + "Processing /data/national_address_register/extracted/2024-12/Addresses/Address_35_part_3.csv\n", + "Processing /data/national_address_register/extracted/2024-12/Addresses/Address_35_part_4.csv\n", + "Processing /data/national_address_register/extracted/2024-12/Addresses/Address_35_part_5.csv\n", + "Processing /data/national_address_register/extracted/2024-12/Addresses/Address_35_part_6.csv\n", + "Processing /data/national_address_register/extracted/2024-12/Addresses/Address_35_part_7.csv\n", + "Processing /data/national_address_register/extracted/2024-12/Addresses/Address_46.csv\n", + "Processing /data/national_address_register/extracted/2024-12/Addresses/Address_47.csv\n", + "Processing /data/national_address_register/extracted/2024-12/Addresses/Address_48_part_1.csv\n", + "Processing /data/national_address_register/extracted/2024-12/Addresses/Address_48_part_2.csv\n", + "Processing /data/national_address_register/extracted/2024-12/Addresses/Address_48_part_3.csv\n", + "Processing /data/national_address_register/extracted/2024-12/Addresses/Address_59_part_1.csv\n", + "Processing 
/data/national_address_register/extracted/2024-12/Addresses/Address_59_part_2.csv\n", + "Processing /data/national_address_register/extracted/2024-12/Addresses/Address_59_part_3.csv\n", + "Processing /data/national_address_register/extracted/2024-12/Addresses/Address_60.csv\n", + "Processing /data/national_address_register/extracted/2024-12/Addresses/Address_61.csv\n", + "Processing /data/national_address_register/extracted/2024-12/Addresses/Address_62.csv\n", + "Concatenating all dataframes into one\n" + ] + } + ], + "source": [ + "nar_locations = process_nar_locations_csvs(nar_locations_csvs, encoding)\n", + "nar_addresses = process_nar_addresses_csvs(nar_addresses_csvs, encoding)" + ] + }, + { + "cell_type": "markdown", + "id": "b49f6602-9bda-410f-82a2-54a5711311b0", + "metadata": {}, + "source": [ + "# TODO\n", + "- look into why there are locations with empty reppoint_latitude and reppoint_longitude\n", + " - There are 84,285 records that have an empty reppoint_latitude and reppoint_longitude" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "f3600645-c350-473f-81ea-0bd9ebe70e37", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Combining nar_addresses and nar_locations\n" + ] + }, + { + "data": { + "text/plain": [ + "40" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "print(\"Combining nar_addresses and nar_locations\")\n", + "nar_addresses_combined = duckdb.sql(\"\"\"\n", + "SELECT a.addr_guid, a.apt_no_label, a.civic_no, a.civic_no_suffix, a.official_street_name, a.mail_street_name, a.official_street_type, a.mail_street_type,\n", + " a.official_street_dir AS official_street_direction, a.mail_street_dir AS mail_street_direction, a.mail_postal_code, a.mail_mun_name AS mail_municipality_name, \n", + " a.bu_n_civic_add, a.bu_use,\n", + " a.bg_dls_lsd, a.bg_dls_qtr, a.bg_dls_sctn, a.bg_dls_rng, a.bg_dls_mrd,\n", + " 
b.reppoint_latitude, b.reppoint_longitude\n", + "FROM nar_addresses AS a,\n", + " nar_locations AS b\n", + "WHERE a.loc_guid = b.loc_guid\n", + " -- A point needs both coordinates, so drop locations missing either one\n", + " AND b.reppoint_latitude IS NOT NULL\n", + " AND b.reppoint_longitude IS NOT NULL\n", + "\"\"\").df()\n", + "\n", + "# Release the two source dataframes now that the joined copy exists\n", + "del nar_addresses\n", + "del nar_locations\n", + "gc.collect()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "0d35eec6-8ab3-4e7a-a7fd-2915333e27f9", + "metadata": {}, + "outputs": [], + "source": [ + "# points_from_xy takes x (longitude) first, then y (latitude)\n", + "gdf = gpd.GeoDataFrame(\n", + " nar_addresses_combined, \n", + " geometry=gpd.points_from_xy(nar_addresses_combined.reppoint_longitude,\n", + " nar_addresses_combined.reppoint_latitude),\n", + " crs=\"EPSG:4326\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "614ad773-adb3-413c-8b8f-da721afc85cd", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Dropping 'reppoint_latitude', 'reppoint_longitude' from geodataframe\n" + ] + } + ], + "source": [ + "# The coordinates now live in the geometry column, so the raw columns are redundant\n", + "print(\"Dropping 'reppoint_latitude', 'reppoint_longitude' from geodataframe\")\n", + "gdf.drop(columns=[\"reppoint_latitude\", \"reppoint_longitude\"], \n", + " inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "cac024f8-6eb5-48ea-88ad-289edf505ba3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "del nar_addresses_combined\n", + "gc.collect()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "d2f81e61-1fc7-4518-ad1f-d515e16ce22c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loading geodataframe to PostgreSQL as bronze.nar_2024_12\n" + ] + } + ], + "source": [ + "print(\"Loading geodataframe to PostgreSQL as bronze.nar_2024_12\")\n", + "# if_exists defaults to 'fail'; replace the table so this cell can be re-run\n", + "gdf.to_postgis(name=\"nar_2024_12\", \n", + " schema='bronze',\n", + " con=engine,\n", + " if_exists=\"replace\",\n", + " chunksize=150000)" + ] + }, + { + "cell_type":
"code", + "execution_count": 11, + "id": "b7fb6976-0b95-445e-b1e8-022c39bce25b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "584" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "del(gdf)\n", + "gc.collect()" + ] + }, + { + "cell_type": "markdown", + "id": "384cd07b-c79f-42e8-81d6-ecbe78f167b1", + "metadata": {}, + "source": [ + "## Link to 2021 geographies\n", + "There are 10 records that were not linked to 2021 geographies" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "f7f56297-150b-4ad2-a5c2-2ee5e477f978", + "metadata": {}, + "outputs": [], + "source": [ + "sql = \"\"\"\n", + "DROP TABLE IF EXISTS silver.nar_2024_12;\n", + "CREATE TABLE silver.nar_2024_12 AS\n", + "SELECT DISTINCT\n", + " b.country_dguid,\n", + " b.country_en_name,\n", + " b.country_fr_name,\n", + " b.country_en_abbreviation,\n", + " b.country_fr_abbreviation,\n", + " b.grc_dguid,\n", + " b.grc_en_name,\n", + " b.grc_fr_name,\n", + " b.pr_dguid,\n", + " b.pr_en_name,\n", + " b.pr_fr_name,\n", + " b.pr_en_abbreviation,\n", + " b.pr_fr_abbreviation,\n", + " b.pr_iso_code,\n", + " b.car_dguid,\n", + " b.car_en_name,\n", + " b.car_fr_name,\n", + " b.er_dguid,\n", + " b.er_name,\n", + " b.cd_dguid,\n", + " b.cd_name,\n", + " b.cd_type,\n", + " b.ccs_dguid,\n", + " b.ccs_name,\n", + " b.cma_dguid,\n", + " b.cma_p_dguid,\n", + " b.cma_name,\n", + " b.cma_type,\n", + " b.csd_dguid,\n", + " b.csd_name,\n", + " b.csd_type,\n", + " b.sac_type,\n", + " b.sac_code,\n", + " b.fed_dguid,\n", + " b.fed_name,\n", + " b.fed_en_name,\n", + " b.fed_fr_name,\n", + " b.ct_dguid,\n", + " b.ada_dguid,\n", + " b.da_dguid,\n", + " b.db_dguid,\n", + " a.addr_guid,\n", + " a.apt_no_label,\n", + " a.civic_no,\n", + " a.civic_no_suffix,\n", + " a.official_street_name, \n", + " a.mail_street_name, \n", + " a.official_street_type,\n", + " a.mail_street_type,\n", + " a.official_street_direction,\n", + " 
a.mail_street_direction,\n", + " a.mail_postal_code,\n", + " a.mail_municipality_name,\n", + " a.bu_n_civic_add,\n", + " a.bu_use,\n", + " a.bg_dls_lsd,\n", + " a.bg_dls_qtr,\n", + " a.bg_dls_sctn,\n", + " a.bg_dls_rng,\n", + " a.bg_dls_mrd,\n", + " a.geometry AS geom\n", + "FROM bronze.nar_2024_12 AS a,\n", + " silver.db_2021_digital AS b\n", + "WHERE ST_Intersects(a.geometry, b.geom);\n", + "\n", + "-- Create spatial index\n", + "CREATE INDEX nar_2024_12_geom_idx ON silver.nar_2024_12 USING gist (geom) WITH (\n", + " fillfactor = 100\n", + ");\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "ae7babb4-a3e6-4c0b-93a6-3b52a4f89a1b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "with engine.connect() as conn:\n", + " conn.execute(text(sql))\n", + " conn.commit()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}