Files
2025-06-06 22:51:49 +00:00

230 lines
9.2 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": 8,
"id": "b6e053ec",
"metadata": {},
"outputs": [],
"source": [
"# Root directory under which the exported boundary files are written.\n",
"DATA_FOLDER=\"/data\"\n",
"\n",
"# Load environment settings. The export cells read POSTGRES_DB, POSTGRES_USER and\n",
"# POSTGRES_PASSWORD, which are presumably defined in this file (confirm).\n",
"source \"../.env\""
]
},
{
"cell_type": "markdown",
"id": "12eca225-3d05-4bb7-95fa-7b9df694f53d",
"metadata": {},
"source": [
"# 1. Export Digital Boundary Files"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "22183463",
"metadata": {
"vscode": {
"languageId": "shellscript"
}
},
"outputs": [],
"source": [
"# Destination directory for the 2021 digital boundary exports.\n",
"output_folder=\"$DATA_FOLDER/boundaries/output/2021/digital_boundary_files\""
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "a0372dac",
"metadata": {},
"outputs": [],
"source": [
"# Tables to export. The export loop reads each one from the `silver` schema and\n",
"# removes `_digital` from the name to build the output file name.\n",
"files=(\n",
"  country_2021_digital\n",
"  grc_2021_digital\n",
"  pr_2021_digital\n",
"  er_2021_digital\n",
"  car_2021_digital\n",
"  cd_2021_digital\n",
"  ccs_2021_digital\n",
"  cma_2021_digital\n",
"  csd_2021_digital\n",
"  fed_2021_2013_digital\n",
"  dpl_2021_digital\n",
"  fsa_2021_digital\n",
"  pop_ctr_2021_digital\n",
"  ct_2021_digital\n",
"  da_2021_digital\n",
"  db_2021_digital\n",
"  ada_2021_digital\n",
")"
]
},
{
"cell_type": "markdown",
"id": "04a2c69a-6204-4fa5-b6e2-f0375f51f425",
"metadata": {},
"source": [
"## Export as Parquet"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "4c5bb532",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Exporting silver.country_2021_digital table to /data/boundaries/output/2021/digital_boundary_files/country_2021.parquet\n",
"Exporting silver.grc_2021_digital table to /data/boundaries/output/2021/digital_boundary_files/grc_2021.parquet\n",
"Exporting silver.pr_2021_digital table to /data/boundaries/output/2021/digital_boundary_files/pr_2021.parquet\n",
"Exporting silver.er_2021_digital table to /data/boundaries/output/2021/digital_boundary_files/er_2021.parquet\n",
"Exporting silver.car_2021_digital table to /data/boundaries/output/2021/digital_boundary_files/car_2021.parquet\n",
"Exporting silver.cd_2021_digital table to /data/boundaries/output/2021/digital_boundary_files/cd_2021.parquet\n",
"Exporting silver.ccs_2021_digital table to /data/boundaries/output/2021/digital_boundary_files/ccs_2021.parquet\n",
"Exporting silver.cma_2021_digital table to /data/boundaries/output/2021/digital_boundary_files/cma_2021.parquet\n",
"Exporting silver.csd_2021_digital table to /data/boundaries/output/2021/digital_boundary_files/csd_2021.parquet\n",
"Exporting silver.fed_2021_2013_digital table to /data/boundaries/output/2021/digital_boundary_files/fed_2021_2013.parquet\n",
"Exporting silver.dpl_2021_digital table to /data/boundaries/output/2021/digital_boundary_files/dpl_2021.parquet\n",
"Exporting silver.fsa_2021_digital table to /data/boundaries/output/2021/digital_boundary_files/fsa_2021.parquet\n",
"Exporting silver.pop_ctr_2021_digital table to /data/boundaries/output/2021/digital_boundary_files/pop_ctr_2021.parquet\n",
"Exporting silver.ct_2021_digital table to /data/boundaries/output/2021/digital_boundary_files/ct_2021.parquet\n",
"Exporting silver.da_2021_digital table to /data/boundaries/output/2021/digital_boundary_files/da_2021.parquet\n",
"Exporting silver.db_2021_digital table to /data/boundaries/output/2021/digital_boundary_files/db_2021.parquet\n",
"Exporting silver.ada_2021_digital table to /data/boundaries/output/2021/digital_boundary_files/ada_2021.parquet\n"
]
}
],
"source": [
"# Export every table named in `files` from the `silver` schema to a ZSTD-compressed\n",
"# Parquet file under `output_folder`.\n",
"\n",
"# Make sure the destination exists before ogr2ogr writes into it.\n",
"mkdir -p \"${output_folder}\"\n",
"\n",
"# NOTE(review): the password is part of the ogr2ogr arguments, so it can show up in the\n",
"# process list while the export runs; consider PGPASSWORD or ~/.pgpass instead.\n",
"# The array expansion is quoted so each table name stays a single word.\n",
"for file in \"${files[@]}\"\n",
"do\n",
"  # Strip only the trailing `_digital` suffix to build the output file name.\n",
"  filename=\"${file%_digital}.parquet\"\n",
"  output_file=\"${output_folder}/${filename}\"\n",
"  echo \"Exporting silver.${file} table to ${output_file}\"\n",
"  if ! ogr2ogr \\\n",
"    -lco COMPRESSION=\"ZSTD\" \\\n",
"    -lco CREATOR=\"www.dataforcanada.org\" \\\n",
"    -lco WRITE_COVERING_BBOX=\"YES\" \\\n",
"    -lco SORT_BY_BBOX=\"YES\" \\\n",
"    -f Parquet \\\n",
"    \"${output_file}\" \\\n",
"    \"PG:host=db dbname=${POSTGRES_DB} user=${POSTGRES_USER} password=${POSTGRES_PASSWORD} port=5432\" \\\n",
"    \"silver.${file}\"\n",
"  then\n",
"    # Keep going so one bad table does not block the rest, but make the failure visible.\n",
"    echo \"ERROR: failed to export silver.${file}\" >&2\n",
"  fi\n",
"done"
]
},
{
"cell_type": "markdown",
"id": "7d62468a-f950-4e63-8bc5-4e3324f6ed65",
"metadata": {},
"source": [
"# 2. Export Cartographic Boundary Files"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "3334fd6f-f63c-4ff6-b646-f6c11de75efa",
"metadata": {},
"outputs": [],
"source": [
"# Destination directory for the 2021 cartographic boundary exports.\n",
"output_folder=\"$DATA_FOLDER/boundaries/output/2021/cartographic_boundary_files\""
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "f2c21644-77e1-4ed6-856d-61912d10060c",
"metadata": {},
"outputs": [],
"source": [
"# Tables to export. The export loop reads each one from the `silver` schema and\n",
"# removes `_cartographic` from the name to build the output file name.\n",
"files=(\n",
"  country_2021_cartographic\n",
"  grc_2021_cartographic\n",
"  pr_2021_cartographic\n",
"  er_2021_cartographic\n",
"  car_2021_cartographic\n",
"  cd_2021_cartographic\n",
"  ccs_2021_cartographic\n",
"  cma_2021_cartographic\n",
"  csd_2021_cartographic\n",
"  fed_2021_2013_cartographic\n",
"  dpl_2021_cartographic\n",
"  fsa_2021_cartographic\n",
"  pop_ctr_2021_cartographic\n",
"  ct_2021_cartographic\n",
"  da_2021_cartographic\n",
"  db_2021_cartographic\n",
"  ada_2021_cartographic\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "814e1471-7632-4db2-ae4d-6ac27564ada7",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Exporting silver.country_2021_cartographic table to /data/boundaries/output/2021/cartographic_boundary_files/country_2021.parquet\n",
"Exporting silver.grc_2021_cartographic table to /data/boundaries/output/2021/cartographic_boundary_files/grc_2021.parquet\n",
"Exporting silver.pr_2021_cartographic table to /data/boundaries/output/2021/cartographic_boundary_files/pr_2021.parquet\n",
"Exporting silver.er_2021_cartographic table to /data/boundaries/output/2021/cartographic_boundary_files/er_2021.parquet\n",
"Exporting silver.car_2021_cartographic table to /data/boundaries/output/2021/cartographic_boundary_files/car_2021.parquet\n",
"Exporting silver.cd_2021_cartographic table to /data/boundaries/output/2021/cartographic_boundary_files/cd_2021.parquet\n",
"Exporting silver.ccs_2021_cartographic table to /data/boundaries/output/2021/cartographic_boundary_files/ccs_2021.parquet\n",
"Exporting silver.cma_2021_cartographic table to /data/boundaries/output/2021/cartographic_boundary_files/cma_2021.parquet\n",
"Exporting silver.csd_2021_cartographic table to /data/boundaries/output/2021/cartographic_boundary_files/csd_2021.parquet\n",
"Exporting silver.fed_2021_2013_cartographic table to /data/boundaries/output/2021/cartographic_boundary_files/fed_2021_2013.parquet\n",
"Exporting silver.dpl_2021_cartographic table to /data/boundaries/output/2021/cartographic_boundary_files/dpl_2021.parquet\n",
"Exporting silver.fsa_2021_cartographic table to /data/boundaries/output/2021/cartographic_boundary_files/fsa_2021.parquet\n",
"Exporting silver.pop_ctr_2021_cartographic table to /data/boundaries/output/2021/cartographic_boundary_files/pop_ctr_2021.parquet\n",
"Exporting silver.ct_2021_cartographic table to /data/boundaries/output/2021/cartographic_boundary_files/ct_2021.parquet\n",
"Exporting silver.da_2021_cartographic table to /data/boundaries/output/2021/cartographic_boundary_files/da_2021.parquet\n",
"Exporting silver.db_2021_cartographic table to /data/boundaries/output/2021/cartographic_boundary_files/db_2021.parquet\n",
"Exporting silver.ada_2021_cartographic table to /data/boundaries/output/2021/cartographic_boundary_files/ada_2021.parquet\n"
]
}
],
"source": [
"# Export every table named in `files` from the `silver` schema to a ZSTD-compressed\n",
"# Parquet file under `output_folder`.\n",
"\n",
"# Make sure the destination exists before ogr2ogr writes into it.\n",
"mkdir -p \"${output_folder}\"\n",
"\n",
"# NOTE(review): the password is part of the ogr2ogr arguments, so it can show up in the\n",
"# process list while the export runs; consider PGPASSWORD or ~/.pgpass instead.\n",
"# The array expansion is quoted so each table name stays a single word.\n",
"for file in \"${files[@]}\"\n",
"do\n",
"  # Strip only the trailing `_cartographic` suffix to build the output file name.\n",
"  filename=\"${file%_cartographic}.parquet\"\n",
"  output_file=\"${output_folder}/${filename}\"\n",
"  echo \"Exporting silver.${file} table to ${output_file}\"\n",
"  if ! ogr2ogr \\\n",
"    -lco COMPRESSION=\"ZSTD\" \\\n",
"    -lco CREATOR=\"www.dataforcanada.org\" \\\n",
"    -lco WRITE_COVERING_BBOX=\"YES\" \\\n",
"    -lco SORT_BY_BBOX=\"YES\" \\\n",
"    -f Parquet \\\n",
"    \"${output_file}\" \\\n",
"    \"PG:host=db dbname=${POSTGRES_DB} user=${POSTGRES_USER} password=${POSTGRES_PASSWORD} port=5432\" \\\n",
"    \"silver.${file}\"\n",
"  then\n",
"    # Keep going so one bad table does not block the rest, but make the failure visible.\n",
"    echo \"ERROR: failed to export silver.${file}\" >&2\n",
"  fi\n",
"done"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Bash",
"language": "bash",
"name": "bash"
},
"language_info": {
"codemirror_mode": "shell",
"file_extension": ".sh",
"mimetype": "text/x-sh",
"name": "bash"
}
},
"nbformat": 4,
"nbformat_minor": 5
}