mirror of
https://github.com/dataforcanada/d4c-datapkg-statistical.git
synced 2026-06-13 22:20:56 +02:00
230 lines
9.2 KiB
Plaintext
230 lines
9.2 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 8,
|
|
"id": "b6e053ec",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Root directory that the export cells build their output paths from.\n",
|
|
"DATA_FOLDER='/data'\n",
|
|
"\n",
|
|
"# Load the variables defined in ../.env into this kernel session\n",
|
|
"# (presumably the POSTGRES_* settings used by the export loops - confirm).\n",
|
|
". ../.env"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "12eca225-3d05-4bb7-95fa-7b9df694f53d",
|
|
"metadata": {},
|
|
"source": [
|
|
"# 1. Export Digital Boundary Files"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 9,
|
|
"id": "22183463",
|
|
"metadata": {
|
|
"vscode": {
|
|
"languageId": "shellscript"
|
|
}
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Destination directory for the 2021 digital boundary Parquet exports.\n",
|
|
"output_folder=\"${DATA_FOLDER}/boundaries/output/2021/digital_boundary_files\"\n",
|
|
"# Create it up front so the export does not fail on a fresh data volume;\n",
|
|
"# -p makes this a no-op when the directory already exists.\n",
|
|
"mkdir -p \"${output_folder}\""
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 10,
|
|
"id": "a0372dac",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Tables to export, in export order. The export loop reads each one from\n",
|
|
"# the silver schema and strips the _digital suffix for the file name.\n",
|
|
"files=(\n",
|
|
"    \"country_2021_digital\"\n",
|
|
"    \"grc_2021_digital\"\n",
|
|
"    \"pr_2021_digital\"\n",
|
|
"    \"er_2021_digital\"\n",
|
|
"    \"car_2021_digital\"\n",
|
|
"    \"cd_2021_digital\"\n",
|
|
"    \"ccs_2021_digital\"\n",
|
|
"    \"cma_2021_digital\"\n",
|
|
"    \"csd_2021_digital\"\n",
|
|
"    \"fed_2021_2013_digital\"\n",
|
|
"    \"dpl_2021_digital\"\n",
|
|
"    \"fsa_2021_digital\"\n",
|
|
"    \"pop_ctr_2021_digital\"\n",
|
|
"    \"ct_2021_digital\"\n",
|
|
"    \"da_2021_digital\"\n",
|
|
"    \"db_2021_digital\"\n",
|
|
"    \"ada_2021_digital\"\n",
|
|
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "04a2c69a-6204-4fa5-b6e2-f0375f51f425",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Export as Parquet"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 11,
|
|
"id": "4c5bb532",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Exporting silver.country_2021_digital table to /data/boundaries/output/2021/digital_boundary_files/country_2021.parquet\n",
|
|
"Exporting silver.grc_2021_digital table to /data/boundaries/output/2021/digital_boundary_files/grc_2021.parquet\n",
|
|
"Exporting silver.pr_2021_digital table to /data/boundaries/output/2021/digital_boundary_files/pr_2021.parquet\n",
|
|
"Exporting silver.er_2021_digital table to /data/boundaries/output/2021/digital_boundary_files/er_2021.parquet\n",
|
|
"Exporting silver.car_2021_digital table to /data/boundaries/output/2021/digital_boundary_files/car_2021.parquet\n",
|
|
"Exporting silver.cd_2021_digital table to /data/boundaries/output/2021/digital_boundary_files/cd_2021.parquet\n",
|
|
"Exporting silver.ccs_2021_digital table to /data/boundaries/output/2021/digital_boundary_files/ccs_2021.parquet\n",
|
|
"Exporting silver.cma_2021_digital table to /data/boundaries/output/2021/digital_boundary_files/cma_2021.parquet\n",
|
|
"Exporting silver.csd_2021_digital table to /data/boundaries/output/2021/digital_boundary_files/csd_2021.parquet\n",
|
|
"Exporting silver.fed_2021_2013_digital table to /data/boundaries/output/2021/digital_boundary_files/fed_2021_2013.parquet\n",
|
|
"Exporting silver.dpl_2021_digital table to /data/boundaries/output/2021/digital_boundary_files/dpl_2021.parquet\n",
|
|
"Exporting silver.fsa_2021_digital table to /data/boundaries/output/2021/digital_boundary_files/fsa_2021.parquet\n",
|
|
"Exporting silver.pop_ctr_2021_digital table to /data/boundaries/output/2021/digital_boundary_files/pop_ctr_2021.parquet\n",
|
|
"Exporting silver.ct_2021_digital table to /data/boundaries/output/2021/digital_boundary_files/ct_2021.parquet\n",
|
|
"Exporting silver.da_2021_digital table to /data/boundaries/output/2021/digital_boundary_files/da_2021.parquet\n",
|
|
"Exporting silver.db_2021_digital table to /data/boundaries/output/2021/digital_boundary_files/db_2021.parquet\n",
|
|
"Exporting silver.ada_2021_digital table to /data/boundaries/output/2021/digital_boundary_files/ada_2021.parquet\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# Export each table named in files from the silver schema to a Parquet file\n",
|
|
"# under output_folder; the file name is the table name minus \"_digital\".\n",
|
|
"# The array expansion and the output path are quoted so that a value\n",
|
|
"# containing spaces or glob characters is passed through as a single word.\n",
|
|
"for file in \"${files[@]}\"\n",
|
|
"do\n",
|
|
"    filename=\"${file/_digital/}.parquet\"\n",
|
|
"    output_file=\"${output_folder}/${filename}\"\n",
|
|
"    echo \"Exporting silver.${file} table to ${output_file}\"\n",
|
|
"    # -lco passes layer creation options to the Parquet output driver.\n",
|
|
"    ogr2ogr \\\n",
|
|
"      -lco COMPRESSION=\"ZSTD\" \\\n",
|
|
"      -lco CREATOR=\"www.dataforcanada.org\" \\\n",
|
|
"      -lco WRITE_COVERING_BBOX=\"YES\" \\\n",
|
|
"      -lco SORT_BY_BBOX=\"YES\" \\\n",
|
|
"      -f Parquet \\\n",
|
|
"      \"${output_file}\" \\\n",
|
|
"      \"PG:host=db dbname=${POSTGRES_DB} user=${POSTGRES_USER} password=${POSTGRES_PASSWORD} port=5432\" \\\n",
|
|
"      \"silver.${file}\"\n",
|
|
"done"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "7d62468a-f950-4e63-8bc5-4e3324f6ed65",
|
|
"metadata": {},
|
|
"source": [
|
|
"# 2. Export Cartographic Boundary Files"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 13,
|
|
"id": "3334fd6f-f63c-4ff6-b646-f6c11de75efa",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Destination directory for the 2021 cartographic boundary Parquet exports.\n",
|
|
"output_folder=\"${DATA_FOLDER}/boundaries/output/2021/cartographic_boundary_files\"\n",
|
|
"# Create it up front so the export does not fail on a fresh data volume;\n",
|
|
"# -p makes this a no-op when the directory already exists.\n",
|
|
"mkdir -p \"${output_folder}\""
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 14,
|
|
"id": "f2c21644-77e1-4ed6-856d-61912d10060c",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Tables to export, in export order. The export loop reads each one from\n",
|
|
"# the silver schema and strips the _cartographic suffix for the file name.\n",
|
|
"files=(\n",
|
|
"    \"country_2021_cartographic\"\n",
|
|
"    \"grc_2021_cartographic\"\n",
|
|
"    \"pr_2021_cartographic\"\n",
|
|
"    \"er_2021_cartographic\"\n",
|
|
"    \"car_2021_cartographic\"\n",
|
|
"    \"cd_2021_cartographic\"\n",
|
|
"    \"ccs_2021_cartographic\"\n",
|
|
"    \"cma_2021_cartographic\"\n",
|
|
"    \"csd_2021_cartographic\"\n",
|
|
"    \"fed_2021_2013_cartographic\"\n",
|
|
"    \"dpl_2021_cartographic\"\n",
|
|
"    \"fsa_2021_cartographic\"\n",
|
|
"    \"pop_ctr_2021_cartographic\"\n",
|
|
"    \"ct_2021_cartographic\"\n",
|
|
"    \"da_2021_cartographic\"\n",
|
|
"    \"db_2021_cartographic\"\n",
|
|
"    \"ada_2021_cartographic\"\n",
|
|
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 15,
|
|
"id": "814e1471-7632-4db2-ae4d-6ac27564ada7",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Exporting silver.country_2021_cartographic table to /data/boundaries/output/2021/cartographic_boundary_files/country_2021.parquet\n",
|
|
"Exporting silver.grc_2021_cartographic table to /data/boundaries/output/2021/cartographic_boundary_files/grc_2021.parquet\n",
|
|
"Exporting silver.pr_2021_cartographic table to /data/boundaries/output/2021/cartographic_boundary_files/pr_2021.parquet\n",
|
|
"Exporting silver.er_2021_cartographic table to /data/boundaries/output/2021/cartographic_boundary_files/er_2021.parquet\n",
|
|
"Exporting silver.car_2021_cartographic table to /data/boundaries/output/2021/cartographic_boundary_files/car_2021.parquet\n",
|
|
"Exporting silver.cd_2021_cartographic table to /data/boundaries/output/2021/cartographic_boundary_files/cd_2021.parquet\n",
|
|
"Exporting silver.ccs_2021_cartographic table to /data/boundaries/output/2021/cartographic_boundary_files/ccs_2021.parquet\n",
|
|
"Exporting silver.cma_2021_cartographic table to /data/boundaries/output/2021/cartographic_boundary_files/cma_2021.parquet\n",
|
|
"Exporting silver.csd_2021_cartographic table to /data/boundaries/output/2021/cartographic_boundary_files/csd_2021.parquet\n",
|
|
"Exporting silver.fed_2021_2013_cartographic table to /data/boundaries/output/2021/cartographic_boundary_files/fed_2021_2013.parquet\n",
|
|
"Exporting silver.dpl_2021_cartographic table to /data/boundaries/output/2021/cartographic_boundary_files/dpl_2021.parquet\n",
|
|
"Exporting silver.fsa_2021_cartographic table to /data/boundaries/output/2021/cartographic_boundary_files/fsa_2021.parquet\n",
|
|
"Exporting silver.pop_ctr_2021_cartographic table to /data/boundaries/output/2021/cartographic_boundary_files/pop_ctr_2021.parquet\n",
|
|
"Exporting silver.ct_2021_cartographic table to /data/boundaries/output/2021/cartographic_boundary_files/ct_2021.parquet\n",
|
|
"Exporting silver.da_2021_cartographic table to /data/boundaries/output/2021/cartographic_boundary_files/da_2021.parquet\n",
|
|
"Exporting silver.db_2021_cartographic table to /data/boundaries/output/2021/cartographic_boundary_files/db_2021.parquet\n",
|
|
"Exporting silver.ada_2021_cartographic table to /data/boundaries/output/2021/cartographic_boundary_files/ada_2021.parquet\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# Export each table named in files from the silver schema to a Parquet file\n",
|
|
"# under output_folder; the file name is the table name minus \"_cartographic\".\n",
|
|
"# The array expansion and the output path are quoted so that a value\n",
|
|
"# containing spaces or glob characters is passed through as a single word.\n",
|
|
"for file in \"${files[@]}\"\n",
|
|
"do\n",
|
|
"    filename=\"${file/_cartographic/}.parquet\"\n",
|
|
"    output_file=\"${output_folder}/${filename}\"\n",
|
|
"    echo \"Exporting silver.${file} table to ${output_file}\"\n",
|
|
"    # -lco passes layer creation options to the Parquet output driver.\n",
|
|
"    ogr2ogr \\\n",
|
|
"      -lco COMPRESSION=\"ZSTD\" \\\n",
|
|
"      -lco CREATOR=\"www.dataforcanada.org\" \\\n",
|
|
"      -lco WRITE_COVERING_BBOX=\"YES\" \\\n",
|
|
"      -lco SORT_BY_BBOX=\"YES\" \\\n",
|
|
"      -f Parquet \\\n",
|
|
"      \"${output_file}\" \\\n",
|
|
"      \"PG:host=db dbname=${POSTGRES_DB} user=${POSTGRES_USER} password=${POSTGRES_PASSWORD} port=5432\" \\\n",
|
|
"      \"silver.${file}\"\n",
|
|
"done"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Bash",
|
|
"language": "bash",
|
|
"name": "bash"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": "shell",
|
|
"file_extension": ".sh",
|
|
"mimetype": "text/x-sh",
|
|
"name": "bash"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|