Initial commit

This commit is contained in:
Diego Ripley
2025-05-24 13:37:31 -04:00
commit f93e4d0cec
108 changed files with 11689 additions and 0 deletions
@@ -0,0 +1,10 @@
https://www150.statcan.gc.ca/n1/en/pub/46-26-0001/2021001/ODA_AB_v1.zip
https://www150.statcan.gc.ca/n1/en/pub/46-26-0001/2021001/ODA_BC_v1.zip
https://www150.statcan.gc.ca/n1/en/pub/46-26-0001/2021001/ODA_MB_v1.zip
https://www150.statcan.gc.ca/n1/en/pub/46-26-0001/2021001/ODA_NB_v1.zip
https://www150.statcan.gc.ca/n1/en/pub/46-26-0001/2021001/ODA_NT_v1.zip
https://www150.statcan.gc.ca/n1/en/pub/46-26-0001/2021001/ODA_NS_v1.zip
https://www150.statcan.gc.ca/n1/en/pub/46-26-0001/2021001/ODA_ON_v1.zip
https://www150.statcan.gc.ca/n1/en/pub/46-26-0001/2021001/ODA_PE_v1.zip
https://www150.statcan.gc.ca/n1/en/pub/46-26-0001/2021001/ODA_QC_v1.zip
https://www150.statcan.gc.ca/n1/en/pub/46-26-0001/2021001/ODA_SK_v1.zip
@@ -0,0 +1,8 @@
https://www150.statcan.gc.ca/n1/en/pub/34-26-0001/2018001/ODB_v2_Alberta.zip?st=0J_AsIyy
https://www150.statcan.gc.ca/n1/en/pub/34-26-0001/2018001/ODB_v2_BritishColumbia.zip
https://www150.statcan.gc.ca/n1/en/pub/34-26-0001/2018001/ODB_v2_NewBrunswick.zip?st=k35-Ygwr
https://www150.statcan.gc.ca/n1/en/pub/34-26-0001/2018001/ODB_v2_NorthwestTerritories.zip?st=SXozU436
https://www150.statcan.gc.ca/n1/en/pub/34-26-0001/2018001/ODB_v2_NovaScotia.zip
https://www150.statcan.gc.ca/n1/en/pub/34-26-0001/2018001/ODB_v2_Ontario.zip
https://www150.statcan.gc.ca/n1/en/pub/34-26-0001/2018001/ODB_v2_Quebec.zip
https://www150.statcan.gc.ca/n1/en/pub/34-26-0001/2018001/ODB_v2_Saskatchewan.zip
@@ -0,0 +1 @@
https://www150.statcan.gc.ca/n1/en/pub/21-26-0001/2020001/ODCAF_V1.0.zip
@@ -0,0 +1 @@
https://www150.statcan.gc.ca/n1/en/pub/37-26-0001/2022001/ODEF_v2.1.zip
@@ -0,0 +1 @@
https://www150.statcan.gc.ca/n1/en/pub/32-26-0005/2023001/ODG_V1.zip
@@ -0,0 +1 @@
https://www150.statcan.gc.ca/n1/en/pub/13-26-0001/2020001/ODHF_v1.1.zip
@@ -0,0 +1 @@
https://www150.statcan.gc.ca/n1/en/pub/21-26-0002/2021001/ODRSF_v1.0.zip
+96
View File
@@ -0,0 +1,96 @@
#!/bin/bash
# Imports the downloaded Statistics Canada "Open Database" files into PostGIS
# staging tables (statcan_*_tmp) using ogr2ogr.
#
# DATA_FOLDER is the directory that holds the extracted downloads. It may be
# set in the environment before running the script; when it is unset or empty
# the original hard-coded location is used.
DATA_FOLDER=${DATA_FOLDER:-/home/ripledi/Documents/projects/process-statcan-spatial-data/data}
# Presumably defines POSTGRES_DB, POSTGRES_USER and POSTGRES_PASSWORD, which
# export_postgis_single reads -- confirm against credentials.sh.
source credentials.sh
#######################################
# Import one OGR-readable dataset into a PostGIS table with ogr2ogr.
# Globals:
#   POSTGRES_DB, POSTGRES_USER, POSTGRES_PASSWORD (read)
# Arguments:
#   $1    - path of the source file; a path ending in ".zip" is prefixed with
#           /vsizip/ before being handed to ogr2ogr
#   $2    - name of the destination table (passed to -nln)
#   $3... - optional extra ogr2ogr options; each argument may contain several
#           whitespace-separated options, e.g. "-append -nlt PROMOTE_TO_MULTI"
# Outputs:
#   Writes "Importing <path>" to stdout before running ogr2ogr.
# Returns:
#   2 when the path or table name is empty, otherwise ogr2ogr's exit status.
#######################################
export_postgis_single() {
  local filepath=$1
  local table_name=$2
  local -a extra_parameters=()

  if [[ -z "${filepath}" || -z "${table_name}" ]]; then
    printf 'usage: export_postgis_single <filepath> <table_name> [ogr2ogr options]\n' >&2
    return 2
  fi

  # Callers pass their options as a single space-separated string, so it is
  # split into words here. read -a splits on whitespace without applying
  # pathname expansion; -d '' makes it consume the whole string, and it then
  # reports end-of-input with a non-zero status, which is expected.
  if (( $# > 2 )); then
    read -r -d '' -a extra_parameters <<< "${*:3}" || true
  fi

  # Virtual file system
  if [[ "${filepath}" == *.zip ]]; then
    filepath="/vsizip/${filepath}"
  fi

  echo "Importing ${filepath}"
  # NOTE(review): -overwrite is always passed and callers add -append after
  # it for follow-up files; this relies on ogr2ogr honouring the later
  # -append -- confirm against the ogr2ogr documentation.
  ogr2ogr \
    --config PG_USE_COPY YES \
    -overwrite \
    -f "PostgreSQL" \
    "PG:host=db dbname=${POSTGRES_DB} user=${POSTGRES_USER} password=${POSTGRES_PASSWORD} port=5432" \
    -progress \
    -gt 500000 \
    -t_srs EPSG:4326 \
    -nln "${table_name}" \
    "${extra_parameters[@]}" \
    "${filepath}"
}
#######################################
# Load the Open Database of Greenhouses shapefile into statcan_odg_tmp.
# Globals:
#   DATA_FOLDER (read), PGCLIENTENCODING (exported as UTF-8)
# Returns:
#   The exit status of export_postgis_single.
#######################################
export_open_database_of_greenhouses() {
  export PGCLIENTENCODING=UTF-8
  # Quoted so a DATA_FOLDER containing spaces stays a single argument.
  export_postgis_single "${DATA_FOLDER}/ODG_V1/odg_v1.shp" statcan_odg_tmp
}
#######################################
# Load the Open Database of Buildings shapefiles into statcan_odb_tmp.
# The first shapefile is imported without -append; every following one is
# imported with -append so all of them end up in the same table.
# Globals:
#   DATA_FOLDER (read), PGCLIENTENCODING (exported as UTF-8)
# Returns:
#   0 when every import succeeded, otherwise the status of the last import
#   that failed. Remaining files are still attempted after a failure.
#######################################
export_open_database_of_buildings() {
  # Open Database of Buildings
  export PGCLIENTENCODING=UTF-8
  # Paths relative to DATA_FOLDER, in import order.
  local -a shapefiles=(
    ODB_Alberta/odb_alberta.shp
    ODB_BritishColumbia/odb_britishcolumbia.shp
    ODB_NewBrunswick/odb_newbrunswick.shp
    ODB_NorthwestTerritories/odb_northwestterritories.shp
    ODB_NovaScotia/odb_novascotia.shp
    ODB_Ontario/odb_ontario.shp
    ODB_Quebec/odb_quebec.shp
    ODB_Saskatchewan/odb_saskatchewan.shp
  )
  local options="-nlt PROMOTE_TO_MULTI"
  local shapefile
  local status=0

  for shapefile in "${shapefiles[@]}"; do
    export_postgis_single "${DATA_FOLDER}/${shapefile}" statcan_odb_tmp "${options}" \
      || status=$?
    # Everything after the first file is appended to the same table.
    options="-append -nlt PROMOTE_TO_MULTI"
  done
  return "${status}"
}
#######################################
# Load the Open Database of Educational Facilities CSV into statcan_odef_tmp,
# building point geometries from the Longitude/Latitude columns.
# Globals:
#   DATA_FOLDER (read), PGCLIENTENCODING (exported as LATIN-1)
# Returns:
#   The exit status of export_postgis_single.
#######################################
export_open_database_of_educational_facilities() {
  export PGCLIENTENCODING=LATIN-1
  # The stray comma that used to follow "Longitude" was dropped: the option
  # takes a comma-separated list and only one name is given.
  export_postgis_single \
    "${DATA_FOLDER}/ODEF_v2.1_EN/ODEF_v2_1.csv" \
    statcan_odef_tmp \
    "-oo X_POSSIBLE_NAMES=Longitude -oo Y_POSSIBLE_NAMES=Latitude -s_srs EPSG:4326"
}
#######################################
# Load the Open Database of Healthcare Facilities CSV into statcan_odhf_tmp,
# building point geometries from the longitude/latitude columns.
# Globals:
#   DATA_FOLDER (read), PGCLIENTENCODING (exported as LATIN-1)
# Returns:
#   The exit status of export_postgis_single.
#######################################
export_open_database_of_healthcare_facilities() {
  export PGCLIENTENCODING=LATIN-1
  # TODO: process further
  # Some characters in this file do not import cleanly, e.g. <97>.
  # NOTE(review): <97> is presumably a raw 0x97 byte, which would suggest the
  # file is not really LATIN-1 -- confirm the source encoding.
  # The stray comma that used to follow "longitude" was dropped: the option
  # takes a comma-separated list and only one name is given.
  export_postgis_single \
    "${DATA_FOLDER}/ODHF_v1.1/odhf_v1.1.csv" \
    statcan_odhf_tmp \
    "-oo X_POSSIBLE_NAMES=longitude -oo Y_POSSIBLE_NAMES=latitude -s_srs EPSG:4326"
}
#######################################
# Load the Open Database of Cultural and Art Facilities CSV into
# statcan_odcaf_tmp, building point geometries from Longitude/Latitude.
# Globals:
#   DATA_FOLDER (read), PGCLIENTENCODING (exported as LATIN-1)
# Returns:
#   The exit status of export_postgis_single.
#######################################
export_open_database_of_cultural_and_art_facilities() {
  export PGCLIENTENCODING=LATIN-1
  # TODO: process further
  # The stray comma that used to follow "Longitude" was dropped: the option
  # takes a comma-separated list and only one name is given.
  export_postgis_single \
    "${DATA_FOLDER}/ODCAF_V1.0/ODCAF_v1.0.csv" \
    statcan_odcaf_tmp \
    "-oo X_POSSIBLE_NAMES=Longitude -oo Y_POSSIBLE_NAMES=Latitude -s_srs EPSG:4326"
}
#######################################
# Load the Open Database of Addresses CSVs into statcan_oda_tmp, building
# point geometries from the longitude/latitude columns. The first file is
# imported without -append; every following one is imported with -append.
# Globals:
#   DATA_FOLDER (read), PGCLIENTENCODING (exported as UTF-8)
# Returns:
#   0 when every import succeeded, otherwise the status of the last import
#   that failed. Remaining files are still attempted after a failure.
#######################################
export_open_database_of_addresses() {
  # PGCLIENTENCODING=UTF-8 seems to have fixed all of the issues
  export PGCLIENTENCODING=UTF-8
  # The stray comma that used to follow "longitude" was dropped: the option
  # takes a comma-separated list and only one name is given.
  local -r base_options="-oo X_POSSIBLE_NAMES=longitude -oo Y_POSSIBLE_NAMES=latitude -s_srs EPSG:4326"
  local options=${base_options}
  local region
  local status=0

  # Region codes as they appear in the file names, in import order.
  for region in AB BC MB NB NS NT ON PE QC SK; do
    export_postgis_single "${DATA_FOLDER}/ODA_${region}_v1.csv" statcan_oda_tmp "${options}" \
      || status=$?
    # Everything after the first file is appended to the same table.
    options="${base_options} -append"
  done
  return "${status}"
}
#######################################
# Load the Open Database of Recreational and Sport Facilities CSV into
# statcan_odrsf_tmp, building point geometries from Longitude/Latitude.
# Globals:
#   DATA_FOLDER (read), PGCLIENTENCODING (exported as LATIN-1)
# Returns:
#   The exit status of export_postgis_single.
#######################################
export_open_database_of_recreational_and_sport_facilities() {
  export PGCLIENTENCODING=LATIN-1
  # TODO: process further
  # The stray comma that used to follow "Longitude" was dropped: the option
  # takes a comma-separated list and only one name is given.
  export_postgis_single \
    "${DATA_FOLDER}/ODRSF_V1.0/ODRSF_v1.0.csv" \
    statcan_odrsf_tmp \
    "-oo X_POSSIBLE_NAMES=Longitude -oo Y_POSSIBLE_NAMES=Latitude -s_srs EPSG:4326"
}
#######################################
# Run the import for each dataset named on the command line.
# Accepted names: greenhouses, buildings, educational_facilities,
# healthcare_facilities, cultural_and_art_facilities, addresses,
# recreational_and_sport_facilities.
# With no arguments only recreational_and_sport_facilities is imported,
# which is what the script did when datasets were selected by commenting
# calls in and out.
# Arguments:
#   $@ - dataset names, imported in the order given
# Returns:
#   The exit status of the last import; exits with 2 on an unknown name.
#######################################
main() {
  local dataset

  if (( $# == 0 )); then
    set -- recreational_and_sport_facilities
  fi

  for dataset in "$@"; do
    case "${dataset}" in
      greenhouses \
        | buildings \
        | educational_facilities \
        | healthcare_facilities \
        | cultural_and_art_facilities \
        | addresses \
        | recreational_and_sport_facilities)
        "export_open_database_of_${dataset}"
        ;;
      *)
        printf 'Unknown dataset: %s\n' "${dataset}" >&2
        exit 2
        ;;
    esac
  done
}

main "$@"
+112
View File
@@ -0,0 +1,112 @@
/* Open Databases */
/* Open Database of Greenhouses */
-- Rebuild statcan_odg_2023 from the staging table: one row per pairing of a
-- greenhouse geometry with a province geometry it intersects.
drop table if exists statcan_odg_2023;

create table statcan_odg_2023 as
select
    pr.dguid            as prdguid,
    pr.prename          as provincenameenglish,
    odg.imagedate,
    odg.datasource      as provider,
    odg.wkb_geometry    as geom
from statcan_odg_tmp as odg
inner join statcan_pr_2021 as pr
    on st_intersects(odg.wkb_geometry, pr.geom);

create index statcan_odg_2023_geom_idx
    on statcan_odg_2023
    using gist (geom)
    with (fillfactor = 100);

-- The staging table is no longer needed once the final table exists.
drop table if exists statcan_odg_tmp;
/* Open Database of Buildings */
-- Rebuild statcan_odb_2019 from the staging table: one row per pairing of a
-- building geometry with a census subdivision geometry it intersects.
-- The table is dropped first, as in the other sections, so the script can be
-- re-run without failing on "relation already exists".
drop table if exists statcan_odb_2019;
create table statcan_odb_2019 as
select b.dguid as csddguid, b.csdname, a.data_prov as data_provider, a.build_id, a.wkb_geometry as geom
from statcan_odb_tmp as a,
statcan_csd_2021 as b
where st_intersects(a.wkb_geometry, b.geom);
create index statcan_odb_2019_geom_idx on statcan_odb_2019 using GIST(geom) with (FILLFACTOR=100);
-- The staging table is no longer needed once the final table exists.
drop table if exists statcan_odb_tmp;
/* Open Database of Educational Facilities */
-- Rebuild statcan_odef_2022 from the staging table, keeping the facilities
-- whose point intersects a province geometry (one row per intersecting pair).
drop table if exists statcan_odef_2022;

create table statcan_odef_2022 as
select
    index,
    source_id,
    facility_name,
    facility_type,
    authority_name,
    isced010,
    isced020,
    isced1,
    isced2,
    isced3,
    isced4plus,
    olms_status,
    unit,
    street_no,
    street_name,
    city,
    prov_terr,
    postal_code,
    odef.pruid,
    csdname,
    csduid,
    geo_source,
    provider,
    cmaname,
    cmauid,
    wkb_geometry as geom
from statcan_odef_tmp as odef
inner join statcan_pr_2021 as pr
    on st_intersects(odef.wkb_geometry, pr.geom);

create index statcan_odef_2022_geom_idx
    on statcan_odef_2022
    using gist (geom)
    with (fillfactor = 100);

-- The staging table is no longer needed once the final table exists.
drop table if exists statcan_odef_tmp;
/* Open Database of Healthcare Facilities */
-- Rebuild statcan_odhf_2020 from the staging table. Each facility with a
-- geometry is paired with the census subdivision and the province that its
-- point intersects.
drop table if exists statcan_odhf_2020;

create table statcan_odhf_2020 as
select
    index,
    facility_name,
    source_facility_type,
    odhf_facility_type,
    provider,
    unit,
    street_no,
    street_name,
    postal_code,
    csd.dguid           as csddguid,
    csd.csdname,
    pr.dguid            as prdguid,
    pr.prename,
    odhf.wkb_geometry   as geom
from statcan_odhf_tmp as odhf
inner join statcan_csd_2021 as csd
    on st_intersects(odhf.wkb_geometry, csd.geom)
inner join statcan_pr_2021 as pr
    on st_intersects(odhf.wkb_geometry, pr.geom)
where odhf.wkb_geometry is not null;

create index statcan_odhf_2020_geom_idx
    on statcan_odhf_2020
    using gist (geom)
    with (fillfactor = 100);

-- The staging table is no longer needed once the final table exists.
drop table if exists statcan_odhf_tmp;
/* Open Database of Cultural and Art Facilities */
-- Rebuild statcan_odcaf_2020 from the staging table.
-- In the text columns listed below the value '..' is replaced by an empty
-- string. This is done while the table is created instead of with one
-- full-table UPDATE per column afterwards (the city UPDATE was also issued
-- twice); the resulting rows are the same, NULLs included, because a NULL
-- never equals '..' and therefore falls through to the ELSE branch.
drop table if exists statcan_odcaf_2020;
create table statcan_odcaf_2020 as
select index,
       case when facility_name = '..' then '' else facility_name end as facility_name,
       case when source_facility_type = '..' then '' else source_facility_type end as source_facility_type,
       odcaf_facility_type,
       provider,
       case when unit = '..' then '' else unit end as unit,
       case when street_no = '..' then '' else street_no end as street_no,
       case when street_name = '..' then '' else street_name end as street_name,
       case when postal_code = '..' then '' else postal_code end as postal_code,
       case when city = '..' then '' else city end as city,
       case when prov_terr = '..' then '' else prov_terr end as prov_terr,
       case when csd_name = '..' then '' else csd_name end as csd_name,
       case when csduid = '..' then '' else csduid end as csduid,
       case when pruid = '..' then '' else pruid end as pruid,
       wkb_geometry as geom
from statcan_odcaf_tmp;
create index statcan_odcaf_2020_geom_idx on statcan_odcaf_2020 using GIST(geom) with (FILLFACTOR=100);
-- The staging table is no longer needed once the final table exists.
drop table if exists statcan_odcaf_tmp;
/* Open Database of Addresses */
-- Rebuild statcan_oda_2021 from the staging table. Each address is paired
-- with the census subdivision its point intersects; the province is then
-- looked up through the subdivision's pruid rather than a second spatial test.
-- The table is dropped first, as in the other sections, so the script can be
-- re-run without failing on "relation already exists".
drop table if exists statcan_oda_2021;
create table statcan_oda_2021 as
select a.id, a.street_no, a.street, a.unit, a.postal_code, b.dguid as csddguid, b.csdname, c.dguid as prdguid, a.provider, wkb_geometry as geom
from statcan_oda_tmp as a,
statcan_csd_2021 as b,
statcan_pr_2021 as c
where st_intersects(a.wkb_geometry, b.geom)
and b.pruid = c.pruid;
create index statcan_oda_2021_geom_idx on statcan_oda_2021 using GIST(geom) with (FILLFACTOR=100);
-- The staging table is no longer needed once the final table exists.
drop table if exists statcan_oda_tmp;
/* Open Database of Recreational and Sport Facilities */
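-- TODO: not implemented yet; build the final table from statcan_odrsf_tmp and drop the staging table afterwards.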