mirror of
https://github.com/dataforcanada/d4c-datapkg-statistical.git
synced 2026-06-13 14:10:55 +02:00
113 lines
2.5 KiB
Python
113 lines
2.5 KiB
Python
#!/usr/bin/env python
|
|
# coding: utf-8
|
|
import os
import sys

import geopandas as gpd
import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy import text
from sqlalchemy.engine import URL
|
|
|
|
# Path of the 2021 place names CSV, taken from the first command-line argument.
# Exit with a usage message instead of an IndexError traceback when it is absent.
if len(sys.argv) < 2:
    sys.exit(f"Usage: {sys.argv[0]} <placenames_2021_csv>")
placenames_2021_csv = sys.argv[1]

# PostgreSQL connection settings are read from the environment.
DATABASE = os.environ.get("POSTGRES_DB")
USER = os.environ.get("POSTGRES_USER")
PASSWORD = os.environ.get("POSTGRES_PASSWORD")

# Fail fast when the database or user is not configured; otherwise the literal
# text "None" would end up in the connection URL and only surface later as a
# confusing connection error. The password is left optional (None means that
# no password is sent).
_missing_settings = [
    env_name
    for env_name, env_value in (("POSTGRES_DB", DATABASE), ("POSTGRES_USER", USER))
    if not env_value
]
if _missing_settings:
    sys.exit(
        "Missing required environment variable(s): " + ", ".join(_missing_settings)
    )

# Build the URL from its individual fields rather than by string formatting,
# so that credentials containing characters such as '@', '/' or ':' do not
# need escaping. Host "db" and port 5432 are fixed, as before.
engine = create_engine(
    URL.create(
        drivername="postgresql",
        username=USER,
        password=PASSWORD,
        host="db",
        port=5432,
        database=DATABASE,
    )
)
|
|
|
|
print(f"Reading {placenames_2021_csv}")

# Maps each CSV column that is loaded to the column name used from here on.
# Only these five columns are read from the file.
column_names = {
    'PNdguid': 'pn_dguid',
    'PNname': 'pn_name',
    'PNsource': 'pn_source',
    'PNrplat': 'latitude',
    'PNrplong': 'longitude',
}

# The file is decoded as ISO-8859-1; the selected columns are renamed right
# after loading.
placenames = pd.read_csv(
    filepath_or_buffer=placenames_2021_csv,
    encoding='ISO-8859-1',
    usecols=list(column_names),
).rename(columns=column_names)
|
|
|
|
print("Creating geodataframe from placenames file")
# One point per row: x is the longitude column, y is the latitude column.
# The geometry is tagged as EPSG:4326.
point_geometry = gpd.points_from_xy(placenames["longitude"],
                                    placenames["latitude"])
gdf = gpd.GeoDataFrame(placenames, geometry=point_geometry, crs="EPSG:4326")

print("Dropping 'latitude', 'longitude' from geodataframe")
# The coordinates now live in the geometry column, so the raw columns go.
gdf = gdf.drop(columns=["latitude", "longitude"])
|
|
|
|
print("Loading geodataframe to PostgreSQL as bronze.pn_2021_tmp")
# Writes the points to bronze.pn_2021_tmp in batches of 150000 rows,
# replacing the table if it already exists.
gdf.to_postgis(
    name="pn_2021_tmp",
    schema='bronze',
    con=engine,
    if_exists='replace',
    chunksize=150000,
)
|
|
|
|
print("Creating silver.pn_2021")

# Rebuilds silver.pn_2021 from scratch:
#   1. drop the table if it is already there;
#   2. recreate it by pairing each row of bronze.pn_2021_tmp with the rows of
#      silver.db_2021 whose geom intersects the place name point, carrying the
#      db_2021 attribute columns alongside the place name columns;
#   3. add a GiST index on the resulting geom column.
create_pn_2021_sql = """
DROP TABLE IF EXISTS silver.pn_2021;

CREATE TABLE silver.pn_2021 AS
SELECT
db.country_dguid,
db.country_en_name,
db.country_fr_name,
db.country_en_abbreviation,
db.country_fr_abbreviation,
db.grc_dguid,
db.grc_en_name,
db.grc_fr_name,
db.pr_dguid,
db.pr_en_name,
db.pr_fr_name,
db.pr_en_abbreviation,
db.pr_fr_abbreviation,
db.pr_iso_code,
db.car_dguid,
db.car_en_name,
db.car_fr_name,
db.er_dguid,
db.er_name,
db.cd_dguid,
db.cd_name,
db.cd_type,
db.ccs_dguid,
db.ccs_name,
db.cma_dguid,
db.cma_p_dguid,
db.cma_name,
db.cma_type,
db.csd_dguid,
db.csd_name,
db.csd_type,
db.sac_type,
db.sac_code,
db.fed_dguid,
db.fed_name,
db.fed_en_name,
db.fed_fr_name,
db.ct_dguid,
db.ada_dguid,
db.da_dguid,
db.db_dguid,
placenames.pn_dguid,
placenames.pn_name,
placenames.pn_source,
placenames.geometry as geom
FROM bronze.pn_2021_tmp as placenames,
silver.db_2021 as db
WHERE ST_Intersects(placenames.geometry, db.geom);

CREATE INDEX pn_2021_geom_idx ON
silver.pn_2021
USING GIST(geom) WITH (FILLFACTOR = 100);
"""

# engine.begin() opens a transaction that is committed when the block exits
# normally and rolled back if the statement raises.
with engine.begin() as connection:
    connection.execute(text(create_pn_2021_sql))