mirror of
https://github.com/dataforcanada/d4c-datapkg-statistical.git
synced 2026-06-13 14:10:55 +02:00
Initial commit
This commit is contained in:
@@ -0,0 +1,113 @@
|
||||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
import os
|
||||
import sys
|
||||
|
||||
import geopandas as gpd
|
||||
import pandas as pd
|
||||
from sqlalchemy import create_engine
|
||||
from sqlalchemy import text
|
||||
|
||||
placenames_2021_csv = sys.argv[1]
|
||||
|
||||
DATABASE = os.environ.get("POSTGRES_DB")
|
||||
USER = os.environ.get("POSTGRES_USER")
|
||||
PASSWORD = os.environ.get("POSTGRES_PASSWORD")
|
||||
|
||||
engine = create_engine(f"postgresql://{USER}:{PASSWORD}@db:5432/{DATABASE}")
|
||||
|
||||
print(f"Reading {placenames_2021_csv}")
|
||||
placenames = pd.read_csv(filepath_or_buffer=placenames_2021_csv,
|
||||
encoding='ISO-8859-1',
|
||||
usecols=['PNdguid', 'PNname', 'PNsource', 'PNrplat', 'PNrplong'])
|
||||
|
||||
placenames.rename(columns={
|
||||
'PNdguid': 'pn_dguid',
|
||||
'PNname': 'pn_name',
|
||||
'PNsource': 'pn_source',
|
||||
'PNrplat': 'latitude',
|
||||
'PNrplong': 'longitude'
|
||||
}, inplace=True)
|
||||
|
||||
print("Creating geodataframe from placenames file")
|
||||
gdf = gpd.GeoDataFrame(
|
||||
placenames,
|
||||
geometry=gpd.points_from_xy(placenames.longitude,
|
||||
placenames.latitude),
|
||||
crs="EPSG:4326"
|
||||
)
|
||||
|
||||
print("Dropping 'latitude', 'longitude' from geodataframe")
|
||||
gdf.drop(columns=["latitude", "longitude"],
|
||||
inplace=True)
|
||||
|
||||
print(f"Loading geodataframe to PostgreSQL as bronze.pn_2021_tmp")
|
||||
gdf.to_postgis(name=f"pn_2021_tmp",
|
||||
con=engine,
|
||||
chunksize=150000,
|
||||
if_exists='replace',
|
||||
schema='bronze')
|
||||
|
||||
print("Creating silver.pn_2021")
|
||||
sql = """
|
||||
DROP TABLE IF EXISTS silver.pn_2021;
|
||||
|
||||
CREATE TABLE silver.pn_2021 AS
|
||||
SELECT
|
||||
db.country_dguid,
|
||||
db.country_en_name,
|
||||
db.country_fr_name,
|
||||
db.country_en_abbreviation,
|
||||
db.country_fr_abbreviation,
|
||||
db.grc_dguid,
|
||||
db.grc_en_name,
|
||||
db.grc_fr_name,
|
||||
db.pr_dguid,
|
||||
db.pr_en_name,
|
||||
db.pr_fr_name,
|
||||
db.pr_en_abbreviation,
|
||||
db.pr_fr_abbreviation,
|
||||
db.pr_iso_code,
|
||||
db.car_dguid,
|
||||
db.car_en_name,
|
||||
db.car_fr_name,
|
||||
db.er_dguid,
|
||||
db.er_name,
|
||||
db.cd_dguid,
|
||||
db.cd_name,
|
||||
db.cd_type,
|
||||
db.ccs_dguid,
|
||||
db.ccs_name,
|
||||
db.cma_dguid,
|
||||
db.cma_p_dguid,
|
||||
db.cma_name,
|
||||
db.cma_type,
|
||||
db.csd_dguid,
|
||||
db.csd_name,
|
||||
db.csd_type,
|
||||
db.sac_type,
|
||||
db.sac_code,
|
||||
db.fed_dguid,
|
||||
db.fed_name,
|
||||
db.fed_en_name,
|
||||
db.fed_fr_name,
|
||||
db.ct_dguid,
|
||||
db.ada_dguid,
|
||||
db.da_dguid,
|
||||
db.db_dguid,
|
||||
placenames.pn_dguid,
|
||||
placenames.pn_name,
|
||||
placenames.pn_source,
|
||||
placenames.geometry as geom
|
||||
FROM bronze.pn_2021_tmp as placenames,
|
||||
silver.db_2021 as db
|
||||
WHERE ST_Intersects(placenames.geometry, db.geom);
|
||||
|
||||
CREATE INDEX pn_2021_geom_idx ON
|
||||
silver.pn_2021
|
||||
USING GIST(geom) WITH (FILLFACTOR = 100);
|
||||
"""
|
||||
|
||||
with engine.connect() as conn:
|
||||
conn.execute(text(sql))
|
||||
conn.commit()
|
||||
Reference in New Issue
Block a user