mirror of
https://github.com/dataforcanada/d4c-service-main-site.git
synced 2026-06-13 14:00:51 +02:00
3.6 KiB
3.6 KiB
title
| title |
|---|
| DuckDB |
Here is a DuckDB example.
-- Load Extensions
INSTALL httpfs;
LOAD httpfs;
INSTALL spatial;
LOAD spatial;
-- Verbosity & Performance Monitoring
SET enable_progress_bar=true;
PRAGMA enable_profiling='query_tree';
SET explain_output='all';SET enable_progress_bar=true;
PRAGMA enable_profiling='json'; -- or 'query_tree' for a visual text tree
SET http_keep_alive=true;
-- Save Results to Local Database
ATTACH 'census_data.db' AS census_data_db;
/*
How to Determine the Field Names for Census of Population Characteristics
1. First find the characteristic ID by going to https://1drv.ms/x/c/d42308bcd3b7a4a1/ESlrAmKqXp9BnsMXqurcB0sB9qy1r2-ZV8tCL9nCns_Mpg?e=8qIroJ
2. Construct the field name
- If we wanted the total count for the "Population, 2021" characteristic, we would search for "count_total_1"
- If we wanted the male count for the "Population, 2021" characteristic, we would search for "count_male_total_1"
- If we wanted the female count for the "Population, 2021" characteristic, we would search for "count_female_total_1"
*/
/*
Get the following 2021 Census characteristics:
- count_total_1 - "Population, 2021".
- count_total_4 - "Total private dwellings".
- count_total_155 - "Total - Total income groups in 2020 for the population aged 15 years and over in private households - 100% data"
population, total private dwellings, and calculate percentage of population that
makes $100,000 or more, by Dissemination Area (2021 Census)".
Then we calculate the percentage of private households that earn $100,000 and over:
- percentage_over_100k - calculated from count_total_4 and count_total_5.
*/
-- Useful to see performance of queries
-- EXPLAIN ANALYZE SELECT
CREATE OR REPLACE TABLE census_data_db.da_income_stats_montreal AS
WITH calculated_data AS (
SELECT
geo.da_dguid,
geo.csd_dguid,
cop.count_total_1 AS population_2021,
cop.count_total_155 AS private_households_with_income,
cop.count_total_168 AS private_households_over_100k,
CAST((ST_Area_Spheroid(ST_FlipCoordinates(geo.geom)) / 1000000.0) AS DECIMAL(10, 4)) AS area_km2 -- Area in square kilometers
,geo.geom
FROM
'https://data-01.labs.dataforcanada.org/processed/ca_statcan_census_pop_dissemination_areas_tabular_2021.parquet' AS cop,
'https://data-01.labs.dataforcanada.org/processed/ca_statcan_dissemination_areas_digital_2021.parquet' AS geo
WHERE
cop.da_dguid = geo.da_dguid
AND geo.csd_dguid = '2021A00052466023' -- Montréal, QC
AND cop.count_total_1 > 0
)
SELECT
da_dguid,
csd_dguid,
population_2021,
private_households_with_income,
private_households_over_100k,
CAST(
CASE WHEN private_households_with_income > 0
THEN (CAST(private_households_over_100k AS BIGINT) * 100.00 / private_households_with_income)
ELSE 0 END AS DECIMAL(5, 2)
) AS private_households_percentage_over_100k,
CAST(
CASE WHEN area_km2 > 0
THEN (private_households_over_100k / area_km2)
ELSE 0 END AS INTEGER
) AS private_households_over_100k_per_km2
--
,geom
FROM
calculated_data
ORDER BY
private_households_over_100k_per_km2 DESC,
private_households_percentage_over_100k DESC
;
-- Output as GeoParquet
COPY geo_data TO 'ca_statcan_dissemination_areas_digital_2021_private_dwellings_questions.parquet' (FORMAT PARQUET);
-- Output as File Geodatabase
COPY geo_data TO 'ca_statcan_dissemination_areas_digital_2021_private_dwellings_question.gdb'
WITH (
FORMAT GDAL,
DRIVER 'OpenFileGDB',
GEOMETRY_TYPE 'POLYGON',
SRS 'EPSG:4326'
);