mirror of
https://github.com/dataforcanada/d4c-datapkg-statistical.git
synced 2026-06-13 14:10:55 +02:00
Add Census of Population 2021 example that describes the structure of the data
This commit is contained in:
@@ -0,0 +1,516 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "90f9ff98-9caa-49b5-acb1-c42755c681b5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import duckdb\n",
|
||||
"import pandas as pd"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "c3684ed1-7b99-4bdf-9bee-a3adea2d66a7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"con = duckdb.connect()\n",
|
||||
"con.install_extension(\"spatial\")\n",
|
||||
"con.load_extension(\"spatial\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "ae35cdbf-ec32-40f7-a2e6-20407f04d4cb",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "caf012a82a014bbb8374d849ed9c2c9a",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"<duckdb.duckdb.DuckDBPyConnection at 0x7fd222a8fb70>"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"con.execute(\"\"\"\n",
|
||||
"DROP TABLE IF EXISTS geo_data;\n",
|
||||
"CREATE TABLE geo_data AS\n",
|
||||
"SELECT * FROM 'https://data-01.dataforcanada.org/processed/statistics_canada/census_of_population/2021/tabular/da_2021.parquet';\n",
|
||||
"\"\"\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "939f9204-ef9d-4eba-946c-a6b096d43fb5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"geo_data = con.sql(\"SELECT * FROM geo_data\").to_df()\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "7467c036-a4c5-4c3a-9278-a64c384c9ab7",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>da_dguid</th>\n",
|
||||
" <th>count_total_1</th>\n",
|
||||
" <th>count_total_4</th>\n",
|
||||
" <th>count_total_5</th>\n",
|
||||
" <th>count_total_6</th>\n",
|
||||
" <th>count_total_7</th>\n",
|
||||
" <th>count_total_8</th>\n",
|
||||
" <th>count_total_9</th>\n",
|
||||
" <th>count_total_10</th>\n",
|
||||
" <th>count_total_11</th>\n",
|
||||
" <th>...</th>\n",
|
||||
" <th>count_women_2622</th>\n",
|
||||
" <th>count_women_2623</th>\n",
|
||||
" <th>count_women_2624</th>\n",
|
||||
" <th>count_women_2625</th>\n",
|
||||
" <th>count_women_2626</th>\n",
|
||||
" <th>count_women_2627</th>\n",
|
||||
" <th>count_women_2628</th>\n",
|
||||
" <th>count_women_2629</th>\n",
|
||||
" <th>count_women_2630</th>\n",
|
||||
" <th>count_women_2631</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>2021S051210010165</td>\n",
|
||||
" <td>500</td>\n",
|
||||
" <td>229</td>\n",
|
||||
" <td>217</td>\n",
|
||||
" <td>1309.9</td>\n",
|
||||
" <td>0.38</td>\n",
|
||||
" <td>500</td>\n",
|
||||
" <td>45</td>\n",
|
||||
" <td>10</td>\n",
|
||||
" <td>20</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>15</td>\n",
|
||||
" <td>25</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>25</td>\n",
|
||||
" <td>20</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>20</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>2021S051210010166</td>\n",
|
||||
" <td>340</td>\n",
|
||||
" <td>165</td>\n",
|
||||
" <td>146</td>\n",
|
||||
" <td>3563.9</td>\n",
|
||||
" <td>0.10</td>\n",
|
||||
" <td>340</td>\n",
|
||||
" <td>50</td>\n",
|
||||
" <td>15</td>\n",
|
||||
" <td>25</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>15</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>30</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>30</td>\n",
|
||||
" <td>20</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>20</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>2021S051210010167</td>\n",
|
||||
" <td>407</td>\n",
|
||||
" <td>201</td>\n",
|
||||
" <td>184</td>\n",
|
||||
" <td>3217.4</td>\n",
|
||||
" <td>0.13</td>\n",
|
||||
" <td>405</td>\n",
|
||||
" <td>45</td>\n",
|
||||
" <td>15</td>\n",
|
||||
" <td>10</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>10</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>35</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>35</td>\n",
|
||||
" <td>25</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>25</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>2021S051210010168</td>\n",
|
||||
" <td>523</td>\n",
|
||||
" <td>274</td>\n",
|
||||
" <td>248</td>\n",
|
||||
" <td>2736.8</td>\n",
|
||||
" <td>0.19</td>\n",
|
||||
" <td>525</td>\n",
|
||||
" <td>55</td>\n",
|
||||
" <td>15</td>\n",
|
||||
" <td>15</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>15</td>\n",
|
||||
" <td>30</td>\n",
|
||||
" <td>35</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>35</td>\n",
|
||||
" <td>25</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>20</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>2021S051210010169</td>\n",
|
||||
" <td>285</td>\n",
|
||||
" <td>131</td>\n",
|
||||
" <td>123</td>\n",
|
||||
" <td>4305.1</td>\n",
|
||||
" <td>0.07</td>\n",
|
||||
" <td>285</td>\n",
|
||||
" <td>40</td>\n",
|
||||
" <td>10</td>\n",
|
||||
" <td>15</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>10</td>\n",
|
||||
" <td>10</td>\n",
|
||||
" <td>20</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>20</td>\n",
|
||||
" <td>15</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>15</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>...</th>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>57931</th>\n",
|
||||
" <td>2021S051262080024</td>\n",
|
||||
" <td>423</td>\n",
|
||||
" <td>166</td>\n",
|
||||
" <td>140</td>\n",
|
||||
" <td>1055.9</td>\n",
|
||||
" <td>0.40</td>\n",
|
||||
" <td>425</td>\n",
|
||||
" <td>90</td>\n",
|
||||
" <td>25</td>\n",
|
||||
" <td>35</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>15</td>\n",
|
||||
" <td>10</td>\n",
|
||||
" <td>55</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>50</td>\n",
|
||||
" <td>40</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>40</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>57932</th>\n",
|
||||
" <td>2021S051262080025</td>\n",
|
||||
" <td>514</td>\n",
|
||||
" <td>162</td>\n",
|
||||
" <td>148</td>\n",
|
||||
" <td>1.8</td>\n",
|
||||
" <td>287.39</td>\n",
|
||||
" <td>515</td>\n",
|
||||
" <td>160</td>\n",
|
||||
" <td>50</td>\n",
|
||||
" <td>55</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>15</td>\n",
|
||||
" <td>10</td>\n",
|
||||
" <td>90</td>\n",
|
||||
" <td>5</td>\n",
|
||||
" <td>90</td>\n",
|
||||
" <td>65</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>60</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>57933</th>\n",
|
||||
" <td>2021S051262080026</td>\n",
|
||||
" <td>868</td>\n",
|
||||
" <td>276</td>\n",
|
||||
" <td>249</td>\n",
|
||||
" <td>3.4</td>\n",
|
||||
" <td>251.60</td>\n",
|
||||
" <td>865</td>\n",
|
||||
" <td>285</td>\n",
|
||||
" <td>100</td>\n",
|
||||
" <td>85</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>20</td>\n",
|
||||
" <td>15</td>\n",
|
||||
" <td>170</td>\n",
|
||||
" <td>10</td>\n",
|
||||
" <td>160</td>\n",
|
||||
" <td>120</td>\n",
|
||||
" <td>5</td>\n",
|
||||
" <td>5</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>110</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>57934</th>\n",
|
||||
" <td>2021S051262080027</td>\n",
|
||||
" <td>776</td>\n",
|
||||
" <td>196</td>\n",
|
||||
" <td>169</td>\n",
|
||||
" <td>31.5</td>\n",
|
||||
" <td>24.65</td>\n",
|
||||
" <td>775</td>\n",
|
||||
" <td>300</td>\n",
|
||||
" <td>115</td>\n",
|
||||
" <td>90</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>20</td>\n",
|
||||
" <td>10</td>\n",
|
||||
" <td>160</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>160</td>\n",
|
||||
" <td>100</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>100</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>57935</th>\n",
|
||||
" <td>2021S051262080028</td>\n",
|
||||
" <td>573</td>\n",
|
||||
" <td>143</td>\n",
|
||||
" <td>123</td>\n",
|
||||
" <td>146.9</td>\n",
|
||||
" <td>3.90</td>\n",
|
||||
" <td>575</td>\n",
|
||||
" <td>205</td>\n",
|
||||
" <td>75</td>\n",
|
||||
" <td>75</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>10</td>\n",
|
||||
" <td>15</td>\n",
|
||||
" <td>105</td>\n",
|
||||
" <td>5</td>\n",
|
||||
" <td>105</td>\n",
|
||||
" <td>70</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>65</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"<p>57936 rows × 7432 columns</p>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" da_dguid count_total_1 count_total_4 count_total_5 \\\n",
|
||||
"0 2021S051210010165 500 229 217 \n",
|
||||
"1 2021S051210010166 340 165 146 \n",
|
||||
"2 2021S051210010167 407 201 184 \n",
|
||||
"3 2021S051210010168 523 274 248 \n",
|
||||
"4 2021S051210010169 285 131 123 \n",
|
||||
"... ... ... ... ... \n",
|
||||
"57931 2021S051262080024 423 166 140 \n",
|
||||
"57932 2021S051262080025 514 162 148 \n",
|
||||
"57933 2021S051262080026 868 276 249 \n",
|
||||
"57934 2021S051262080027 776 196 169 \n",
|
||||
"57935 2021S051262080028 573 143 123 \n",
|
||||
"\n",
|
||||
" count_total_6 count_total_7 count_total_8 count_total_9 \\\n",
|
||||
"0 1309.9 0.38 500 45 \n",
|
||||
"1 3563.9 0.10 340 50 \n",
|
||||
"2 3217.4 0.13 405 45 \n",
|
||||
"3 2736.8 0.19 525 55 \n",
|
||||
"4 4305.1 0.07 285 40 \n",
|
||||
"... ... ... ... ... \n",
|
||||
"57931 1055.9 0.40 425 90 \n",
|
||||
"57932 1.8 287.39 515 160 \n",
|
||||
"57933 3.4 251.60 865 285 \n",
|
||||
"57934 31.5 24.65 775 300 \n",
|
||||
"57935 146.9 3.90 575 205 \n",
|
||||
"\n",
|
||||
" count_total_10 count_total_11 ... count_women_2622 \\\n",
|
||||
"0 10 20 ... 0 \n",
|
||||
"1 15 25 ... 15 \n",
|
||||
"2 15 10 ... 10 \n",
|
||||
"3 15 15 ... 15 \n",
|
||||
"4 10 15 ... 10 \n",
|
||||
"... ... ... ... ... \n",
|
||||
"57931 25 35 ... 15 \n",
|
||||
"57932 50 55 ... 15 \n",
|
||||
"57933 100 85 ... 20 \n",
|
||||
"57934 115 90 ... 20 \n",
|
||||
"57935 75 75 ... 10 \n",
|
||||
"\n",
|
||||
" count_women_2623 count_women_2624 count_women_2625 count_women_2626 \\\n",
|
||||
"0 15 25 0 25 \n",
|
||||
"1 0 30 0 30 \n",
|
||||
"2 0 35 0 35 \n",
|
||||
"3 30 35 0 35 \n",
|
||||
"4 10 20 0 20 \n",
|
||||
"... ... ... ... ... \n",
|
||||
"57931 10 55 0 50 \n",
|
||||
"57932 10 90 5 90 \n",
|
||||
"57933 15 170 10 160 \n",
|
||||
"57934 10 160 0 160 \n",
|
||||
"57935 15 105 5 105 \n",
|
||||
"\n",
|
||||
" count_women_2627 count_women_2628 count_women_2629 count_women_2630 \\\n",
|
||||
"0 20 0 0 0 \n",
|
||||
"1 20 0 0 0 \n",
|
||||
"2 25 0 0 0 \n",
|
||||
"3 25 0 0 0 \n",
|
||||
"4 15 0 0 0 \n",
|
||||
"... ... ... ... ... \n",
|
||||
"57931 40 0 0 0 \n",
|
||||
"57932 65 0 0 0 \n",
|
||||
"57933 120 5 5 0 \n",
|
||||
"57934 100 0 0 0 \n",
|
||||
"57935 70 0 0 0 \n",
|
||||
"\n",
|
||||
" count_women_2631 \n",
|
||||
"0 20 \n",
|
||||
"1 20 \n",
|
||||
"2 25 \n",
|
||||
"3 20 \n",
|
||||
"4 15 \n",
|
||||
"... ... \n",
|
||||
"57931 40 \n",
|
||||
"57932 60 \n",
|
||||
"57933 110 \n",
|
||||
"57934 100 \n",
|
||||
"57935 65 \n",
|
||||
"\n",
|
||||
"[57936 rows x 7432 columns]"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"geo_data"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4af7039d-2454-403a-b7c4-ba0dc4ed6b03",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
Reference in New Issue
Block a user