mirror of
https://github.com/dataforcanada/d4c-datapkg-statistical.git
synced 2026-06-15 15:10:56 +02:00
Scraping the table names from from https://www150.statcan.gc.ca/n1/en/type/data
Will compare against the productIds available at https://www150.statcan.gc.ca/t1/wds/rest/getAllCubesListLite
This commit is contained in:
@@ -0,0 +1,93 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "561e3485-50c3-4177-861a-5989ae437b33",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import time\n",
|
||||
"import aria2p\n",
|
||||
"\n",
|
||||
"# initialization, these are the default values\n",
|
||||
"aria2 = aria2p.API(\n",
|
||||
" aria2p.Client(\n",
|
||||
" host=\"http://localhost\",\n",
|
||||
" port=6800,\n",
|
||||
" secret=\"\"\n",
|
||||
" )\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "e30a5be2-a498-4f3c-9a94-a7a2659f258d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Helper function to wait for download to finish\n",
|
||||
"def wait_for_download(download):\n",
|
||||
" print(f\"Waiting for download: {download.name}\")\n",
|
||||
" while download.is_active:\n",
|
||||
" print(f\"Progress: {download.progress_string()} - {download.status}\")\n",
|
||||
" time.sleep(10)\n",
|
||||
" download = aria2.get_download(download.gid) # refresh state\n",
|
||||
" print(f\"Download completed: {download.name} - Status: {download.status}\")\n",
|
||||
" return download"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9fc190bd-3c68-48d7-902e-770bbd69fc38",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Waiting for download: test.parquet\n",
|
||||
"Progress: 0.00% - active\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"urls = ['https://data.dataforcanada.org/processed/statistics_canada/boundaries/2021/digital_boundary_files/db_2021.parquet']\n",
|
||||
"\n",
|
||||
"download = aria2.add_uris(urls, options={\"out\": \"test.parquet\"})\n",
|
||||
"download = wait_for_download(download)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "8000ee01-d10c-44d9-a826-2d3c44dabd7a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
Reference in New Issue
Block a user