mirror of
https://github.com/dataforcanada/d4c-datapkg-statistical.git
synced 2026-06-13 14:10:55 +02:00
Found some issues with the output parquet files
This commit is contained in:
@@ -2,7 +2,7 @@
|
|||||||
"cells": [
|
"cells": [
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 1,
|
"execution_count": 12,
|
||||||
"id": "5e04e469-d3eb-42ca-b548-5e6f1fa6af9d",
|
"id": "5e04e469-d3eb-42ca-b548-5e6f1fa6af9d",
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"editable": true,
|
"editable": true,
|
||||||
@@ -11,15 +11,7 @@
|
|||||||
},
|
},
|
||||||
"tags": []
|
"tags": []
|
||||||
},
|
},
|
||||||
"outputs": [
|
"outputs": [],
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"Buckaroo has been enabled as the default DataFrame viewer. To return to default dataframe visualization use `from buckaroo import disable; disable()`\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
"source": [
|
||||||
"import buckaroo\n",
|
"import buckaroo\n",
|
||||||
"import duckdb\n",
|
"import duckdb\n",
|
||||||
@@ -28,7 +20,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 2,
|
"execution_count": 13,
|
||||||
"id": "97c8e92b-21e4-4cc5-8dbe-7b42361ce3f9",
|
"id": "97c8e92b-21e4-4cc5-8dbe-7b42361ce3f9",
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"editable": true,
|
"editable": true,
|
||||||
@@ -44,7 +36,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 9,
|
"execution_count": 14,
|
||||||
"id": "e02f2416-fd16-444b-8fd4-eec2cecee5a7",
|
"id": "e02f2416-fd16-444b-8fd4-eec2cecee5a7",
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"editable": true,
|
"editable": true,
|
||||||
@@ -57,10 +49,10 @@
|
|||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
"text/plain": [
|
"text/plain": [
|
||||||
"<duckdb.duckdb.DuckDBPyConnection at 0x7fe04413c4b0>"
|
"<duckdb.duckdb.DuckDBPyConnection at 0x7f41d8274870>"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"execution_count": 9,
|
"execution_count": 14,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"output_type": "execute_result"
|
"output_type": "execute_result"
|
||||||
}
|
}
|
||||||
@@ -73,7 +65,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 10,
|
"execution_count": 15,
|
||||||
"id": "fafa7ce7-8619-4951-8c73-7bfbc66dc92f",
|
"id": "fafa7ce7-8619-4951-8c73-7bfbc66dc92f",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
@@ -102,7 +94,7 @@
|
|||||||
" ('DECIMALS', 'TINYINT', 'YES', None, None, None)]"
|
" ('DECIMALS', 'TINYINT', 'YES', None, None, None)]"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"execution_count": 10,
|
"execution_count": 15,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"output_type": "execute_result"
|
"output_type": "execute_result"
|
||||||
}
|
}
|
||||||
@@ -113,7 +105,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 5,
|
"execution_count": 16,
|
||||||
"id": "a4ed2881-91b7-4473-b246-a969ef59efba",
|
"id": "a4ed2881-91b7-4473-b246-a969ef59efba",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
@@ -123,7 +115,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 41,
|
"execution_count": 17,
|
||||||
"id": "f400feee-efb6-421a-b518-1f9c0fc21bcb",
|
"id": "f400feee-efb6-421a-b518-1f9c0fc21bcb",
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"editable": true,
|
"editable": true,
|
||||||
@@ -142,7 +134,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 42,
|
"execution_count": 18,
|
||||||
"id": "a187c850-981e-4348-a57d-2f25e57cf9db",
|
"id": "a187c850-981e-4348-a57d-2f25e57cf9db",
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"editable": true,
|
"editable": true,
|
||||||
@@ -155,7 +147,7 @@
|
|||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
"application/vnd.jupyter.widget-view+json": {
|
"application/vnd.jupyter.widget-view+json": {
|
||||||
"model_id": "f5c04f09c7134ecfa633b956711be823",
|
"model_id": "ea3f2ee32a4d4ba1970f9f373b86fc02",
|
||||||
"version_major": 2,
|
"version_major": 2,
|
||||||
"version_minor": 1
|
"version_minor": 1
|
||||||
},
|
},
|
||||||
@@ -173,7 +165,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 45,
|
"execution_count": 19,
|
||||||
"id": "ff3eb520-c78b-4976-b734-840dc0fa53ab",
|
"id": "ff3eb520-c78b-4976-b734-840dc0fa53ab",
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"editable": true,
|
"editable": true,
|
||||||
@@ -184,14 +176,15 @@
|
|||||||
},
|
},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"data": {
|
"ename": "IOException",
|
||||||
"text/plain": [
|
"evalue": "IO Error: No files found that match the pattern \"/data/tables/output/en/43100011.parquet\"",
|
||||||
"<duckdb.duckdb.DuckDBPyConnection at 0x7fe04413c4b0>"
|
"output_type": "error",
|
||||||
]
|
"traceback": [
|
||||||
},
|
"\u001b[31m---------------------------------------------------------------------------\u001b[39m",
|
||||||
"execution_count": 45,
|
"\u001b[31mIOException\u001b[39m Traceback (most recent call last)",
|
||||||
"metadata": {},
|
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[19]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m \u001b[43mcon\u001b[49m\u001b[43m.\u001b[49m\u001b[43mexecute\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\"\"\u001b[39;49m\n\u001b[32m 2\u001b[39m \u001b[33;43mSELECT *\u001b[39;49m\n\u001b[32m 3\u001b[39m \u001b[33;43mFROM parquet_file_metadata(\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[33;43m/data/tables/output/en/43100011.parquet\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[33;43m);\u001b[39;49m\n\u001b[32m 4\u001b[39m \u001b[33;43m\"\"\"\u001b[39;49m\u001b[43m)\u001b[49m\n",
|
||||||
"output_type": "execute_result"
|
"\u001b[31mIOException\u001b[39m: IO Error: No files found that match the pattern \"/data/tables/output/en/43100011.parquet\""
|
||||||
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
@@ -203,7 +196,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 46,
|
"execution_count": 20,
|
||||||
"id": "30c8fc7e-cafd-43ec-a58f-aed4128df594",
|
"id": "30c8fc7e-cafd-43ec-a58f-aed4128df594",
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"editable": true,
|
"editable": true,
|
||||||
@@ -214,34 +207,15 @@
|
|||||||
},
|
},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"data": {
|
"ename": "InvalidInputException",
|
||||||
"application/vnd.jupyter.widget-view+json": {
|
"evalue": "Invalid Input Error: No open result set",
|
||||||
"model_id": "7dc3ff8ea49441a5a548b223de7c823f",
|
"output_type": "error",
|
||||||
"version_major": 2,
|
"traceback": [
|
||||||
"version_minor": 0
|
"\u001b[31m---------------------------------------------------------------------------\u001b[39m",
|
||||||
},
|
"\u001b[31mInvalidInputException\u001b[39m Traceback (most recent call last)",
|
||||||
"text/plain": [
|
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[20]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m \u001b[43mcon\u001b[49m\u001b[43m.\u001b[49m\u001b[43mfetchall\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
|
||||||
"FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))"
|
"\u001b[31mInvalidInputException\u001b[39m: Invalid Input Error: No open result set"
|
||||||
]
|
]
|
||||||
},
|
|
||||||
"metadata": {},
|
|
||||||
"output_type": "display_data"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"data": {
|
|
||||||
"text/plain": [
|
|
||||||
"[('/data/tables/output/en/43100011.parquet',\n",
|
|
||||||
" 'parquet-cpp-arrow version 20.0.0',\n",
|
|
||||||
" 172033460,\n",
|
|
||||||
" 165,\n",
|
|
||||||
" 2,\n",
|
|
||||||
" None,\n",
|
|
||||||
" None)]"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"execution_count": 46,
|
|
||||||
"metadata": {},
|
|
||||||
"output_type": "execute_result"
|
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
@@ -250,17 +224,17 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 9,
|
"execution_count": 21,
|
||||||
"id": "10f3238b-36a6-4033-a299-fc04190e9d63",
|
"id": "10f3238b-36a6-4033-a299-fc04190e9d63",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
"text/plain": [
|
"text/plain": [
|
||||||
"<duckdb.duckdb.DuckDBPyConnection at 0x7f427a6b6170>"
|
"<duckdb.duckdb.DuckDBPyConnection at 0x7f41d8274870>"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"execution_count": 9,
|
"execution_count": 21,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"output_type": "execute_result"
|
"output_type": "execute_result"
|
||||||
}
|
}
|
||||||
@@ -275,10 +249,25 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 10,
|
"execution_count": 22,
|
||||||
"id": "dcf81727-340c-4bcc-aec3-7133e0010eda",
|
"id": "dcf81727-340c-4bcc-aec3-7133e0010eda",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"application/vnd.jupyter.widget-view+json": {
|
||||||
|
"model_id": "1298d824f74c4fc59151d5b746d8e864",
|
||||||
|
"version_major": 2,
|
||||||
|
"version_minor": 0
|
||||||
|
},
|
||||||
|
"text/plain": [
|
||||||
|
"FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "display_data"
|
||||||
|
}
|
||||||
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"distinct_dguid_geo = con.fetch_df()"
|
"distinct_dguid_geo = con.fetch_df()"
|
||||||
]
|
]
|
||||||
@@ -310,9 +299,256 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": 30,
|
||||||
"id": "cfd1621f-9c4e-46ba-a64e-1c191ff27b35",
|
"id": "cfd1621f-9c4e-46ba-a64e-1c191ff27b35",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"<duckdb.duckdb.DuckDBPyConnection at 0x7f41d8274870>"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 30,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"con.execute(\"\"\"\n",
|
||||||
|
"SELECT DISTINCT REF_DATE, REF_START_DATE, REF_END_DATE\n",
|
||||||
|
"FROM read_parquet('/data/tables/output/en/*.parquet')\n",
|
||||||
|
"ORDER BY REF_START_DATE ASC\n",
|
||||||
|
"\"\"\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 31,
|
||||||
|
"id": "5b5fed59-b900-47a0-8ab5-15e557f02dbf",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"distinct_ref_date = con.fetch_df()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 32,
|
||||||
|
"id": "41fbfb30-a471-4304-a985-9367ab589107",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"application/vnd.jupyter.widget-view+json": {
|
||||||
|
"model_id": "0eaef40bea7f4a29810b29c1648f0ec1",
|
||||||
|
"version_major": 2,
|
||||||
|
"version_minor": 1
|
||||||
|
},
|
||||||
|
"text/plain": [
|
||||||
|
"BuckarooInfiniteWidget(buckaroo_options={'sampled': ['random'], 'auto_clean': ['aggressive', 'conservative'], …"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "display_data"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"distinct_ref_date"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 45,
|
||||||
|
"id": "51a6663f-55e8-45c0-ab0e-dc85a5f69278",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"<duckdb.duckdb.DuckDBPyConnection at 0x7f41d8274870>"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 45,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"con.execute(\"\"\"\n",
|
||||||
|
"SELECT * FROM parquet_file_metadata('/data/tables/output/en/*.parquet')\n",
|
||||||
|
"\"\"\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 46,
|
||||||
|
"id": "07fc9ad3-1512-48cc-975f-5a9e7f633d30",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"application/vnd.jupyter.widget-view+json": {
|
||||||
|
"model_id": "02c94fd7d5d245baa91972617f80026a",
|
||||||
|
"version_major": 2,
|
||||||
|
"version_minor": 1
|
||||||
|
},
|
||||||
|
"text/plain": [
|
||||||
|
"BuckarooInfiniteWidget(buckaroo_options={'sampled': ['random'], 'auto_clean': ['aggressive', 'conservative'], …"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "display_data"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"con.fetch_df()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 47,
|
||||||
|
"id": "2c01e753-5c54-403a-be2e-c65d3d4bdc89",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import glob"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 49,
|
||||||
|
"id": "fa16862e-60b6-4ae7-a8f2-55bdcc5edb68",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"output_parquet_files = glob.glob(\"/data/tables/output/en/*.parquet\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 63,
|
||||||
|
"id": "e5edf922-f945-413e-8aca-2929eb6b8d2f",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"/data/tables/output/en/10100164.parquet\n",
|
||||||
|
"/data/tables/output/en/23100049.parquet\n",
|
||||||
|
"/data/tables/output/en/23100050.parquet\n",
|
||||||
|
"/data/tables/output/en/36100658.parquet\n",
|
||||||
|
"/data/tables/output/en/36100374.parquet\n",
|
||||||
|
"/data/tables/output/en/36100396.parquet\n",
|
||||||
|
"/data/tables/output/en/36100397.parquet\n",
|
||||||
|
"/data/tables/output/en/38100104.parquet\n",
|
||||||
|
"/data/tables/output/en/38100105.parquet\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"for parquet_file in output_parquet_files:\n",
|
||||||
|
" con.execute(f\"DESCRIBE '{parquet_file}'\")\n",
|
||||||
|
" value = con.fetch_df()\n",
|
||||||
|
" length_value_column = len(value[value['column_name'] == 'VALUE'][['column_name', 'column_type']])\n",
|
||||||
|
" if length_value_column == 0:\n",
|
||||||
|
" print(parquet_file) "
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "2f83e0e6-6126-44ca-974b-db92a97472cf",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Issue because there's a `Value` and `VALUE` columns in the CSV\n",
|
||||||
|
"`Value` and `VALUE` columns end up as `Value` and `VALUE_1` columns"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 65,
|
||||||
|
"id": "fc588dd7-a0d2-4b79-9c27-a6502ff99053",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"[('REF_DATE', 'VARCHAR', 'YES', None, None, None),\n",
|
||||||
|
" ('REF_START_DATE', 'TIMESTAMP_NS', 'YES', None, None, None),\n",
|
||||||
|
" ('REF_END_DATE', 'TIMESTAMP_NS', 'YES', None, None, None),\n",
|
||||||
|
" ('GEO', 'VARCHAR', 'YES', None, None, None),\n",
|
||||||
|
" ('DGUID', 'VARCHAR', 'YES', None, None, None),\n",
|
||||||
|
" ('Value', 'VARCHAR', 'YES', None, None, None),\n",
|
||||||
|
" ('Type of cannabis', 'VARCHAR', 'YES', None, None, None),\n",
|
||||||
|
" ('UOM', 'VARCHAR', 'YES', None, None, None),\n",
|
||||||
|
" ('UOM_ID', 'SMALLINT', 'YES', None, None, None),\n",
|
||||||
|
" ('SCALAR_FACTOR', 'VARCHAR', 'YES', None, None, None),\n",
|
||||||
|
" ('SCALAR_ID', 'TINYINT', 'YES', None, None, None),\n",
|
||||||
|
" ('VECTOR', 'VARCHAR', 'YES', None, None, None),\n",
|
||||||
|
" ('COORDINATE', 'VARCHAR', 'YES', None, None, None),\n",
|
||||||
|
" ('VALUE_1', 'BIGINT', 'YES', None, None, None),\n",
|
||||||
|
" ('STATUS', 'VARCHAR', 'YES', None, None, None),\n",
|
||||||
|
" ('SYMBOL', 'VARCHAR', 'YES', None, None, None),\n",
|
||||||
|
" ('TERMINATED', 'VARCHAR', 'YES', None, None, None),\n",
|
||||||
|
" ('DECIMALS', 'TINYINT', 'YES', None, None, None)]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 65,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"con.execute(\"DESCRIBE '/data/tables/output/en/10100164.parquet'\")\n",
|
||||||
|
"con.fetchall()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 66,
|
||||||
|
"id": "fef3d2e2-91c5-4fdf-bb00-3928c8f52d11",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"[('REF_DATE', 'VARCHAR', 'YES', None, None, None),\n",
|
||||||
|
" ('REF_START_DATE', 'TIMESTAMP_NS', 'YES', None, None, None),\n",
|
||||||
|
" ('REF_END_DATE', 'TIMESTAMP_NS', 'YES', None, None, None),\n",
|
||||||
|
" ('GEO', 'VARCHAR', 'YES', None, None, None),\n",
|
||||||
|
" ('DGUID', 'VARCHAR', 'YES', None, None, None),\n",
|
||||||
|
" ('Mainline companies', 'VARCHAR', 'YES', None, None, None),\n",
|
||||||
|
" ('Property accounts', 'VARCHAR', 'YES', None, None, None),\n",
|
||||||
|
" ('Value', 'VARCHAR', 'YES', None, None, None),\n",
|
||||||
|
" ('UOM', 'VARCHAR', 'YES', None, None, None),\n",
|
||||||
|
" ('UOM_ID', 'SMALLINT', 'YES', None, None, None),\n",
|
||||||
|
" ('SCALAR_FACTOR', 'VARCHAR', 'YES', None, None, None),\n",
|
||||||
|
" ('SCALAR_ID', 'TINYINT', 'YES', None, None, None),\n",
|
||||||
|
" ('VECTOR', 'VARCHAR', 'YES', None, None, None),\n",
|
||||||
|
" ('COORDINATE', 'VARCHAR', 'YES', None, None, None),\n",
|
||||||
|
" ('VALUE_1', 'BIGINT', 'YES', None, None, None),\n",
|
||||||
|
" ('STATUS', 'VARCHAR', 'YES', None, None, None),\n",
|
||||||
|
" ('SYMBOL', 'VARCHAR', 'YES', None, None, None),\n",
|
||||||
|
" ('TERMINATED', 'VARCHAR', 'YES', None, None, None),\n",
|
||||||
|
" ('DECIMALS', 'TINYINT', 'YES', None, None, None)]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 66,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"con.execute(\"DESCRIBE '/data/tables/output/en/23100049.parquet'\")\n",
|
||||||
|
"con.fetchall()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "b472b408-4633-429d-824b-0d846db6e9db",
|
||||||
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": []
|
"source": []
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user