mirror of
https://github.com/dataforcanada/d4c-datapkg-statistical.git
synced 2026-06-13 14:10:55 +02:00
Made changes to processing of data tables
This commit is contained in:
@@ -178,7 +178,7 @@ def convert_to_lowest_type(df):
|
|||||||
for row in dtypes.itertuples():
|
for row in dtypes.itertuples():
|
||||||
column = row[0]
|
column = row[0]
|
||||||
the_type = str(row[1])
|
the_type = str(row[1])
|
||||||
if the_type == 'int64':
|
if the_type == 'Int64':
|
||||||
df[column] = pd.to_numeric(df[column], downcast='integer')
|
df[column] = pd.to_numeric(df[column], downcast='integer')
|
||||||
|
|
||||||
return df
|
return df
|
||||||
@@ -229,6 +229,8 @@ def process_cube(product_id, language="en"):
|
|||||||
- productId 43100011 has all with DECIMAL = 1 (float64)
|
- productId 43100011 has all with DECIMAL = 1 (float64)
|
||||||
- productId 17100009 has DECIMAL = 0 (int64)
|
- productId 17100009 has DECIMAL = 0 (int64)
|
||||||
- productId 35100076 has multiple DECIMAL precisions [0, 1, 2] (int64, float64, float64)
|
- productId 35100076 has multiple DECIMAL precisions [0, 1, 2] (int64, float64, float64)
|
||||||
|
- productId 10100164 has two columns whose names differ only by case: "Value" and "VALUE". It is processed fine by read_csv, and also when it is exported as parquet.
|
||||||
|
DuckDB has an issue with these column names, but Pandas and Polars are able to handle both "Value" and "VALUE".
|
||||||
"""
|
"""
|
||||||
cur.execute("SELECT product_id FROM downloaded WHERE product_id = ?", (product_id,))
|
cur.execute("SELECT product_id FROM downloaded WHERE product_id = ?", (product_id,))
|
||||||
result = cur.fetchone()
|
result = cur.fetchone()
|
||||||
@@ -268,6 +270,7 @@ def process_cube(product_id, language="en"):
|
|||||||
if column in columns:
|
if column in columns:
|
||||||
parameters["dtype"][column] = 'int16'
|
parameters["dtype"][column] = 'int16'
|
||||||
|
|
||||||
|
# The remaining columns should be string, with the exception of VALUE
|
||||||
for column in columns:
|
for column in columns:
|
||||||
if column not in columns_always_int_8 and column not in columns_always_int_16 and column != "VALUE":
|
if column not in columns_always_int_8 and column not in columns_always_int_16 and column != "VALUE":
|
||||||
parameters["dtype"][column] = 'string'
|
parameters["dtype"][column] = 'string'
|
||||||
|
|||||||
@@ -25,7 +25,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 20,
|
"execution_count": 1,
|
||||||
"id": "98859cd6-6fa4-4aef-a113-455699524fae",
|
"id": "98859cd6-6fa4-4aef-a113-455699524fae",
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"editable": true,
|
"editable": true,
|
||||||
@@ -77,7 +77,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 21,
|
"execution_count": 2,
|
||||||
"id": "28ac4c01-c1c5-427f-bb2c-0da99a4c5591",
|
"id": "28ac4c01-c1c5-427f-bb2c-0da99a4c5591",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
@@ -99,7 +99,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 22,
|
"execution_count": 3,
|
||||||
"id": "9daa94f4-16c9-4d8b-951e-3d5d38eb618f",
|
"id": "9daa94f4-16c9-4d8b-951e-3d5d38eb618f",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
@@ -109,7 +109,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 23,
|
"execution_count": 4,
|
||||||
"id": "0af9a4b3-7b59-460b-b933-504919d4bd2a",
|
"id": "0af9a4b3-7b59-460b-b933-504919d4bd2a",
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"editable": true,
|
"editable": true,
|
||||||
@@ -150,7 +150,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 24,
|
"execution_count": 5,
|
||||||
"id": "4b7996d2-75ab-4173-a17a-64fb7ab63740",
|
"id": "4b7996d2-75ab-4173-a17a-64fb7ab63740",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
@@ -163,7 +163,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 25,
|
"execution_count": 6,
|
||||||
"id": "23cbabc3-0d4b-4e28-a4df-b2c5e8c7ea8b",
|
"id": "23cbabc3-0d4b-4e28-a4df-b2c5e8c7ea8b",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
@@ -212,7 +212,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 26,
|
"execution_count": 7,
|
||||||
"id": "dc5573ef-734b-44d8-a4c4-0df19d655975",
|
"id": "dc5573ef-734b-44d8-a4c4-0df19d655975",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
@@ -222,7 +222,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 27,
|
"execution_count": 8,
|
||||||
"id": "eddf6501-8428-44cc-8d2d-e245803a3943",
|
"id": "eddf6501-8428-44cc-8d2d-e245803a3943",
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"editable": true,
|
"editable": true,
|
||||||
@@ -296,7 +296,7 @@
|
|||||||
" for row in dtypes.itertuples():\n",
|
" for row in dtypes.itertuples():\n",
|
||||||
" column = row[0]\n",
|
" column = row[0]\n",
|
||||||
" the_type = str(row[1])\n",
|
" the_type = str(row[1])\n",
|
||||||
" if the_type == 'int64':\n",
|
" if the_type == 'Int64':\n",
|
||||||
" df[column] = pd.to_numeric(df[column], downcast='integer')\n",
|
" df[column] = pd.to_numeric(df[column], downcast='integer')\n",
|
||||||
"\n",
|
"\n",
|
||||||
" return df"
|
" return df"
|
||||||
@@ -304,7 +304,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 29,
|
"execution_count": 10,
|
||||||
"id": "144e3716-b0e7-4a39-9a25-ededea506f4f",
|
"id": "144e3716-b0e7-4a39-9a25-ededea506f4f",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
@@ -352,7 +352,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 78,
|
"execution_count": 40,
|
||||||
"id": "858e405e-7c02-4193-8abe-f23951761b09",
|
"id": "858e405e-7c02-4193-8abe-f23951761b09",
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"editable": true,
|
"editable": true,
|
||||||
@@ -366,15 +366,13 @@
|
|||||||
"name": "stdout",
|
"name": "stdout",
|
||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
"text": [
|
"text": [
|
||||||
"Reading /data/tables/scratch/13100102.csv\n",
|
"Already processed 10100164\n",
|
||||||
"Index(['REF_DATE', 'GEO', 'DGUID',\n",
|
"Extracting /data/tables/input/en/10100164.zip to /data/tables/scratch\n",
|
||||||
" 'North American Industry Classification System (NAICS)',\n",
|
"Reading /data/tables/scratch/10100164.csv\n",
|
||||||
" 'Summary statistics', 'UOM', 'UOM_ID', 'SCALAR_FACTOR', 'SCALAR_ID',\n",
|
"Index(['REF_DATE', 'GEO', 'DGUID', 'Value', 'Type of cannabis', 'UOM',\n",
|
||||||
" 'VECTOR', 'COORDINATE', 'VALUE', 'STATUS', 'SYMBOL', 'TERMINATED',\n",
|
" 'UOM_ID', 'SCALAR_FACTOR', 'SCALAR_ID', 'VECTOR', 'COORDINATE', 'VALUE',\n",
|
||||||
" 'DECIMALS'],\n",
|
" 'STATUS', 'SYMBOL', 'TERMINATED', 'DECIMALS'],\n",
|
||||||
" dtype='object')\n",
|
" dtype='object')\n"
|
||||||
"{'engine': 'c', 'low_memory': True, 'nrows': 1000000, 'dtype': {'DECIMALS': 'int8', 'SCALAR_ID': 'int8', 'UOM_ID': 'int16', 'REF_DATE': 'string', 'GEO': 'string', 'DGUID': 'string', 'North American Industry Classification System (NAICS)': 'string', 'Summary statistics': 'string', 'UOM': 'string', 'SCALAR_FACTOR': 'string', 'VECTOR': 'string', 'COORDINATE': 'string', 'STATUS': 'string', 'SYMBOL': 'string', 'TERMINATED': 'string'}}\n",
|
|
||||||
"Reading /data/tables/scratch/13100102.csv as a Pandas dataframe\n"
|
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
@@ -384,9 +382,11 @@
|
|||||||
"- productId 43100011 has all with DECIMAL = 1 (float64)\n",
|
"- productId 43100011 has all with DECIMAL = 1 (float64)\n",
|
||||||
"- productId 17100009 has DECIMAL = 0 (int64)\n",
|
"- productId 17100009 has DECIMAL = 0 (int64)\n",
|
||||||
"- productId 35100076 has multiple DECIMAL precisions [0, 1, 2] (int64, float64, float64)\n",
|
"- productId 35100076 has multiple DECIMAL precisions [0, 1, 2] (int64, float64, float64)\n",
|
||||||
|
"- productId 10100164 has two columns named the same \"Value\" and \"VALUE\". It is processed fine with the read_csv, and when it is exported as parquet.\n",
|
||||||
|
"DuckDB has an issue with it, but Pandas and Polars are able to handle \"Value\" and \"VALUE\"\n",
|
||||||
"\"\"\"\n",
|
"\"\"\"\n",
|
||||||
"\n",
|
"\n",
|
||||||
"product_id = \"13100102\"\n",
|
"product_id = \"10100164\"\n",
|
||||||
"#def process_cube(product_id, language=\"en\"):\n",
|
"#def process_cube(product_id, language=\"en\"):\n",
|
||||||
"language = \"en\"\n",
|
"language = \"en\"\n",
|
||||||
"cur.execute(\"SELECT product_id FROM downloaded WHERE product_id = ?\", (product_id,))\n",
|
"cur.execute(\"SELECT product_id FROM downloaded WHERE product_id = ?\", (product_id,))\n",
|
||||||
@@ -394,7 +394,7 @@
|
|||||||
"if result:\n",
|
"if result:\n",
|
||||||
" print(f\"Already processed {product_id}\")\n",
|
" print(f\"Already processed {product_id}\")\n",
|
||||||
" #return\n",
|
" #return\n",
|
||||||
"#extract_zipfile(product_id, language)\n",
|
"extract_zipfile(product_id, language)\n",
|
||||||
"\"\"\"\n",
|
"\"\"\"\n",
|
||||||
"The pandas column reader is better than the Polars one\n",
|
"The pandas column reader is better than the Polars one\n",
|
||||||
"Here is an example where polars was not reading it right:\n",
|
"Here is an example where polars was not reading it right:\n",
|
||||||
@@ -417,8 +417,25 @@
|
|||||||
"}\n",
|
"}\n",
|
||||||
"\n",
|
"\n",
|
||||||
"columns = pd.read_csv(product_csv, nrows=0).columns\n",
|
"columns = pd.read_csv(product_csv, nrows=0).columns\n",
|
||||||
"print(columns)\n",
|
"print(columns)"
|
||||||
"\n",
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 30,
|
||||||
|
"id": "c1f89175-78e5-4e95-8a3e-3b65f0cb4b2d",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"{'engine': 'c', 'low_memory': True, 'nrows': 1000000, 'dtype': {'DECIMALS': 'int8', 'SCALAR_ID': 'int8', 'UOM_ID': 'int16', 'REF_DATE': 'string', 'GEO': 'string', 'DGUID': 'string', 'Value': 'string', 'Type of cannabis': 'string', 'UOM': 'string', 'SCALAR_FACTOR': 'string', 'VECTOR': 'string', 'COORDINATE': 'string', 'STATUS': 'string', 'SYMBOL': 'string', 'TERMINATED': 'string'}}\n",
|
||||||
|
"Reading /data/tables/scratch/10100164.csv as a Pandas dataframe\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
"columns_always_int_8 = [\"DECIMALS\", \"SCALAR_ID\"]\n",
|
"columns_always_int_8 = [\"DECIMALS\", \"SCALAR_ID\"]\n",
|
||||||
"for column in columns_always_int_8:\n",
|
"for column in columns_always_int_8:\n",
|
||||||
" if column in columns:\n",
|
" if column in columns:\n",
|
||||||
@@ -429,6 +446,7 @@
|
|||||||
" if column in columns:\n",
|
" if column in columns:\n",
|
||||||
" parameters[\"dtype\"][column] = 'int16'\n",
|
" parameters[\"dtype\"][column] = 'int16'\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
"# The remaining columns should be string, with the exception of VALUE\n",
|
||||||
"for column in columns:\n",
|
"for column in columns:\n",
|
||||||
" if column not in columns_always_int_8 and column not in columns_always_int_16 and column != \"VALUE\":\n",
|
" if column not in columns_always_int_8 and column not in columns_always_int_16 and column != \"VALUE\":\n",
|
||||||
" parameters[\"dtype\"][column] = 'string'\n",
|
" parameters[\"dtype\"][column] = 'string'\n",
|
||||||
@@ -443,7 +461,221 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 79,
|
"execution_count": 31,
|
||||||
|
"id": "87ff5f69-ca1f-40e0-ac73-c73dd8c1bd4d",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/html": [
|
||||||
|
"<div>\n",
|
||||||
|
"<style scoped>\n",
|
||||||
|
" .dataframe tbody tr th:only-of-type {\n",
|
||||||
|
" vertical-align: middle;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe tbody tr th {\n",
|
||||||
|
" vertical-align: top;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe thead th {\n",
|
||||||
|
" text-align: right;\n",
|
||||||
|
" }\n",
|
||||||
|
"</style>\n",
|
||||||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||||||
|
" <thead>\n",
|
||||||
|
" <tr style=\"text-align: right;\">\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" <th>REF_DATE</th>\n",
|
||||||
|
" <th>GEO</th>\n",
|
||||||
|
" <th>DGUID</th>\n",
|
||||||
|
" <th>Value</th>\n",
|
||||||
|
" <th>Type of cannabis</th>\n",
|
||||||
|
" <th>UOM</th>\n",
|
||||||
|
" <th>UOM_ID</th>\n",
|
||||||
|
" <th>SCALAR_FACTOR</th>\n",
|
||||||
|
" <th>SCALAR_ID</th>\n",
|
||||||
|
" <th>VECTOR</th>\n",
|
||||||
|
" <th>COORDINATE</th>\n",
|
||||||
|
" <th>VALUE</th>\n",
|
||||||
|
" <th>STATUS</th>\n",
|
||||||
|
" <th>SYMBOL</th>\n",
|
||||||
|
" <th>TERMINATED</th>\n",
|
||||||
|
" <th>DECIMALS</th>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </thead>\n",
|
||||||
|
" <tbody>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>0</th>\n",
|
||||||
|
" <td>2021/2022</td>\n",
|
||||||
|
" <td>Canada</td>\n",
|
||||||
|
" <td>2021A000011124</td>\n",
|
||||||
|
" <td>Value of sales</td>\n",
|
||||||
|
" <td>Total cannabis products</td>\n",
|
||||||
|
" <td>Dollars</td>\n",
|
||||||
|
" <td>81</td>\n",
|
||||||
|
" <td>thousands</td>\n",
|
||||||
|
" <td>3</td>\n",
|
||||||
|
" <td>v1490436660</td>\n",
|
||||||
|
" <td>1.1.1</td>\n",
|
||||||
|
" <td>4027928.0</td>\n",
|
||||||
|
" <td><NA></td>\n",
|
||||||
|
" <td><NA></td>\n",
|
||||||
|
" <td><NA></td>\n",
|
||||||
|
" <td>0</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>1</th>\n",
|
||||||
|
" <td>2021/2022</td>\n",
|
||||||
|
" <td>Canada</td>\n",
|
||||||
|
" <td>2021A000011124</td>\n",
|
||||||
|
" <td>Value of sales</td>\n",
|
||||||
|
" <td>Dried cannabis</td>\n",
|
||||||
|
" <td>Dollars</td>\n",
|
||||||
|
" <td>81</td>\n",
|
||||||
|
" <td>thousands</td>\n",
|
||||||
|
" <td>3</td>\n",
|
||||||
|
" <td>v1490436647</td>\n",
|
||||||
|
" <td>1.1.2</td>\n",
|
||||||
|
" <td>2861838.0</td>\n",
|
||||||
|
" <td><NA></td>\n",
|
||||||
|
" <td><NA></td>\n",
|
||||||
|
" <td><NA></td>\n",
|
||||||
|
" <td>0</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>2</th>\n",
|
||||||
|
" <td>2021/2022</td>\n",
|
||||||
|
" <td>Canada</td>\n",
|
||||||
|
" <td>2021A000011124</td>\n",
|
||||||
|
" <td>Value of sales</td>\n",
|
||||||
|
" <td>Inhaled cannabis extracts</td>\n",
|
||||||
|
" <td>Dollars</td>\n",
|
||||||
|
" <td>81</td>\n",
|
||||||
|
" <td>thousands</td>\n",
|
||||||
|
" <td>3</td>\n",
|
||||||
|
" <td>v1490436648</td>\n",
|
||||||
|
" <td>1.1.3</td>\n",
|
||||||
|
" <td>729178.0</td>\n",
|
||||||
|
" <td><NA></td>\n",
|
||||||
|
" <td><NA></td>\n",
|
||||||
|
" <td><NA></td>\n",
|
||||||
|
" <td>0</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>3</th>\n",
|
||||||
|
" <td>2021/2022</td>\n",
|
||||||
|
" <td>Canada</td>\n",
|
||||||
|
" <td>2021A000011124</td>\n",
|
||||||
|
" <td>Value of sales</td>\n",
|
||||||
|
" <td>Ingested cannabis extracts</td>\n",
|
||||||
|
" <td>Dollars</td>\n",
|
||||||
|
" <td>81</td>\n",
|
||||||
|
" <td>thousands</td>\n",
|
||||||
|
" <td>3</td>\n",
|
||||||
|
" <td>v1490436649</td>\n",
|
||||||
|
" <td>1.1.4</td>\n",
|
||||||
|
" <td>158283.0</td>\n",
|
||||||
|
" <td><NA></td>\n",
|
||||||
|
" <td><NA></td>\n",
|
||||||
|
" <td><NA></td>\n",
|
||||||
|
" <td>0</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>4</th>\n",
|
||||||
|
" <td>2021/2022</td>\n",
|
||||||
|
" <td>Canada</td>\n",
|
||||||
|
" <td>2021A000011124</td>\n",
|
||||||
|
" <td>Value of sales</td>\n",
|
||||||
|
" <td>Solid cannabis edibles</td>\n",
|
||||||
|
" <td>Dollars</td>\n",
|
||||||
|
" <td>81</td>\n",
|
||||||
|
" <td>thousands</td>\n",
|
||||||
|
" <td>3</td>\n",
|
||||||
|
" <td>v1490436650</td>\n",
|
||||||
|
" <td>1.1.5</td>\n",
|
||||||
|
" <td>166336.0</td>\n",
|
||||||
|
" <td><NA></td>\n",
|
||||||
|
" <td><NA></td>\n",
|
||||||
|
" <td><NA></td>\n",
|
||||||
|
" <td>0</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </tbody>\n",
|
||||||
|
"</table>\n",
|
||||||
|
"</div>"
|
||||||
|
],
|
||||||
|
"text/plain": [
|
||||||
|
" REF_DATE GEO DGUID Value \\\n",
|
||||||
|
"0 2021/2022 Canada 2021A000011124 Value of sales \n",
|
||||||
|
"1 2021/2022 Canada 2021A000011124 Value of sales \n",
|
||||||
|
"2 2021/2022 Canada 2021A000011124 Value of sales \n",
|
||||||
|
"3 2021/2022 Canada 2021A000011124 Value of sales \n",
|
||||||
|
"4 2021/2022 Canada 2021A000011124 Value of sales \n",
|
||||||
|
"\n",
|
||||||
|
" Type of cannabis UOM UOM_ID SCALAR_FACTOR SCALAR_ID \\\n",
|
||||||
|
"0 Total cannabis products Dollars 81 thousands 3 \n",
|
||||||
|
"1 Dried cannabis Dollars 81 thousands 3 \n",
|
||||||
|
"2 Inhaled cannabis extracts Dollars 81 thousands 3 \n",
|
||||||
|
"3 Ingested cannabis extracts Dollars 81 thousands 3 \n",
|
||||||
|
"4 Solid cannabis edibles Dollars 81 thousands 3 \n",
|
||||||
|
"\n",
|
||||||
|
" VECTOR COORDINATE VALUE STATUS SYMBOL TERMINATED DECIMALS \n",
|
||||||
|
"0 v1490436660 1.1.1 4027928.0 <NA> <NA> <NA> 0 \n",
|
||||||
|
"1 v1490436647 1.1.2 2861838.0 <NA> <NA> <NA> 0 \n",
|
||||||
|
"2 v1490436648 1.1.3 729178.0 <NA> <NA> <NA> 0 \n",
|
||||||
|
"3 v1490436649 1.1.4 158283.0 <NA> <NA> <NA> 0 \n",
|
||||||
|
"4 v1490436650 1.1.5 166336.0 <NA> <NA> <NA> 0 "
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 31,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"df.head()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 32,
|
||||||
|
"id": "fde0e149-d146-4516-8966-0989e4ccf290",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"REF_DATE string[python]\n",
|
||||||
|
"GEO string[python]\n",
|
||||||
|
"DGUID string[python]\n",
|
||||||
|
"Value string[python]\n",
|
||||||
|
"Type of cannabis string[python]\n",
|
||||||
|
"UOM string[python]\n",
|
||||||
|
"UOM_ID int16\n",
|
||||||
|
"SCALAR_FACTOR string[python]\n",
|
||||||
|
"SCALAR_ID int8\n",
|
||||||
|
"VECTOR string[python]\n",
|
||||||
|
"COORDINATE string[python]\n",
|
||||||
|
"VALUE float64\n",
|
||||||
|
"STATUS string[python]\n",
|
||||||
|
"SYMBOL string[python]\n",
|
||||||
|
"TERMINATED string[python]\n",
|
||||||
|
"DECIMALS int8\n",
|
||||||
|
"dtype: object"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 32,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"df.dtypes"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 33,
|
||||||
"id": "7579a135-1dfe-4fc0-991b-4b261d6577e0",
|
"id": "7579a135-1dfe-4fc0-991b-4b261d6577e0",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
@@ -451,8 +683,8 @@
|
|||||||
"name": "stdout",
|
"name": "stdout",
|
||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
"text": [
|
"text": [
|
||||||
"[1]\n",
|
"[0]\n",
|
||||||
"{'VALUE': 'float64'}\n"
|
"{'VALUE': 'Int64'}\n"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
@@ -469,19 +701,36 @@
|
|||||||
" print(convert_dict)\n",
|
" print(convert_dict)\n",
|
||||||
" df = df.astype(convert_dict)\n",
|
" df = df.astype(convert_dict)\n",
|
||||||
"elif 0 in (unique_decimal_values):\n",
|
"elif 0 in (unique_decimal_values):\n",
|
||||||
" if df[\"VALUE\"].dtype != \"int64\":\n",
|
" if df[\"VALUE\"].dtype != \"Int64\":\n",
|
||||||
" # If DECIMALS = [0]\n",
|
" # If DECIMALS = [0]\n",
|
||||||
" convert_dict = {\"VALUE\": \"int64\"}\n",
|
" convert_dict = {\"VALUE\": \"Int64\"}\n",
|
||||||
" print(convert_dict)\n",
|
" print(convert_dict)\n",
|
||||||
" df = df.astype(convert_dict)\n",
|
" df = df.astype(convert_dict)"
|
||||||
"\n",
|
]
|
||||||
"df = convert_to_lowest_type(df)\n",
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 36,
|
||||||
|
"id": "fe87b3b0-4e04-41a8-a704-75cb9829b0a5",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"df = convert_to_lowest_type(df)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 38,
|
||||||
|
"id": "74507546-4080-4962-88fc-58c1e3943d17",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
"df = compute_ref_date_bounds(df)"
|
"df = compute_ref_date_bounds(df)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 80,
|
"execution_count": 39,
|
||||||
"id": "6c2781b3-8eea-4317-a8c0-083d97ee04fc",
|
"id": "6c2781b3-8eea-4317-a8c0-083d97ee04fc",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
@@ -511,8 +760,8 @@
|
|||||||
" <th>REF_END_DATE</th>\n",
|
" <th>REF_END_DATE</th>\n",
|
||||||
" <th>GEO</th>\n",
|
" <th>GEO</th>\n",
|
||||||
" <th>DGUID</th>\n",
|
" <th>DGUID</th>\n",
|
||||||
" <th>North American Industry Classification System (NAICS)</th>\n",
|
" <th>Value</th>\n",
|
||||||
" <th>Summary statistics</th>\n",
|
" <th>Type of cannabis</th>\n",
|
||||||
" <th>UOM</th>\n",
|
" <th>UOM</th>\n",
|
||||||
" <th>UOM_ID</th>\n",
|
" <th>UOM_ID</th>\n",
|
||||||
" <th>SCALAR_FACTOR</th>\n",
|
" <th>SCALAR_FACTOR</th>\n",
|
||||||
@@ -529,144 +778,144 @@
|
|||||||
" <tbody>\n",
|
" <tbody>\n",
|
||||||
" <tr>\n",
|
" <tr>\n",
|
||||||
" <th>0</th>\n",
|
" <th>0</th>\n",
|
||||||
" <td>2014</td>\n",
|
" <td>2021/2022</td>\n",
|
||||||
" <td>2014-01-01</td>\n",
|
" <td>NaT</td>\n",
|
||||||
" <td>2014-12-31</td>\n",
|
" <td>NaT</td>\n",
|
||||||
" <td>Canada</td>\n",
|
" <td>Canada</td>\n",
|
||||||
" <td>2016A000011124</td>\n",
|
" <td>2021A000011124</td>\n",
|
||||||
" <td>Nursing and residential care facilities [623]</td>\n",
|
" <td>Value of sales</td>\n",
|
||||||
" <td>Operating revenue</td>\n",
|
" <td>Total cannabis products</td>\n",
|
||||||
" <td>Dollars</td>\n",
|
" <td>Dollars</td>\n",
|
||||||
" <td>81</td>\n",
|
" <td>81</td>\n",
|
||||||
" <td>millions</td>\n",
|
" <td>thousands</td>\n",
|
||||||
" <td>6</td>\n",
|
" <td>3</td>\n",
|
||||||
" <td>v114809189</td>\n",
|
" <td>v1490436660</td>\n",
|
||||||
" <td>1.1.1</td>\n",
|
" <td>1.1.1</td>\n",
|
||||||
" <td>9310.7</td>\n",
|
" <td>4027928</td>\n",
|
||||||
" <td><NA></td>\n",
|
" <td><NA></td>\n",
|
||||||
" <td><NA></td>\n",
|
" <td><NA></td>\n",
|
||||||
" <td><NA></td>\n",
|
" <td><NA></td>\n",
|
||||||
" <td>1</td>\n",
|
" <td>0</td>\n",
|
||||||
" </tr>\n",
|
" </tr>\n",
|
||||||
" <tr>\n",
|
" <tr>\n",
|
||||||
" <th>1</th>\n",
|
" <th>1</th>\n",
|
||||||
" <td>2014</td>\n",
|
" <td>2021/2022</td>\n",
|
||||||
" <td>2014-01-01</td>\n",
|
" <td>NaT</td>\n",
|
||||||
" <td>2014-12-31</td>\n",
|
" <td>NaT</td>\n",
|
||||||
" <td>Canada</td>\n",
|
" <td>Canada</td>\n",
|
||||||
" <td>2016A000011124</td>\n",
|
" <td>2021A000011124</td>\n",
|
||||||
" <td>Nursing and residential care facilities [623]</td>\n",
|
" <td>Value of sales</td>\n",
|
||||||
" <td>Operating expenses</td>\n",
|
" <td>Dried cannabis</td>\n",
|
||||||
" <td>Dollars</td>\n",
|
" <td>Dollars</td>\n",
|
||||||
" <td>81</td>\n",
|
" <td>81</td>\n",
|
||||||
" <td>millions</td>\n",
|
" <td>thousands</td>\n",
|
||||||
" <td>6</td>\n",
|
" <td>3</td>\n",
|
||||||
" <td>v114809190</td>\n",
|
" <td>v1490436647</td>\n",
|
||||||
" <td>1.1.2</td>\n",
|
" <td>1.1.2</td>\n",
|
||||||
" <td>8499.5</td>\n",
|
" <td>2861838</td>\n",
|
||||||
" <td><NA></td>\n",
|
" <td><NA></td>\n",
|
||||||
" <td><NA></td>\n",
|
" <td><NA></td>\n",
|
||||||
" <td><NA></td>\n",
|
" <td><NA></td>\n",
|
||||||
" <td>1</td>\n",
|
" <td>0</td>\n",
|
||||||
" </tr>\n",
|
" </tr>\n",
|
||||||
" <tr>\n",
|
" <tr>\n",
|
||||||
" <th>2</th>\n",
|
" <th>2</th>\n",
|
||||||
" <td>2014</td>\n",
|
" <td>2021/2022</td>\n",
|
||||||
" <td>2014-01-01</td>\n",
|
" <td>NaT</td>\n",
|
||||||
" <td>2014-12-31</td>\n",
|
" <td>NaT</td>\n",
|
||||||
" <td>Canada</td>\n",
|
" <td>Canada</td>\n",
|
||||||
" <td>2016A000011124</td>\n",
|
" <td>2021A000011124</td>\n",
|
||||||
" <td>Nursing and residential care facilities [623]</td>\n",
|
" <td>Value of sales</td>\n",
|
||||||
" <td>Salaries, wages, commissions and benefits</td>\n",
|
" <td>Inhaled cannabis extracts</td>\n",
|
||||||
" <td>Dollars</td>\n",
|
" <td>Dollars</td>\n",
|
||||||
" <td>81</td>\n",
|
" <td>81</td>\n",
|
||||||
" <td>millions</td>\n",
|
" <td>thousands</td>\n",
|
||||||
" <td>6</td>\n",
|
" <td>3</td>\n",
|
||||||
" <td>v114809191</td>\n",
|
" <td>v1490436648</td>\n",
|
||||||
" <td>1.1.3</td>\n",
|
" <td>1.1.3</td>\n",
|
||||||
" <td>4630.3</td>\n",
|
" <td>729178</td>\n",
|
||||||
" <td><NA></td>\n",
|
" <td><NA></td>\n",
|
||||||
" <td><NA></td>\n",
|
" <td><NA></td>\n",
|
||||||
" <td><NA></td>\n",
|
" <td><NA></td>\n",
|
||||||
" <td>1</td>\n",
|
" <td>0</td>\n",
|
||||||
" </tr>\n",
|
" </tr>\n",
|
||||||
" <tr>\n",
|
" <tr>\n",
|
||||||
" <th>3</th>\n",
|
" <th>3</th>\n",
|
||||||
" <td>2014</td>\n",
|
" <td>2021/2022</td>\n",
|
||||||
" <td>2014-01-01</td>\n",
|
" <td>NaT</td>\n",
|
||||||
" <td>2014-12-31</td>\n",
|
" <td>NaT</td>\n",
|
||||||
" <td>Canada</td>\n",
|
" <td>Canada</td>\n",
|
||||||
" <td>2016A000011124</td>\n",
|
" <td>2021A000011124</td>\n",
|
||||||
" <td>Nursing and residential care facilities [623]</td>\n",
|
" <td>Value of sales</td>\n",
|
||||||
" <td>Operating profit margin</td>\n",
|
" <td>Ingested cannabis extracts</td>\n",
|
||||||
" <td>Percent</td>\n",
|
" <td>Dollars</td>\n",
|
||||||
" <td>239</td>\n",
|
" <td>81</td>\n",
|
||||||
" <td>units</td>\n",
|
" <td>thousands</td>\n",
|
||||||
" <td>0</td>\n",
|
" <td>3</td>\n",
|
||||||
" <td>v114809192</td>\n",
|
" <td>v1490436649</td>\n",
|
||||||
" <td>1.1.4</td>\n",
|
" <td>1.1.4</td>\n",
|
||||||
" <td>8.7</td>\n",
|
" <td>158283</td>\n",
|
||||||
" <td><NA></td>\n",
|
" <td><NA></td>\n",
|
||||||
" <td><NA></td>\n",
|
" <td><NA></td>\n",
|
||||||
" <td><NA></td>\n",
|
" <td><NA></td>\n",
|
||||||
" <td>1</td>\n",
|
" <td>0</td>\n",
|
||||||
" </tr>\n",
|
" </tr>\n",
|
||||||
" <tr>\n",
|
" <tr>\n",
|
||||||
" <th>4</th>\n",
|
" <th>4</th>\n",
|
||||||
" <td>2014</td>\n",
|
" <td>2021/2022</td>\n",
|
||||||
" <td>2014-01-01</td>\n",
|
" <td>NaT</td>\n",
|
||||||
" <td>2014-12-31</td>\n",
|
" <td>NaT</td>\n",
|
||||||
" <td>Newfoundland and Labrador</td>\n",
|
" <td>Canada</td>\n",
|
||||||
" <td>2016A000210</td>\n",
|
" <td>2021A000011124</td>\n",
|
||||||
" <td>Nursing and residential care facilities [623]</td>\n",
|
" <td>Value of sales</td>\n",
|
||||||
" <td>Operating revenue</td>\n",
|
" <td>Solid cannabis edibles</td>\n",
|
||||||
" <td>Dollars</td>\n",
|
" <td>Dollars</td>\n",
|
||||||
" <td>81</td>\n",
|
" <td>81</td>\n",
|
||||||
" <td>millions</td>\n",
|
" <td>thousands</td>\n",
|
||||||
" <td>6</td>\n",
|
" <td>3</td>\n",
|
||||||
" <td>v114809193</td>\n",
|
" <td>v1490436650</td>\n",
|
||||||
" <td>2.1.1</td>\n",
|
" <td>1.1.5</td>\n",
|
||||||
" <td>97.9</td>\n",
|
" <td>166336</td>\n",
|
||||||
" <td><NA></td>\n",
|
" <td><NA></td>\n",
|
||||||
" <td><NA></td>\n",
|
" <td><NA></td>\n",
|
||||||
" <td><NA></td>\n",
|
" <td><NA></td>\n",
|
||||||
" <td>1</td>\n",
|
" <td>0</td>\n",
|
||||||
" </tr>\n",
|
" </tr>\n",
|
||||||
" </tbody>\n",
|
" </tbody>\n",
|
||||||
"</table>\n",
|
"</table>\n",
|
||||||
"</div>"
|
"</div>"
|
||||||
],
|
],
|
||||||
"text/plain": [
|
"text/plain": [
|
||||||
" REF_DATE REF_START_DATE REF_END_DATE GEO \\\n",
|
" REF_DATE REF_START_DATE REF_END_DATE GEO DGUID \\\n",
|
||||||
"0 2014 2014-01-01 2014-12-31 Canada \n",
|
"0 2021/2022 NaT NaT Canada 2021A000011124 \n",
|
||||||
"1 2014 2014-01-01 2014-12-31 Canada \n",
|
"1 2021/2022 NaT NaT Canada 2021A000011124 \n",
|
||||||
"2 2014 2014-01-01 2014-12-31 Canada \n",
|
"2 2021/2022 NaT NaT Canada 2021A000011124 \n",
|
||||||
"3 2014 2014-01-01 2014-12-31 Canada \n",
|
"3 2021/2022 NaT NaT Canada 2021A000011124 \n",
|
||||||
"4 2014 2014-01-01 2014-12-31 Newfoundland and Labrador \n",
|
"4 2021/2022 NaT NaT Canada 2021A000011124 \n",
|
||||||
"\n",
|
"\n",
|
||||||
" DGUID North American Industry Classification System (NAICS) \\\n",
|
" Value Type of cannabis UOM UOM_ID SCALAR_FACTOR \\\n",
|
||||||
"0 2016A000011124 Nursing and residential care facilities [623] \n",
|
"0 Value of sales Total cannabis products Dollars 81 thousands \n",
|
||||||
"1 2016A000011124 Nursing and residential care facilities [623] \n",
|
"1 Value of sales Dried cannabis Dollars 81 thousands \n",
|
||||||
"2 2016A000011124 Nursing and residential care facilities [623] \n",
|
"2 Value of sales Inhaled cannabis extracts Dollars 81 thousands \n",
|
||||||
"3 2016A000011124 Nursing and residential care facilities [623] \n",
|
"3 Value of sales Ingested cannabis extracts Dollars 81 thousands \n",
|
||||||
"4 2016A000210 Nursing and residential care facilities [623] \n",
|
"4 Value of sales Solid cannabis edibles Dollars 81 thousands \n",
|
||||||
"\n",
|
"\n",
|
||||||
" Summary statistics UOM UOM_ID SCALAR_FACTOR \\\n",
|
" SCALAR_ID VECTOR COORDINATE VALUE STATUS SYMBOL TERMINATED \\\n",
|
||||||
"0 Operating revenue Dollars 81 millions \n",
|
"0 3 v1490436660 1.1.1 4027928 <NA> <NA> <NA> \n",
|
||||||
"1 Operating expenses Dollars 81 millions \n",
|
"1 3 v1490436647 1.1.2 2861838 <NA> <NA> <NA> \n",
|
||||||
"2 Salaries, wages, commissions and benefits Dollars 81 millions \n",
|
"2 3 v1490436648 1.1.3 729178 <NA> <NA> <NA> \n",
|
||||||
"3 Operating profit margin Percent 239 units \n",
|
"3 3 v1490436649 1.1.4 158283 <NA> <NA> <NA> \n",
|
||||||
"4 Operating revenue Dollars 81 millions \n",
|
"4 3 v1490436650 1.1.5 166336 <NA> <NA> <NA> \n",
|
||||||
"\n",
|
"\n",
|
||||||
" SCALAR_ID VECTOR COORDINATE VALUE STATUS SYMBOL TERMINATED DECIMALS \n",
|
" DECIMALS \n",
|
||||||
"0 6 v114809189 1.1.1 9310.7 <NA> <NA> <NA> 1 \n",
|
"0 0 \n",
|
||||||
"1 6 v114809190 1.1.2 8499.5 <NA> <NA> <NA> 1 \n",
|
"1 0 \n",
|
||||||
"2 6 v114809191 1.1.3 4630.3 <NA> <NA> <NA> 1 \n",
|
"2 0 \n",
|
||||||
"3 0 v114809192 1.1.4 8.7 <NA> <NA> <NA> 1 \n",
|
"3 0 \n",
|
||||||
"4 6 v114809193 2.1.1 97.9 <NA> <NA> <NA> 1 "
|
"4 0 "
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"execution_count": 80,
|
"execution_count": 39,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"output_type": "execute_result"
|
"output_type": "execute_result"
|
||||||
}
|
}
|
||||||
@@ -677,7 +926,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 81,
|
"execution_count": 17,
|
||||||
"id": "49cc1fa3-1ac8-4510-b4fd-7827a041e4a9",
|
"id": "49cc1fa3-1ac8-4510-b4fd-7827a041e4a9",
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"editable": true,
|
"editable": true,
|
||||||
@@ -691,7 +940,7 @@
|
|||||||
"name": "stdout",
|
"name": "stdout",
|
||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
"text": [
|
"text": [
|
||||||
"Exporting dataframe as parquet to /data/tables/output/en/13100102.parquet\n"
|
"Exporting dataframe as parquet to /data/tables/output/en/10100164_test.parquet\n"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
@@ -710,7 +959,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 26,
|
"execution_count": null,
|
||||||
"id": "788bc668-8057-4e06-91a3-b99991e0a410",
|
"id": "788bc668-8057-4e06-91a3-b99991e0a410",
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"editable": true,
|
"editable": true,
|
||||||
@@ -719,28 +968,7 @@
|
|||||||
},
|
},
|
||||||
"tags": []
|
"tags": []
|
||||||
},
|
},
|
||||||
"outputs": [
|
"outputs": [],
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"Removing scratch files\n",
|
|
||||||
"Reading metadata /data/tables/input/metadata/43100011.json\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"ename": "OperationalError",
|
|
||||||
"evalue": "no such column: last_processed",
|
|
||||||
"output_type": "error",
|
|
||||||
"traceback": [
|
|
||||||
"\u001b[31m---------------------------------------------------------------------------\u001b[39m",
|
|
||||||
"\u001b[31mOperationalError\u001b[39m Traceback (most recent call last)",
|
|
||||||
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[26]\u001b[39m\u001b[32m, line 6\u001b[39m\n\u001b[32m 4\u001b[39m os.remove(\u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mscratch_folder\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mproduct_id\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m_MetaData.csv\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m 5\u001b[39m update_last_downloaded(product_id)\n\u001b[32m----> \u001b[39m\u001b[32m6\u001b[39m \u001b[43mupdate_last_processed\u001b[49m\u001b[43m(\u001b[49m\u001b[43mproduct_id\u001b[49m\u001b[43m)\u001b[49m\n",
|
|
||||||
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[5]\u001b[39m\u001b[32m, line 3\u001b[39m, in \u001b[36mupdate_last_processed\u001b[39m\u001b[34m(product_id)\u001b[39m\n\u001b[32m 1\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mupdate_last_processed\u001b[39m(product_id):\n\u001b[32m 2\u001b[39m time_finished_processing = datetime.now().isoformat()\n\u001b[32m----> \u001b[39m\u001b[32m3\u001b[39m \u001b[43mcur\u001b[49m\u001b[43m.\u001b[49m\u001b[43mexecute\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mUPDATE downloaded SET last_processed = ? WHERE product_id = ?\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m(\u001b[49m\u001b[43mtime_finished_processing\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mproduct_id\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 4\u001b[39m con.commit()\n",
|
|
||||||
"\u001b[31mOperationalError\u001b[39m: no such column: last_processed"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
"source": [
|
||||||
"# Remove the scratch files\n",
|
"# Remove the scratch files\n",
|
||||||
"print(\"Removing scratch files\")\n",
|
"print(\"Removing scratch files\")\n",
|
||||||
@@ -749,39 +977,6 @@
|
|||||||
"update_last_downloaded(product_id)\n",
|
"update_last_downloaded(product_id)\n",
|
||||||
"update_last_processed(product_id)"
|
"update_last_processed(product_id)"
|
||||||
]
|
]
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 40,
|
|
||||||
"id": "06fb89ad-77ba-46db-bb88-15f5636d707d",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"ename": "NameError",
|
|
||||||
"evalue": "name 'process_cube' is not defined",
|
|
||||||
"output_type": "error",
|
|
||||||
"traceback": [
|
|
||||||
"\u001b[31m---------------------------------------------------------------------------\u001b[39m",
|
|
||||||
"\u001b[31mNameError\u001b[39m Traceback (most recent call last)",
|
|
||||||
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[40]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m \u001b[43mprocess_cube\u001b[49m(\u001b[33m\"\u001b[39m\u001b[33m43100011\u001b[39m\u001b[33m\"\u001b[39m)\n",
|
|
||||||
"\u001b[31mNameError\u001b[39m: name 'process_cube' is not defined"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"process_cube(\"37100216\")"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "9cc04f15-006f-4a3a-9610-65736820ba84",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# This one has multiple DECIMAL precision values\n",
|
|
||||||
"process_cube(\"43100011)"
|
|
||||||
]
|
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
|
|||||||
Reference in New Issue
Block a user