Files
d4c-datapkg-statistical/experiments/zenodo/upload.ipynb
T
2025-07-10 17:04:28 +00:00

566 lines
19 KiB
Plaintext

{
"cells": [
{
"cell_type": "markdown",
"id": "551568d0",
"metadata": {},
"source": [
"# Playing in Zenodo Sandbox\n",
"- Generate an access token in https://sandbox.zenodo.org/account/settings/applications/tokens/new/\n",
"- Add a .env and populate it as such\n",
"```\n",
"ACCESS_TOKEN=yourkey\n",
"```"
]
},
{
"cell_type": "code",
"execution_count": 39,
"id": "05037dc2",
"metadata": {},
"outputs": [],
"source": [
"import pprint\n",
"import json\n",
"\n",
"from dotenv import dotenv_values\n",
"import requests\n",
"\n",
"config = dotenv_values(\".env\")"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "67f8da1d",
"metadata": {},
"outputs": [],
"source": [
"access_token = config['ACCESS_TOKEN']"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "34be3eee",
"metadata": {},
"outputs": [],
"source": [
"data = {\n",
" \"metadata\": {\n",
" \"title\": \"My first upload\",\n",
" \"upload_type\": \"poster\",\n",
" \"description\": \"This is my first upload\",\n",
" \"creators\": [\n",
" {\"name\": \"Doe, John\", \"affiliation\": \"Zenodo\"}\n",
" ]\n",
" }\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 36,
"id": "fc0e9407",
"metadata": {},
"outputs": [],
"source": [
"headers = {\"Content-Type\": \"application/json\"}\n",
"params = {\n",
" \"access_token\": access_token\n",
"}\n",
"r = requests.post('https://sandbox.zenodo.org/api/deposit/depositions',\n",
" data=json.dumps(data),\n",
" params=params,\n",
" headers=headers)"
]
},
{
"cell_type": "code",
"execution_count": 37,
"id": "cd1d370d",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"201\n"
]
}
],
"source": [
"print(r.status_code)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "28eaaf52",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'created': '2025-07-07T16:20:19.283543+00:00', 'modified': '2025-07-07T16:20:19.376843+00:00', 'id': 283543, 'conceptrecid': '283542', 'metadata': {'title': 'My first upload', 'publication_date': '2025-07-07', 'description': 'This is my first upload', 'access_right': 'open', 'creators': [{'name': 'Doe, John', 'affiliation': 'Zenodo'}], 'license': 'cc-by-4.0', 'imprint_publisher': 'Zenodo', 'upload_type': 'poster', 'prereserve_doi': {'doi': '10.5281/zenodo.283543', 'recid': 283543}}, 'title': 'My first upload', 'links': {'self': 'https://sandbox.zenodo.org/api/deposit/depositions/283543', 'html': 'https://sandbox.zenodo.org/deposit/283543', 'badge': 'https://sandbox.zenodo.org/badge/doi/.svg', 'files': 'https://sandbox.zenodo.org/api/deposit/depositions/283543/files', 'bucket': 'https://sandbox.zenodo.org/api/files/42ffdb14-7d46-4e5a-b71b-0ccb6ebff3ca', 'latest_draft': 'https://sandbox.zenodo.org/api/deposit/depositions/283543', 'latest_draft_html': 'https://sandbox.zenodo.org/deposit/283543', 'publish': 'https://sandbox.zenodo.org/api/deposit/depositions/283543/actions/publish', 'edit': 'https://sandbox.zenodo.org/api/deposit/depositions/283543/actions/edit', 'discard': 'https://sandbox.zenodo.org/api/deposit/depositions/283543/actions/discard', 'newversion': 'https://sandbox.zenodo.org/api/deposit/depositions/283543/actions/newversion'}, 'record_id': 283543, 'owner': 45472, 'files': [], 'state': 'unsubmitted', 'submitted': False}\n"
]
}
],
"source": [
"# I need the id of the upload to publish\n",
"print(r.json())"
]
},
{
"cell_type": "code",
"execution_count": 51,
"id": "92bdcbb3",
"metadata": {},
"outputs": [],
"source": [
"bucket_url = r.json()[\"links\"][\"bucket\"]"
]
},
{
"cell_type": "code",
"execution_count": 52,
"id": "3aa27864",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'https://sandbox.zenodo.org/api/files/42ffdb14-7d46-4e5a-b71b-0ccb6ebff3ca'"
]
},
"execution_count": 52,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"bucket_url"
]
},
{
"cell_type": "code",
"execution_count": 55,
"id": "d08efac2",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"https://sandbox.zenodo.org/api/files/42ffdb14-7d46-4e5a-b71b-0ccb6ebff3ca/43100024.parquet\n"
]
}
],
"source": [
"filename = \"43100024.parquet\"\n",
"with open(f\"/data/tables/output/en/{filename}\", \"rb\") as fp:\n",
" r = requests.put(f\"{bucket_url}/{filename}\",\n",
" data=fp,\n",
" params=params)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1f4cbe29",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'created': '2025-07-07T16:28:23.225641+00:00',\n",
" 'updated': '2025-07-07T16:29:20.759483+00:00',\n",
" 'version_id': 'fd809f90-47bd-4248-9489-7cb72c3a5e69',\n",
" 'key': '43100024.parquet',\n",
" 'size': 184126112,\n",
" 'mimetype': 'application/octet-stream',\n",
" 'checksum': 'md5:4b5503868cb364b34883aa81560b86df',\n",
" 'is_head': True,\n",
" 'delete_marker': False,\n",
" 'links': {'self': 'https://sandbox.zenodo.org/api/files/42ffdb14-7d46-4e5a-b71b-0ccb6ebff3ca/43100024.parquet',\n",
" 'version': 'https://sandbox.zenodo.org/api/files/42ffdb14-7d46-4e5a-b71b-0ccb6ebff3ca/43100024.parquet?version_id=fd809f90-47bd-4248-9489-7cb72c3a5e69',\n",
" 'uploads': 'https://sandbox.zenodo.org/api/files/42ffdb14-7d46-4e5a-b71b-0ccb6ebff3ca/43100024.parquet?uploads=1'}}"
]
},
"execution_count": 56,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# The version link is for this specific version\n",
"r.json()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "09154176",
"metadata": {},
"outputs": [],
"source": [
"# Publish the upload\n",
"id = \"283543\"\n",
"r = requests.post(f\"https://sandbox.zenodo.org/api/deposit/depositions/{id}/actions/publish\",\n",
" params=params)"
]
},
{
"cell_type": "code",
"execution_count": 61,
"id": "48fa9e82",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"202"
]
},
"execution_count": 61,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"r.status_code"
]
},
{
"cell_type": "code",
"execution_count": 62,
"id": "412cf6e5",
"metadata": {},
"outputs": [],
"source": [
"# Create a new version\n",
"r = requests.post(f\"https://sandbox.zenodo.org/api/deposit/depositions/{id}/actions/newversion\",\n",
" params=params)"
]
},
{
"cell_type": "code",
"execution_count": 64,
"id": "8e1ccaae",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"201"
]
},
"execution_count": 64,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"r.status_code"
]
},
{
"cell_type": "code",
"execution_count": 65,
"id": "1b7caf46",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'created': '2025-07-07T16:41:03.554066+00:00',\n",
" 'modified': '2025-07-07T16:41:03.709857+00:00',\n",
" 'id': 283548,\n",
" 'conceptrecid': '283542',\n",
" 'conceptdoi': '10.5072/zenodo.283542',\n",
" 'metadata': {'title': 'My first upload',\n",
" 'description': 'This is my first upload',\n",
" 'access_right': 'open',\n",
" 'creators': [{'name': 'Doe, John', 'affiliation': 'Zenodo'}],\n",
" 'license': 'cc-by-4.0',\n",
" 'imprint_publisher': 'Zenodo',\n",
" 'upload_type': 'poster',\n",
" 'prereserve_doi': {'doi': '10.5281/zenodo.283548', 'recid': 283548}},\n",
" 'title': 'My first upload',\n",
" 'links': {'self': 'https://sandbox.zenodo.org/api/deposit/depositions/283548',\n",
" 'html': 'https://sandbox.zenodo.org/deposit/283548',\n",
" 'badge': 'https://sandbox.zenodo.org/badge/doi/.svg',\n",
" 'files': 'https://sandbox.zenodo.org/api/deposit/depositions/283548/files',\n",
" 'bucket': 'https://sandbox.zenodo.org/api/files/522c1cea-4970-40cd-9e78-7f15303205d5',\n",
" 'latest_draft': 'https://sandbox.zenodo.org/api/deposit/depositions/283548',\n",
" 'latest_draft_html': 'https://sandbox.zenodo.org/deposit/283548',\n",
" 'publish': 'https://sandbox.zenodo.org/api/deposit/depositions/283548/actions/publish',\n",
" 'edit': 'https://sandbox.zenodo.org/api/deposit/depositions/283548/actions/edit',\n",
" 'discard': 'https://sandbox.zenodo.org/api/deposit/depositions/283548/actions/discard',\n",
" 'newversion': 'https://sandbox.zenodo.org/api/deposit/depositions/283548/actions/newversion'},\n",
" 'record_id': 283548,\n",
" 'owner': 45472,\n",
" 'files': [{'id': 'e17e0ee2-713c-41f4-8473-bb20e261f262',\n",
" 'filename': '43100024.parquet',\n",
" 'filesize': 184126112,\n",
" 'checksum': '4b5503868cb364b34883aa81560b86df',\n",
" 'links': {'self': 'https://sandbox.zenodo.org/api/deposit/depositions/283548/files/e17e0ee2-713c-41f4-8473-bb20e261f262',\n",
" 'download': 'https://sandbox.zenodo.org/api/records/283548/draft/files/43100024.parquet/content'}}],\n",
" 'state': 'unsubmitted',\n",
" 'submitted': False}"
]
},
"execution_count": 65,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"r.json()"
]
},
{
"cell_type": "code",
"execution_count": 72,
"id": "7a094e61",
"metadata": {},
"outputs": [],
"source": [
"# Need the \"id\" to publish the new version\n",
"id = \"283548\""
]
},
{
"cell_type": "code",
"execution_count": 66,
"id": "a837bdd6",
"metadata": {},
"outputs": [],
"source": [
"bucket_url = r.json()[\"links\"][\"bucket\"]"
]
},
{
"cell_type": "code",
"execution_count": 67,
"id": "dd9e8fd4",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'https://sandbox.zenodo.org/api/files/522c1cea-4970-40cd-9e78-7f15303205d5'"
]
},
"execution_count": 67,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"bucket_url"
]
},
{
"cell_type": "code",
"execution_count": 68,
"id": "23143c01",
"metadata": {},
"outputs": [],
"source": [
"# Add a different file\n",
"filename = \"12100136.parquet\"\n",
"with open(f\"/data/tables/output/en/{filename}\", \"rb\") as fp:\n",
" r = requests.put(f\"{bucket_url}/43100024.parquet\",\n",
" data=fp,\n",
" params=params)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8e1e5fd1",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"201\n"
]
}
],
"source": []
},
{
"cell_type": "code",
"execution_count": 71,
"id": "01ff1e95",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'created': '2025-07-07T18:20:35.453877+00:00',\n",
" 'updated': '2025-07-07T18:24:47.259818+00:00',\n",
" 'version_id': '20745124-2e18-4062-b1cb-43571916e3a3',\n",
" 'key': '43100024.parquet',\n",
" 'size': 186230461,\n",
" 'mimetype': 'application/octet-stream',\n",
" 'checksum': 'md5:ab89ee1b1f09834f78f8ad1b46e97659',\n",
" 'is_head': True,\n",
" 'delete_marker': False,\n",
" 'links': {'self': 'https://sandbox.zenodo.org/api/files/522c1cea-4970-40cd-9e78-7f15303205d5/43100024.parquet',\n",
" 'version': 'https://sandbox.zenodo.org/api/files/522c1cea-4970-40cd-9e78-7f15303205d5/43100024.parquet?version_id=20745124-2e18-4062-b1cb-43571916e3a3',\n",
" 'uploads': 'https://sandbox.zenodo.org/api/files/522c1cea-4970-40cd-9e78-7f15303205d5/43100024.parquet?uploads=1'}}"
]
},
"execution_count": 71,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"r.json()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c5bb978e",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'created': '2025-07-07T16:41:03.554066+00:00', 'modified': '2025-07-07T18:40:21.806148+00:00', 'id': 283548, 'conceptrecid': '283542', 'conceptdoi': '10.5072/zenodo.283542', 'metadata': {'title': 'My second upload', 'publication_date': '2025-06-14', 'access_right': 'open', 'creators': [{'name': 'Diego Ripley', 'affiliation': None}], 'license': 'cc-zero', 'imprint_publisher': 'Zenodo', 'upload_type': 'dataset', 'prereserve_doi': {'doi': '10.5281/zenodo.283548', 'recid': 283548}}, 'title': 'My second upload', 'links': {'self': 'https://sandbox.zenodo.org/api/deposit/depositions/283548', 'html': 'https://sandbox.zenodo.org/deposit/283548', 'badge': 'https://sandbox.zenodo.org/badge/doi/.svg', 'files': 'https://sandbox.zenodo.org/api/deposit/depositions/283548/files', 'bucket': 'https://sandbox.zenodo.org/api/files/522c1cea-4970-40cd-9e78-7f15303205d5', 'latest_draft': 'https://sandbox.zenodo.org/api/deposit/depositions/283548', 'latest_draft_html': 'https://sandbox.zenodo.org/deposit/283548', 'publish': 'https://sandbox.zenodo.org/api/deposit/depositions/283548/actions/publish', 'edit': 'https://sandbox.zenodo.org/api/deposit/depositions/283548/actions/edit', 'discard': 'https://sandbox.zenodo.org/api/deposit/depositions/283548/actions/discard', 'newversion': 'https://sandbox.zenodo.org/api/deposit/depositions/283548/actions/newversion'}, 'record_id': 283548, 'owner': 45472, 'files': [{'id': '97507810-a355-416e-8f6c-679c9dc1d564', 'filename': '43100024.parquet', 'filesize': 186230461, 'checksum': 'ab89ee1b1f09834f78f8ad1b46e97659', 'links': {'self': 'https://sandbox.zenodo.org/api/deposit/depositions/283548/files/97507810-a355-416e-8f6c-679c9dc1d564', 'download': 'https://sandbox.zenodo.org/api/records/283548/draft/files/43100024.parquet/content'}}], 'state': 'unsubmitted', 'submitted': False}\n"
]
}
],
"source": [
"# Need to add the metadata.publication_date\n",
"# Date of publication in ISO8601 format (YYYY-MM-DD). Defaults to current date.\n",
"data = {\n",
" \"metadata\": {\n",
" \"publication_date\": \"2025-07-14\",\n",
" \"upload_type\": \"dataset\",\n",
" \"title\": \"My second upload\",\n",
" \"creators\": [{\"name\": \"Diego Ripley\"}]\n",
" }\n",
"}\n",
"r = requests.put(f\"https://sandbox.zenodo.org/api/deposit/depositions/{id}\",\n",
" params=params,\n",
" data=json.dumps(data))\n",
"print(r.json())"
]
},
{
"cell_type": "code",
"execution_count": 83,
"id": "d7ece7f0",
"metadata": {},
"outputs": [],
"source": [
"r = requests.post(f\"https://sandbox.zenodo.org/api/deposit/depositions/{id}/actions/publish\",\n",
" params=params)"
]
},
{
"cell_type": "code",
"execution_count": 84,
"id": "20afb27b",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'created': '2025-07-07T18:40:25.191132+00:00',\n",
" 'modified': '2025-07-07T18:40:25.367499+00:00',\n",
" 'id': 283548,\n",
" 'conceptrecid': '283542',\n",
" 'doi': '10.5072/zenodo.283548',\n",
" 'conceptdoi': '10.5072/zenodo.283542',\n",
" 'doi_url': 'https://doi.org/10.5072/zenodo.283548',\n",
" 'metadata': {'title': 'My second upload',\n",
" 'doi': '10.5072/zenodo.283548',\n",
" 'publication_date': '2025-06-14',\n",
" 'access_right': 'open',\n",
" 'creators': [{'name': 'Diego Ripley', 'affiliation': None}],\n",
" 'license': 'cc-zero',\n",
" 'imprint_publisher': 'Zenodo',\n",
" 'upload_type': 'dataset',\n",
" 'prereserve_doi': {'doi': '10.5281/zenodo.283548', 'recid': 283548}},\n",
" 'title': 'My second upload',\n",
" 'links': {'self': 'https://sandbox.zenodo.org/api/records/283548',\n",
" 'html': 'https://sandbox.zenodo.org/records/283548',\n",
" 'doi': 'https://doi.org/10.5072/zenodo.283548',\n",
" 'parent_doi': 'https://doi.org/10.5072/zenodo.283542',\n",
" 'badge': 'https://sandbox.zenodo.org/badge/doi/10.5072%2Fzenodo.283548.svg',\n",
" 'conceptbadge': 'https://sandbox.zenodo.org/badge/doi/10.5072%2Fzenodo.283542.svg',\n",
" 'files': 'https://sandbox.zenodo.org/api/records/283548/files',\n",
" 'bucket': 'https://sandbox.zenodo.org/api/files/522c1cea-4970-40cd-9e78-7f15303205d5',\n",
" 'latest_draft': 'https://sandbox.zenodo.org/api/deposit/depositions/283548',\n",
" 'latest_draft_html': 'https://sandbox.zenodo.org/deposit/283548',\n",
" 'publish': 'https://sandbox.zenodo.org/api/deposit/depositions/283548/actions/publish',\n",
" 'edit': 'https://sandbox.zenodo.org/api/deposit/depositions/283548/actions/edit',\n",
" 'discard': 'https://sandbox.zenodo.org/api/deposit/depositions/283548/actions/discard',\n",
" 'newversion': 'https://sandbox.zenodo.org/api/deposit/depositions/283548/actions/newversion',\n",
" 'record': 'https://sandbox.zenodo.org/api/records/283548',\n",
" 'record_html': 'https://sandbox.zenodo.org/record/283548',\n",
" 'latest': 'https://sandbox.zenodo.org/api/records/283548/versions/latest',\n",
" 'latest_html': 'https://sandbox.zenodo.org/records/283548/latest'},\n",
" 'record_id': 283548,\n",
" 'owner': 45472,\n",
" 'files': [{'id': '97507810-a355-416e-8f6c-679c9dc1d564',\n",
" 'filename': '43100024.parquet',\n",
" 'filesize': 186230461,\n",
" 'checksum': 'ab89ee1b1f09834f78f8ad1b46e97659',\n",
" 'links': {'self': 'https://sandbox.zenodo.org/api/records/283548/files/97507810-a355-416e-8f6c-679c9dc1d564',\n",
" 'download': 'https://sandbox.zenodo.org/api/records/283548/draft/files/43100024.parquet/content'}}],\n",
" 'state': 'done',\n",
" 'submitted': True}"
]
},
"execution_count": 84,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"r.json()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "da12e35a",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}