diff --git a/experiments/zenodo/upload.ipynb b/experiments/zenodo/upload.ipynb new file mode 100644 index 0000000..5a9b83a --- /dev/null +++ b/experiments/zenodo/upload.ipynb @@ -0,0 +1,565 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "551568d0", + "metadata": {}, + "source": [ + "# Playing in Zenodo Sandbox\n", + "- Generate an access token at https://sandbox.zenodo.org/account/settings/applications/tokens/new/\n", + "- Add a `.env` file in the notebook's working directory and populate it as follows:\n", + "```\n", + "ACCESS_TOKEN=yourkey\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "05037dc2", + "metadata": {}, + "outputs": [], + "source": [ + "import pprint\n", + "import json\n", + "\n", + "from dotenv import dotenv_values\n", + "import requests\n", + "\n", + "config = dotenv_values(\".env\")" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "67f8da1d", + "metadata": {}, + "outputs": [], + "source": [ + "access_token = config['ACCESS_TOKEN']" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "34be3eee", + "metadata": {}, + "outputs": [], + "source": [ + "data = {\n", + " \"metadata\": {\n", + " \"title\": \"My first upload\",\n", + " \"upload_type\": \"poster\",\n", + " \"description\": \"This is my first upload\",\n", + " \"creators\": [\n", + " {\"name\": \"Doe, John\", \"affiliation\": \"Zenodo\"}\n", + " ]\n", + " }\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "fc0e9407", + "metadata": {}, + "outputs": [], + "source": [ + "headers = {\"Content-Type\": \"application/json\"}\n", + "params = {\n", + " \"access_token\": access_token\n", + "}\n", + "r = requests.post('https://sandbox.zenodo.org/api/deposit/depositions',\n", + " data=json.dumps(data),\n", + " params=params,\n", + " headers=headers)" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "cd1d370d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "201\n" + ] + } 
+ ], + "source": [ + "print(r.status_code)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "28eaaf52", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'created': '2025-07-07T16:20:19.283543+00:00', 'modified': '2025-07-07T16:20:19.376843+00:00', 'id': 283543, 'conceptrecid': '283542', 'metadata': {'title': 'My first upload', 'publication_date': '2025-07-07', 'description': 'This is my first upload', 'access_right': 'open', 'creators': [{'name': 'Doe, John', 'affiliation': 'Zenodo'}], 'license': 'cc-by-4.0', 'imprint_publisher': 'Zenodo', 'upload_type': 'poster', 'prereserve_doi': {'doi': '10.5281/zenodo.283543', 'recid': 283543}}, 'title': 'My first upload', 'links': {'self': 'https://sandbox.zenodo.org/api/deposit/depositions/283543', 'html': 'https://sandbox.zenodo.org/deposit/283543', 'badge': 'https://sandbox.zenodo.org/badge/doi/.svg', 'files': 'https://sandbox.zenodo.org/api/deposit/depositions/283543/files', 'bucket': 'https://sandbox.zenodo.org/api/files/42ffdb14-7d46-4e5a-b71b-0ccb6ebff3ca', 'latest_draft': 'https://sandbox.zenodo.org/api/deposit/depositions/283543', 'latest_draft_html': 'https://sandbox.zenodo.org/deposit/283543', 'publish': 'https://sandbox.zenodo.org/api/deposit/depositions/283543/actions/publish', 'edit': 'https://sandbox.zenodo.org/api/deposit/depositions/283543/actions/edit', 'discard': 'https://sandbox.zenodo.org/api/deposit/depositions/283543/actions/discard', 'newversion': 'https://sandbox.zenodo.org/api/deposit/depositions/283543/actions/newversion'}, 'record_id': 283543, 'owner': 45472, 'files': [], 'state': 'unsubmitted', 'submitted': False}\n" + ] + } + ], + "source": [ + "# I need the id of the upload to publish\n", + "print(r.json())" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "id": "92bdcbb3", + "metadata": {}, + "outputs": [], + "source": [ + "bucket_url = r.json()[\"links\"][\"bucket\"]" + ] + }, + { + "cell_type": "code", + 
"execution_count": 52, + "id": "3aa27864", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'https://sandbox.zenodo.org/api/files/42ffdb14-7d46-4e5a-b71b-0ccb6ebff3ca'" + ] + }, + "execution_count": 52, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bucket_url" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "id": "d08efac2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "https://sandbox.zenodo.org/api/files/42ffdb14-7d46-4e5a-b71b-0ccb6ebff3ca/43100024.parquet\n" + ] + } + ], + "source": [ + "filename = \"43100024.parquet\"\n", + "with open(f\"/data/tables/output/en/{filename}\", \"rb\") as fp:\n", + " r = requests.put(f\"{bucket_url}/{filename}\",\n", + " data=fp,\n", + " params=params)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1f4cbe29", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'created': '2025-07-07T16:28:23.225641+00:00',\n", + " 'updated': '2025-07-07T16:29:20.759483+00:00',\n", + " 'version_id': 'fd809f90-47bd-4248-9489-7cb72c3a5e69',\n", + " 'key': '43100024.parquet',\n", + " 'size': 184126112,\n", + " 'mimetype': 'application/octet-stream',\n", + " 'checksum': 'md5:4b5503868cb364b34883aa81560b86df',\n", + " 'is_head': True,\n", + " 'delete_marker': False,\n", + " 'links': {'self': 'https://sandbox.zenodo.org/api/files/42ffdb14-7d46-4e5a-b71b-0ccb6ebff3ca/43100024.parquet',\n", + " 'version': 'https://sandbox.zenodo.org/api/files/42ffdb14-7d46-4e5a-b71b-0ccb6ebff3ca/43100024.parquet?version_id=fd809f90-47bd-4248-9489-7cb72c3a5e69',\n", + " 'uploads': 'https://sandbox.zenodo.org/api/files/42ffdb14-7d46-4e5a-b71b-0ccb6ebff3ca/43100024.parquet?uploads=1'}}" + ] + }, + "execution_count": 56, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# The version link is for this specific version\n", + "r.json()" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "id": "09154176", + "metadata": {}, + "outputs": [], + "source": [ + "# Publish the upload\n", + "id = \"283543\"\n", + "r = requests.post(f\"https://sandbox.zenodo.org/api/deposit/depositions/{id}/actions/publish\",\n", + " params=params)" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "id": "48fa9e82", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "202" + ] + }, + "execution_count": 61, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "r.status_code" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "id": "412cf6e5", + "metadata": {}, + "outputs": [], + "source": [ + "# Create a new version\n", + "r = requests.post(f\"https://sandbox.zenodo.org/api/deposit/depositions/{id}/actions/newversion\",\n", + " params=params)" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "id": "8e1ccaae", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "201" + ] + }, + "execution_count": 64, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "r.status_code" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "id": "1b7caf46", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'created': '2025-07-07T16:41:03.554066+00:00',\n", + " 'modified': '2025-07-07T16:41:03.709857+00:00',\n", + " 'id': 283548,\n", + " 'conceptrecid': '283542',\n", + " 'conceptdoi': '10.5072/zenodo.283542',\n", + " 'metadata': {'title': 'My first upload',\n", + " 'description': 'This is my first upload',\n", + " 'access_right': 'open',\n", + " 'creators': [{'name': 'Doe, John', 'affiliation': 'Zenodo'}],\n", + " 'license': 'cc-by-4.0',\n", + " 'imprint_publisher': 'Zenodo',\n", + " 'upload_type': 'poster',\n", + " 'prereserve_doi': {'doi': '10.5281/zenodo.283548', 'recid': 283548}},\n", + " 'title': 'My first upload',\n", + " 'links': {'self': 'https://sandbox.zenodo.org/api/deposit/depositions/283548',\n", + " 'html': 
'https://sandbox.zenodo.org/deposit/283548',\n", + " 'badge': 'https://sandbox.zenodo.org/badge/doi/.svg',\n", + " 'files': 'https://sandbox.zenodo.org/api/deposit/depositions/283548/files',\n", + " 'bucket': 'https://sandbox.zenodo.org/api/files/522c1cea-4970-40cd-9e78-7f15303205d5',\n", + " 'latest_draft': 'https://sandbox.zenodo.org/api/deposit/depositions/283548',\n", + " 'latest_draft_html': 'https://sandbox.zenodo.org/deposit/283548',\n", + " 'publish': 'https://sandbox.zenodo.org/api/deposit/depositions/283548/actions/publish',\n", + " 'edit': 'https://sandbox.zenodo.org/api/deposit/depositions/283548/actions/edit',\n", + " 'discard': 'https://sandbox.zenodo.org/api/deposit/depositions/283548/actions/discard',\n", + " 'newversion': 'https://sandbox.zenodo.org/api/deposit/depositions/283548/actions/newversion'},\n", + " 'record_id': 283548,\n", + " 'owner': 45472,\n", + " 'files': [{'id': 'e17e0ee2-713c-41f4-8473-bb20e261f262',\n", + " 'filename': '43100024.parquet',\n", + " 'filesize': 184126112,\n", + " 'checksum': '4b5503868cb364b34883aa81560b86df',\n", + " 'links': {'self': 'https://sandbox.zenodo.org/api/deposit/depositions/283548/files/e17e0ee2-713c-41f4-8473-bb20e261f262',\n", + " 'download': 'https://sandbox.zenodo.org/api/records/283548/draft/files/43100024.parquet/content'}}],\n", + " 'state': 'unsubmitted',\n", + " 'submitted': False}" + ] + }, + "execution_count": 65, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "r.json()" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "id": "7a094e61", + "metadata": {}, + "outputs": [], + "source": [ + "# Need the \"id\" to publish the new version\n", + "id = \"283548\"" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "id": "a837bdd6", + "metadata": {}, + "outputs": [], + "source": [ + "bucket_url = r.json()[\"links\"][\"bucket\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "id": "dd9e8fd4", + "metadata": {}, + "outputs": [ + { + 
"data": { + "text/plain": [ + "'https://sandbox.zenodo.org/api/files/522c1cea-4970-40cd-9e78-7f15303205d5'" + ] + }, + "execution_count": 67, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bucket_url" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "id": "23143c01", + "metadata": {}, + "outputs": [], + "source": [ + "# Upload a different local file; NOTE: the bucket key is hard-coded to 43100024.parquet (not {filename}), which replaces the content of the file inherited from the previous version (it is not added under its own name)\n", + "filename = \"12100136.parquet\"\n", + "with open(f\"/data/tables/output/en/{filename}\", \"rb\") as fp:\n", + " r = requests.put(f\"{bucket_url}/43100024.parquet\",\n", + " data=fp,\n", + " params=params)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8e1e5fd1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "201\n" + ] + } + ], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 71, + "id": "01ff1e95", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'created': '2025-07-07T18:20:35.453877+00:00',\n", + " 'updated': '2025-07-07T18:24:47.259818+00:00',\n", + " 'version_id': '20745124-2e18-4062-b1cb-43571916e3a3',\n", + " 'key': '43100024.parquet',\n", + " 'size': 186230461,\n", + " 'mimetype': 'application/octet-stream',\n", + " 'checksum': 'md5:ab89ee1b1f09834f78f8ad1b46e97659',\n", + " 'is_head': True,\n", + " 'delete_marker': False,\n", + " 'links': {'self': 'https://sandbox.zenodo.org/api/files/522c1cea-4970-40cd-9e78-7f15303205d5/43100024.parquet',\n", + " 'version': 'https://sandbox.zenodo.org/api/files/522c1cea-4970-40cd-9e78-7f15303205d5/43100024.parquet?version_id=20745124-2e18-4062-b1cb-43571916e3a3',\n", + " 'uploads': 'https://sandbox.zenodo.org/api/files/522c1cea-4970-40cd-9e78-7f15303205d5/43100024.parquet?uploads=1'}}" + ] + }, + "execution_count": 71, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "r.json()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c5bb978e", + "metadata": {}, + "outputs": 
[ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'created': '2025-07-07T16:41:03.554066+00:00', 'modified': '2025-07-07T18:40:21.806148+00:00', 'id': 283548, 'conceptrecid': '283542', 'conceptdoi': '10.5072/zenodo.283542', 'metadata': {'title': 'My second upload', 'publication_date': '2025-06-14', 'access_right': 'open', 'creators': [{'name': 'Diego Ripley', 'affiliation': None}], 'license': 'cc-zero', 'imprint_publisher': 'Zenodo', 'upload_type': 'dataset', 'prereserve_doi': {'doi': '10.5281/zenodo.283548', 'recid': 283548}}, 'title': 'My second upload', 'links': {'self': 'https://sandbox.zenodo.org/api/deposit/depositions/283548', 'html': 'https://sandbox.zenodo.org/deposit/283548', 'badge': 'https://sandbox.zenodo.org/badge/doi/.svg', 'files': 'https://sandbox.zenodo.org/api/deposit/depositions/283548/files', 'bucket': 'https://sandbox.zenodo.org/api/files/522c1cea-4970-40cd-9e78-7f15303205d5', 'latest_draft': 'https://sandbox.zenodo.org/api/deposit/depositions/283548', 'latest_draft_html': 'https://sandbox.zenodo.org/deposit/283548', 'publish': 'https://sandbox.zenodo.org/api/deposit/depositions/283548/actions/publish', 'edit': 'https://sandbox.zenodo.org/api/deposit/depositions/283548/actions/edit', 'discard': 'https://sandbox.zenodo.org/api/deposit/depositions/283548/actions/discard', 'newversion': 'https://sandbox.zenodo.org/api/deposit/depositions/283548/actions/newversion'}, 'record_id': 283548, 'owner': 45472, 'files': [{'id': '97507810-a355-416e-8f6c-679c9dc1d564', 'filename': '43100024.parquet', 'filesize': 186230461, 'checksum': 'ab89ee1b1f09834f78f8ad1b46e97659', 'links': {'self': 'https://sandbox.zenodo.org/api/deposit/depositions/283548/files/97507810-a355-416e-8f6c-679c9dc1d564', 'download': 'https://sandbox.zenodo.org/api/records/283548/draft/files/43100024.parquet/content'}}], 'state': 'unsubmitted', 'submitted': False}\n" + ] + } + ], + "source": [ + "# Need to add the metadata.publication_date\n", + "# Date of publication in 
ISO8601 format (YYYY-MM-DD). Defaults to current date.\n", + "data = {\n", + " \"metadata\": {\n", + " \"publication_date\": \"2025-07-14\",\n", + " \"upload_type\": \"dataset\",\n", + " \"title\": \"My second upload\",\n", + " \"creators\": [{\"name\": \"Diego Ripley\"}]\n", + " }\n", + "}\n", + "r = requests.put(f\"https://sandbox.zenodo.org/api/deposit/depositions/{id}\",\n", + " params=params,\n", + " data=json.dumps(data))\n", + "print(r.json())" + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "id": "d7ece7f0", + "metadata": {}, + "outputs": [], + "source": [ + "r = requests.post(f\"https://sandbox.zenodo.org/api/deposit/depositions/{id}/actions/publish\",\n", + " params=params)" + ] + }, + { + "cell_type": "code", + "execution_count": 84, + "id": "20afb27b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'created': '2025-07-07T18:40:25.191132+00:00',\n", + " 'modified': '2025-07-07T18:40:25.367499+00:00',\n", + " 'id': 283548,\n", + " 'conceptrecid': '283542',\n", + " 'doi': '10.5072/zenodo.283548',\n", + " 'conceptdoi': '10.5072/zenodo.283542',\n", + " 'doi_url': 'https://doi.org/10.5072/zenodo.283548',\n", + " 'metadata': {'title': 'My second upload',\n", + " 'doi': '10.5072/zenodo.283548',\n", + " 'publication_date': '2025-06-14',\n", + " 'access_right': 'open',\n", + " 'creators': [{'name': 'Diego Ripley', 'affiliation': None}],\n", + " 'license': 'cc-zero',\n", + " 'imprint_publisher': 'Zenodo',\n", + " 'upload_type': 'dataset',\n", + " 'prereserve_doi': {'doi': '10.5281/zenodo.283548', 'recid': 283548}},\n", + " 'title': 'My second upload',\n", + " 'links': {'self': 'https://sandbox.zenodo.org/api/records/283548',\n", + " 'html': 'https://sandbox.zenodo.org/records/283548',\n", + " 'doi': 'https://doi.org/10.5072/zenodo.283548',\n", + " 'parent_doi': 'https://doi.org/10.5072/zenodo.283542',\n", + " 'badge': 'https://sandbox.zenodo.org/badge/doi/10.5072%2Fzenodo.283548.svg',\n", + " 'conceptbadge': 
'https://sandbox.zenodo.org/badge/doi/10.5072%2Fzenodo.283542.svg',\n", + " 'files': 'https://sandbox.zenodo.org/api/records/283548/files',\n", + " 'bucket': 'https://sandbox.zenodo.org/api/files/522c1cea-4970-40cd-9e78-7f15303205d5',\n", + " 'latest_draft': 'https://sandbox.zenodo.org/api/deposit/depositions/283548',\n", + " 'latest_draft_html': 'https://sandbox.zenodo.org/deposit/283548',\n", + " 'publish': 'https://sandbox.zenodo.org/api/deposit/depositions/283548/actions/publish',\n", + " 'edit': 'https://sandbox.zenodo.org/api/deposit/depositions/283548/actions/edit',\n", + " 'discard': 'https://sandbox.zenodo.org/api/deposit/depositions/283548/actions/discard',\n", + " 'newversion': 'https://sandbox.zenodo.org/api/deposit/depositions/283548/actions/newversion',\n", + " 'record': 'https://sandbox.zenodo.org/api/records/283548',\n", + " 'record_html': 'https://sandbox.zenodo.org/record/283548',\n", + " 'latest': 'https://sandbox.zenodo.org/api/records/283548/versions/latest',\n", + " 'latest_html': 'https://sandbox.zenodo.org/records/283548/latest'},\n", + " 'record_id': 283548,\n", + " 'owner': 45472,\n", + " 'files': [{'id': '97507810-a355-416e-8f6c-679c9dc1d564',\n", + " 'filename': '43100024.parquet',\n", + " 'filesize': 186230461,\n", + " 'checksum': 'ab89ee1b1f09834f78f8ad1b46e97659',\n", + " 'links': {'self': 'https://sandbox.zenodo.org/api/records/283548/files/97507810-a355-416e-8f6c-679c9dc1d564',\n", + " 'download': 'https://sandbox.zenodo.org/api/records/283548/draft/files/43100024.parquet/content'}}],\n", + " 'state': 'done',\n", + " 'submitted': True}" + ] + }, + "execution_count": 84, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "r.json()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "da12e35a", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + 
"codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}