Add docs to download overture places data & ollama model (#13)

* Add docs to download overture places data & ollama model
* Hit local overture parquet files
* Add osx gitignore
* Add .env.example
* Make overture data source selectable using .env
* Add pytest fixture to set the right env vars during CI

---------

Co-authored-by: Daniel Wiesmann <yellowcap@users.noreply.github.com>
This commit is contained in:
Soumya Ranjan Mohanty
2025-12-04 22:11:10 +05:30
committed by GitHub
parent 16f39f908f
commit be8affaa6c
10 changed files with 1848 additions and 1777 deletions
+16
View File
@@ -0,0 +1,16 @@
# Ollama Configuration
OLLAMA_AGENT_MODEL=gpt-oss:20b-cloud
OLLAMA_IMAGE_MODEL=ministral-3:14b-cloud
OLLAMA_BASE_URL=http://localhost:11434
# Overture Maps Configuration
# Source: 'local' or 's3'
OVERTURE_SOURCE=local
OVERTURE_LOCAL_PATH=data/overture/places/*
OVERTURE_S3_PATH=s3://overturemaps-us-west-2/release/2025-11-19.0/theme=places/type=place/*
# Frontend Configuration
# API base URL for the frontend to connect to (default: http://localhost:8000)
API_BASE_URL=http://localhost:8000
AWS_REQUEST_PAYER=requester
+7
View File
@@ -212,3 +212,10 @@ __marimo__/
# Notebook
.ipynb_checkpoints
nbs/*
!nbs/.gitkeep
# OSX
.DS_Store
# Data
data/*
!data/.gitkeep
+20
View File
@@ -18,6 +18,26 @@ Edit `.env` to set your configuration:
The application will automatically load these variables from the `.env` file.
## Ollama Setup
Install [Ollama](https://ollama.ai/) and download the required models:
```bash
ollama pull ministral-3:14b-cloud
ollama pull gpt-oss:20b-cloud
```
These models are used by the agent (`gpt-oss:20b-cloud`) and for satellite image analysis (`ministral-3:14b-cloud`).
## Data Setup
Download Overture Maps place data locally:
```bash
mkdir -p data/overture/places
aws s3 sync s3://overturemaps-us-west-2/release/2025-11-19.0/theme=places/type=place/ data/overture/places/
```
## Development Setup
### Pre-commit Hooks
View File
View File
+2 -2
View File
@@ -3,11 +3,11 @@ from dotenv import load_dotenv
from langchain_ollama import ChatOllama
# Load environment variables from .env file
# Load environment variables from the .env file
load_dotenv()
# Get model name from environment variable, default to llama3.2
MODEL_NAME = os.environ.get("OLLAMA_MODEL", "llama3.2")
MODEL_NAME = os.environ.get("OLLAMA_AGENT_MODEL", "llama3.2")
OLLAMA_BASE_URL = os.environ.get("OLLAMA_BASE_URL", "http://localhost:11434")
llm = ChatOllama(
+12 -3
View File
@@ -1,12 +1,17 @@
import json
import os
from typing import Annotated
import duckdb
from dotenv import load_dotenv
from langchain_core.messages import ToolMessage
from langchain_core.tools import tool
from langchain_core.tools.base import InjectedToolCallId
from langgraph.types import Command
# Load environment variables
load_dotenv()
def create_database_connection():
"""Create and configure a DuckDB connection with necessary extensions.
@@ -33,13 +38,17 @@ async def get_place(
"""Get place location from Overture Maps based on user input place name."""
db_connection = create_database_connection()
source = os.getenv("OVERTURE_SOURCE", "local")
if source == "s3":
data_path = os.getenv("OVERTURE_S3_PATH")
db_connection.execute("SET s3_region='us-west-2';")
else:
data_path = os.getenv("OVERTURE_LOCAL_PATH")
location_results = db_connection.execute(
f"""
LOAD spatial;
SET s3_region='us-west-2';
SELECT
id,
jaro_winkler_similarity(LOWER(names.primary), LOWER('{place_name}')) AS similarity_score,
@@ -48,7 +57,7 @@ async def get_place(
CAST(socials AS JSON) AS socials,
ST_AsGeoJSON(geometry) AS geometry,
FROM read_parquet(
's3://overturemaps-us-west-2/release/2025-11-19.0/theme=places/type=place/*',
'{data_path}',
filename=true,
hive_partitioning=1
)
+7 -2
View File
@@ -1,5 +1,6 @@
"""Tools for summarizing satellite images using LLM-based analysis."""
import os
from typing import Annotated, Optional
import dspy
from langchain_core.tools import tool
@@ -7,6 +8,10 @@ from langgraph.types import Command
from langchain_core.messages import ToolMessage
from langchain_core.tools.base import InjectedToolCallId
import dotenv
dotenv.load_dotenv()
class SatImgSummary(dspy.Signature):
"Describe things you see in the satellite image."
@@ -20,8 +25,8 @@ class SatImgSummaryAgent(dspy.Module):
def __init__(
self,
model: str = "ministral-3:14b-cloud",
api_base: str = "http://localhost:11434",
model: str = os.environ.get("OLLAMA_IMAGE_MODEL", "ministral-3:14b-cloud"),
api_base: str = os.environ.get("OLLAMA_BASE_URL", "http://localhost:11434"),
temperature: float = 0.5,
max_tokens: int = 4_096,
) -> None:
+14
View File
@@ -1,8 +1,22 @@
import os
import pytest
from langchain_core.tools.base import ToolCall
from geo_assistant.tools.overture import get_place
@pytest.fixture(autouse=True)
def setup_ci_env(monkeypatch):
    """Select the S3 Overture source for every test when running in CI.

    When either the ``CI`` or ``GITHUB_ACTIONS`` environment variable is set
    to a non-empty value, ``OVERTURE_SOURCE`` is set to ``"s3"`` and
    ``OVERTURE_S3_PATH`` to the pinned Overture places release. Outside CI
    the environment is left untouched.

    The variables are set through pytest's ``monkeypatch`` fixture rather
    than by writing to ``os.environ`` directly, so the previous values are
    restored automatically when each test finishes and the override cannot
    leak into other tests in the same session.
    """
    # Detect CI environment (GitHub Actions, GitLab CI, etc.)
    if os.getenv("CI") or os.getenv("GITHUB_ACTIONS"):
        monkeypatch.setenv("OVERTURE_SOURCE", "s3")
        monkeypatch.setenv(
            "OVERTURE_S3_PATH",
            "s3://overturemaps-us-west-2/release/2025-11-19.0/theme=places/type=place/*",
        )
async def test_get_place():
command = await get_place.ainvoke(
ToolCall(
Generated
+1770 -1770
View File
File diff suppressed because it is too large Load Diff