mirror of
https://github.com/dataforcanada/d4c-service-geo-assistant.git
synced 2026-06-13 14:31:01 +02:00
Add docs to download overture places data & ollama model (#13)
* Add docs to download overture places data & ollama model * Hit local overture parquet files * Add osx gitignore * Add .env.example * Make overture data source selectable using .env * Add pytest marker to set right ENV vars during CI --------- Co-authored-by: Daniel Wiesmann <yellowcap@users.noreply.github.com>
This commit is contained in:
committed by
GitHub
parent
16f39f908f
commit
be8affaa6c
@@ -0,0 +1,16 @@
|
||||
# Ollama Configuration
|
||||
OLLAMA_AGENT_MODEL=gpt-oss:20b-cloud
|
||||
OLLAMA_IMAGE_MODEL=ministral-3:14b-cloud
|
||||
OLLAMA_BASE_URL=http://localhost:11434
|
||||
|
||||
# Overture Maps Configuration
|
||||
# Source: 'local' or 's3'
|
||||
OVERTURE_SOURCE=local
|
||||
OVERTURE_LOCAL_PATH=data/overture/places/*
|
||||
OVERTURE_S3_PATH=s3://overturemaps-us-west-2/release/2025-11-19.0/theme=places/type=place/*
|
||||
|
||||
# Frontend Configuration
|
||||
# API base URL for the frontend to connect to (default: http://localhost:8000)
|
||||
API_BASE_URL=http://localhost:8000
|
||||
|
||||
AWS_REQUEST_PAYER=requester
|
||||
@@ -212,3 +212,10 @@ __marimo__/
|
||||
# Notebook
|
||||
.ipynb_checkpoints
|
||||
nbs/*
|
||||
!nbs/.gitkeep
|
||||
|
||||
# OSX
|
||||
.DS_Store
|
||||
# Data
|
||||
data/*
|
||||
!data/.gitkeep
|
||||
@@ -18,6 +18,26 @@ Edit `.env` to set your configuration:
|
||||
|
||||
The application will automatically load these variables from the `.env` file.
|
||||
|
||||
## Ollama Setup
|
||||
|
||||
Install [Ollama](https://ollama.ai/) and download the required models:
|
||||
|
||||
```bash
|
||||
ollama pull ministral-3:14b-cloud
|
||||
ollama pull gpt-oss:20b-cloud
|
||||
```
|
||||
|
||||
These models are used for the agent (`gpt-oss:20b-cloud`) and for satellite image analysis (`ministral-3:14b-cloud`).
|
||||
|
||||
## Data Setup
|
||||
|
||||
Download the Overture Maps places data locally (requires the AWS CLI):
|
||||
|
||||
```bash
|
||||
mkdir -p data/overture/places
|
||||
aws s3 sync s3://overturemaps-us-west-2/release/2025-11-19.0/theme=places/type=place/ data/overture/places/
|
||||
```
|
||||
|
||||
## Development Setup
|
||||
|
||||
### Pre-commit Hooks
|
||||
|
||||
@@ -3,11 +3,11 @@ from dotenv import load_dotenv
|
||||
|
||||
from langchain_ollama import ChatOllama
|
||||
|
||||
# Load environment variables from .env file
|
||||
# Load environment variables from env file
|
||||
load_dotenv()
|
||||
|
||||
# Get model name from environment variable, default to llama3.2
|
||||
MODEL_NAME = os.environ.get("OLLAMA_MODEL", "llama3.2")
|
||||
MODEL_NAME = os.environ.get("OLLAMA_AGENT_MODEL", "llama3.2")
|
||||
OLLAMA_BASE_URL = os.environ.get("OLLAMA_BASE_URL", "http://localhost:11434")
|
||||
|
||||
llm = ChatOllama(
|
||||
|
||||
@@ -1,12 +1,17 @@
|
||||
import json
|
||||
import os
|
||||
from typing import Annotated
|
||||
|
||||
import duckdb
|
||||
from dotenv import load_dotenv
|
||||
from langchain_core.messages import ToolMessage
|
||||
from langchain_core.tools import tool
|
||||
from langchain_core.tools.base import InjectedToolCallId
|
||||
from langgraph.types import Command
|
||||
|
||||
# Load environment variables
|
||||
load_dotenv()
|
||||
|
||||
|
||||
def create_database_connection():
|
||||
"""Create and configure a DuckDB connection with necessary extensions.
|
||||
@@ -33,13 +38,17 @@ async def get_place(
|
||||
"""Get place location from Overture Maps based on user input place name."""
|
||||
|
||||
db_connection = create_database_connection()
|
||||
source = os.getenv("OVERTURE_SOURCE", "local")
|
||||
if source == "s3":
|
||||
data_path = os.getenv("OVERTURE_S3_PATH")
|
||||
db_connection.execute("SET s3_region='us-west-2';")
|
||||
else:
|
||||
data_path = os.getenv("OVERTURE_LOCAL_PATH")
|
||||
|
||||
location_results = db_connection.execute(
|
||||
f"""
|
||||
LOAD spatial;
|
||||
|
||||
SET s3_region='us-west-2';
|
||||
|
||||
SELECT
|
||||
id,
|
||||
jaro_winkler_similarity(LOWER(names.primary), LOWER('{place_name}')) AS similarity_score,
|
||||
@@ -48,7 +57,7 @@ async def get_place(
|
||||
CAST(socials AS JSON) AS socials,
|
||||
ST_AsGeoJSON(geometry) AS geometry,
|
||||
FROM read_parquet(
|
||||
's3://overturemaps-us-west-2/release/2025-11-19.0/theme=places/type=place/*',
|
||||
'{data_path}',
|
||||
filename=true,
|
||||
hive_partitioning=1
|
||||
)
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
"""Tools for summarizing satellite images using LLM-based analysis."""
|
||||
|
||||
import os
|
||||
from typing import Annotated, Optional
|
||||
import dspy
|
||||
from langchain_core.tools import tool
|
||||
@@ -7,6 +8,10 @@ from langgraph.types import Command
|
||||
from langchain_core.messages import ToolMessage
|
||||
from langchain_core.tools.base import InjectedToolCallId
|
||||
|
||||
import dotenv
|
||||
|
||||
dotenv.load_dotenv()
|
||||
|
||||
|
||||
class SatImgSummary(dspy.Signature):
|
||||
"Describe things you see in the satellite image."
|
||||
@@ -20,8 +25,8 @@ class SatImgSummaryAgent(dspy.Module):
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
model: str = "ministral-3:14b-cloud",
|
||||
api_base: str = "http://localhost:11434",
|
||||
model: str = os.environ.get("OLLAMA_IMAGE_MODEL", "ministral-3:14b-cloud"),
|
||||
api_base: str = os.environ.get("OLLAMA_BASE_URL", "http://localhost:11434"),
|
||||
temperature: float = 0.5,
|
||||
max_tokens: int = 4_096,
|
||||
) -> None:
|
||||
|
||||
@@ -1,8 +1,22 @@
|
||||
import os
|
||||
import pytest
|
||||
from langchain_core.tools.base import ToolCall
|
||||
|
||||
from geo_assistant.tools.overture import get_place
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def setup_ci_env():
    """Select the S3 Overture source while a test runs in a CI environment.

    When either the ``CI`` or ``GITHUB_ACTIONS`` environment variable is set
    to a non-empty value, ``OVERTURE_SOURCE`` and ``OVERTURE_S3_PATH`` are
    overridden for the duration of the test. Outside CI the environment is
    left untouched.

    The previous values are restored after the test so the override does not
    leak through the process-global ``os.environ`` into other test modules.
    """
    # Detect CI environment (GitHub Actions, GitLab CI, etc.)
    if not (os.getenv("CI") or os.getenv("GITHUB_ACTIONS")):
        yield
        return

    overrides = {
        "OVERTURE_SOURCE": "s3",
        "OVERTURE_S3_PATH": (
            "s3://overturemaps-us-west-2/release/2025-11-19.0/theme=places/type=place/*"
        ),
    }
    # Snapshot current values (None means "was not set") before overriding.
    previous = {key: os.environ.get(key) for key in overrides}
    os.environ.update(overrides)
    try:
        yield
    finally:
        # Put the environment back exactly as it was found.
        for key, old_value in previous.items():
            if old_value is None:
                os.environ.pop(key, None)
            else:
                os.environ[key] = old_value
|
||||
|
||||
|
||||
async def test_get_place():
|
||||
command = await get_place.ainvoke(
|
||||
ToolCall(
|
||||
|
||||
Reference in New Issue
Block a user