mirror of
https://github.com/dataforcanada/d4c-service-geo-assistant.git
synced 2026-06-13 14:31:01 +02:00
Add docs to download overture places data & ollama model (#13)
* Add docs to download overture places data & ollama model * Hit local overture parquet files * Add osx gitignore * Add .env.example * Make overture data source selectable using .env * Add pytest marker to set right ENV vars during CI --------- Co-authored-by: Daniel Wiesmann <yellowcap@users.noreply.github.com>
This commit is contained in:
committed by
GitHub
parent
16f39f908f
commit
be8affaa6c
@@ -0,0 +1,16 @@
|
|||||||
|
# Ollama Configuration
|
||||||
|
OLLAMA_AGENT_MODEL=gpt-oss:20b-cloud
|
||||||
|
OLLAMA_IMAGE_MODEL=ministral-3:14b-cloud
|
||||||
|
OLLAMA_BASE_URL=http://localhost:11434
|
||||||
|
|
||||||
|
# Overture Maps Configuration
|
||||||
|
# Source: 'local' or 's3'
|
||||||
|
OVERTURE_SOURCE=local
|
||||||
|
OVERTURE_LOCAL_PATH=data/overture/places/*
|
||||||
|
OVERTURE_S3_PATH=s3://overturemaps-us-west-2/release/2025-11-19.0/theme=places/type=place/*
|
||||||
|
|
||||||
|
# Frontend Configuration
|
||||||
|
# API base URL for the frontend to connect to (default: http://localhost:8000)
|
||||||
|
API_BASE_URL=http://localhost:8000
|
||||||
|
|
||||||
|
AWS_REQUEST_PAYER=requester
|
||||||
@@ -212,3 +212,10 @@ __marimo__/
|
|||||||
# Notebook
|
# Notebook
|
||||||
.ipynb_checkpoints
|
.ipynb_checkpoints
|
||||||
nbs/*
|
nbs/*
|
||||||
|
!nbs/.gitkeep
|
||||||
|
|
||||||
|
# OSX
|
||||||
|
.DS_Store
|
||||||
|
# Data
|
||||||
|
data/*
|
||||||
|
!data/.gitkeep
|
||||||
@@ -18,6 +18,26 @@ Edit `.env` to set your configuration:
|
|||||||
|
|
||||||
The application will automatically load these variables from the `.env` file.
|
The application will automatically load these variables from the `.env` file.
|
||||||
|
|
||||||
|
## Ollama Setup
|
||||||
|
|
||||||
|
Install [Ollama](https://ollama.ai/) and download the required models:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
ollama pull ministral-3:14b-cloud
|
||||||
|
ollama pull gpt-oss:20b-cloud
|
||||||
|
```
|
||||||
|
|
||||||
|
These models are used for the agent (`gpt-oss:20b-cloud`) and for satellite image analysis (`ministral-3:14b-cloud`).
|
||||||
|
|
||||||
|
## Data Setup
|
||||||
|
|
||||||
|
Download Overture Maps place data locally:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
mkdir -p data/overture/places
|
||||||
|
aws s3 sync s3://overturemaps-us-west-2/release/2025-11-19.0/theme=places/type=place/ data/overture/places/
|
||||||
|
```
|
||||||
|
|
||||||
## Development Setup
|
## Development Setup
|
||||||
|
|
||||||
### Pre-commit Hooks
|
### Pre-commit Hooks
|
||||||
|
|||||||
@@ -3,11 +3,11 @@ from dotenv import load_dotenv
|
|||||||
|
|
||||||
from langchain_ollama import ChatOllama
|
from langchain_ollama import ChatOllama
|
||||||
|
|
||||||
# Load environment variables from .env file
|
# Load environment variables from .env file
|
||||||
load_dotenv()
|
load_dotenv()
|
||||||
|
|
||||||
# Get model name from environment variable, default to llama3.2
|
# Get model name from environment variable, default to llama3.2
|
||||||
MODEL_NAME = os.environ.get("OLLAMA_MODEL", "llama3.2")
|
MODEL_NAME = os.environ.get("OLLAMA_AGENT_MODEL", "llama3.2")
|
||||||
OLLAMA_BASE_URL = os.environ.get("OLLAMA_BASE_URL", "http://localhost:11434")
|
OLLAMA_BASE_URL = os.environ.get("OLLAMA_BASE_URL", "http://localhost:11434")
|
||||||
|
|
||||||
llm = ChatOllama(
|
llm = ChatOllama(
|
||||||
|
|||||||
@@ -1,12 +1,17 @@
|
|||||||
import json
|
import json
|
||||||
|
import os
|
||||||
from typing import Annotated
|
from typing import Annotated
|
||||||
|
|
||||||
import duckdb
|
import duckdb
|
||||||
|
from dotenv import load_dotenv
|
||||||
from langchain_core.messages import ToolMessage
|
from langchain_core.messages import ToolMessage
|
||||||
from langchain_core.tools import tool
|
from langchain_core.tools import tool
|
||||||
from langchain_core.tools.base import InjectedToolCallId
|
from langchain_core.tools.base import InjectedToolCallId
|
||||||
from langgraph.types import Command
|
from langgraph.types import Command
|
||||||
|
|
||||||
|
# Load environment variables
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
|
||||||
def create_database_connection():
|
def create_database_connection():
|
||||||
"""Create and configure a DuckDB connection with necessary extensions.
|
"""Create and configure a DuckDB connection with necessary extensions.
|
||||||
@@ -33,13 +38,17 @@ async def get_place(
|
|||||||
"""Get place location from Overture Maps based on user input place name."""
|
"""Get place location from Overture Maps based on user input place name."""
|
||||||
|
|
||||||
db_connection = create_database_connection()
|
db_connection = create_database_connection()
|
||||||
|
source = os.getenv("OVERTURE_SOURCE", "local")
|
||||||
|
if source == "s3":
|
||||||
|
data_path = os.getenv("OVERTURE_S3_PATH")
|
||||||
|
db_connection.execute("SET s3_region='us-west-2';")
|
||||||
|
else:
|
||||||
|
data_path = os.getenv("OVERTURE_LOCAL_PATH")
|
||||||
|
|
||||||
location_results = db_connection.execute(
|
location_results = db_connection.execute(
|
||||||
f"""
|
f"""
|
||||||
LOAD spatial;
|
LOAD spatial;
|
||||||
|
|
||||||
SET s3_region='us-west-2';
|
|
||||||
|
|
||||||
SELECT
|
SELECT
|
||||||
id,
|
id,
|
||||||
jaro_winkler_similarity(LOWER(names.primary), LOWER('{place_name}')) AS similarity_score,
|
jaro_winkler_similarity(LOWER(names.primary), LOWER('{place_name}')) AS similarity_score,
|
||||||
@@ -48,7 +57,7 @@ async def get_place(
|
|||||||
CAST(socials AS JSON) AS socials,
|
CAST(socials AS JSON) AS socials,
|
||||||
ST_AsGeoJSON(geometry) AS geometry,
|
ST_AsGeoJSON(geometry) AS geometry,
|
||||||
FROM read_parquet(
|
FROM read_parquet(
|
||||||
's3://overturemaps-us-west-2/release/2025-11-19.0/theme=places/type=place/*',
|
'{data_path}',
|
||||||
filename=true,
|
filename=true,
|
||||||
hive_partitioning=1
|
hive_partitioning=1
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
"""Tools for summarizing satellite images using LLM-based analysis."""
|
"""Tools for summarizing satellite images using LLM-based analysis."""
|
||||||
|
|
||||||
|
import os
|
||||||
from typing import Annotated, Optional
|
from typing import Annotated, Optional
|
||||||
import dspy
|
import dspy
|
||||||
from langchain_core.tools import tool
|
from langchain_core.tools import tool
|
||||||
@@ -7,6 +8,10 @@ from langgraph.types import Command
|
|||||||
from langchain_core.messages import ToolMessage
|
from langchain_core.messages import ToolMessage
|
||||||
from langchain_core.tools.base import InjectedToolCallId
|
from langchain_core.tools.base import InjectedToolCallId
|
||||||
|
|
||||||
|
import dotenv
|
||||||
|
|
||||||
|
dotenv.load_dotenv()
|
||||||
|
|
||||||
|
|
||||||
class SatImgSummary(dspy.Signature):
|
class SatImgSummary(dspy.Signature):
|
||||||
"Describe things you see in the satellite image."
|
"Describe things you see in the satellite image."
|
||||||
@@ -20,8 +25,8 @@ class SatImgSummaryAgent(dspy.Module):
|
|||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
model: str = "ministral-3:14b-cloud",
|
model: str = os.environ.get("OLLAMA_IMAGE_MODEL", "ministral-3:14b-cloud"),
|
||||||
api_base: str = "http://localhost:11434",
|
api_base: str = os.environ.get("OLLAMA_BASE_URL", "http://localhost:11434"),
|
||||||
temperature: float = 0.5,
|
temperature: float = 0.5,
|
||||||
max_tokens: int = 4_096,
|
max_tokens: int = 4_096,
|
||||||
) -> None:
|
) -> None:
|
||||||
|
|||||||
@@ -1,8 +1,22 @@
|
|||||||
|
import os
|
||||||
|
import pytest
|
||||||
from langchain_core.tools.base import ToolCall
|
from langchain_core.tools.base import ToolCall
|
||||||
|
|
||||||
from geo_assistant.tools.overture import get_place
|
from geo_assistant.tools.overture import get_place
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(autouse=True)
|
||||||
|
def setup_ci_env():
|
||||||
|
"""Configure S3 source for CI environments."""
|
||||||
|
# Detect CI environment (GitHub Actions, GitLab CI, etc.)
|
||||||
|
if os.getenv("CI") or os.getenv("GITHUB_ACTIONS"):
|
||||||
|
os.environ["OVERTURE_SOURCE"] = "s3"
|
||||||
|
os.environ["OVERTURE_S3_PATH"] = (
|
||||||
|
"s3://overturemaps-us-west-2/release/2025-11-19.0/theme=places/type=place/*"
|
||||||
|
)
|
||||||
|
yield
|
||||||
|
|
||||||
|
|
||||||
async def test_get_place():
|
async def test_get_place():
|
||||||
command = await get_place.ainvoke(
|
command = await get_place.ainvoke(
|
||||||
ToolCall(
|
ToolCall(
|
||||||
|
|||||||
Reference in New Issue
Block a user