Merge pull request #1015 from pocketpaw/dev

Sync dev into main: security hardening, CI fixes, and bug fixes
This commit is contained in:
Prakash Dalai
2026-04-29 12:43:35 +05:30
committed by GitHub
494 changed files with 74984 additions and 1732 deletions

View File

@@ -367,7 +367,7 @@ jobs:
- name: Check for secrets in diff
run: |
DIFF=$(git diff origin/${{ github.event.pull_request.base.ref }}...HEAD -- . ':!uv.lock' ':!*.lock' ':!src/pocketpaw/security/redact.py' ':!tests/test_redact.py' ':!tests/test_pii.py' 2>/dev/null || true)
DIFF=$(git diff origin/${{ github.event.pull_request.base.ref }}...HEAD -- . ':!uv.lock' ':!*.lock' ':!src/pocketpaw/security/redact.py' ':!tests/test_redact.py' ':!tests/test_pii.py' ':!tests/test_logging_scrub.py' 2>/dev/null || true)
if [ -z "$DIFF" ]; then
echo "No diff to scan."
exit 0

1
.gitignore vendored
View File

@@ -77,3 +77,4 @@ private.key
.env.bak
*.bak
output.txt

View File

@@ -6,6 +6,34 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co
PocketPaw is a self-hosted AI agent that runs locally and is controlled via Telegram, Discord, Slack, WhatsApp, or a web dashboard. The Python package is named `pocketpaw`, matching the public-facing name. Python 3.11+ required.
## Knowledge Base
A codebase wiki lives at `docs/wiki/` — auto-generated from AST analysis + LLM compilation. **Read the relevant wiki article before modifying a module.**
```bash
# Search the KB from terminal
cd /path/to/knowledge-base && kb search "GroupService" --scope paw-cloud
# Show a specific module's wiki
kb show group_service --scope paw-cloud
# Rebuild after big changes (also runs automatically via PostCommit hook)
kb build ./ee/cloud --scope paw-cloud --output docs/wiki/
# Check wiki health
kb lint --scope paw-cloud
```
Key wiki articles for the enterprise cloud module:
- `docs/wiki/index.md` — Full index with all articles
- `docs/wiki/group_service.md` — Chat group CRUD, membership, agents
- `docs/wiki/message_service.md` — Message CRUD, reactions, threads
- `docs/wiki/service.md` (workspace) — Workspace CRUD, members, invites
- `docs/wiki/agent_bridge.md` — Agent orchestration for cloud chat
- `docs/wiki/errors.md` — CloudError hierarchy
The wiki auto-rebuilds on commits that touch `ee/cloud/` files (via `.claude/hooks/kb-rebuild.sh`).
## Commands
```bash

102
connectors/airtable.yaml Normal file
View File

@@ -0,0 +1,102 @@
# Airtable connector — spreadsheet-database hybrid for structured data.
# Created: 2026-03-30
name: airtable
display_name: Airtable
type: database
icon: table
auth:
method: bearer
credentials:
- name: AIRTABLE_TOKEN
description: Airtable personal access token (airtable.com/create/tokens)
required: true
actions:
- name: list_bases
description: List all accessible Airtable bases
method: GET
url: https://api.airtable.com/v0/meta/bases
trust_level: auto
- name: list_tables
description: List tables in a base with their fields
method: GET
url: https://api.airtable.com/v0/meta/bases/{base_id}/tables
params:
base_id: { type: string, required: true, description: "Base ID (starts with app...)" }
trust_level: auto
- name: list_records
description: List records from a table
method: GET
url: https://api.airtable.com/v0/{base_id}/{table_id}
params:
base_id: { type: string, required: true }
table_id: { type: string, required: true, description: "Table name or ID" }
maxRecords: { type: integer, default: 25 }
view: { type: string, description: "View name or ID" }
filterByFormula: { type: string, description: "Airtable formula filter (e.g. {Status} = 'Active')" }
sort: { type: object, description: "[{field: 'Name', direction: 'asc'}]" }
trust_level: auto
- name: get_record
description: Get a specific record by ID
method: GET
url: https://api.airtable.com/v0/{base_id}/{table_id}/{record_id}
params:
base_id: { type: string, required: true }
table_id: { type: string, required: true }
record_id: { type: string, required: true }
trust_level: auto
- name: create_record
description: Create a new record in a table
method: POST
url: https://api.airtable.com/v0/{base_id}/{table_id}
params:
base_id: { type: string, required: true }
table_id: { type: string, required: true }
body:
fields: { type: object, required: true, description: "Field values (e.g. {Name: '...', Status: 'Active'})" }
trust_level: confirm
- name: update_record
description: Update an existing record
method: PATCH
url: https://api.airtable.com/v0/{base_id}/{table_id}/{record_id}
params:
base_id: { type: string, required: true }
table_id: { type: string, required: true }
record_id: { type: string, required: true }
body:
fields: { type: object, required: true, description: "Fields to update" }
trust_level: confirm
- name: search_records
description: Search records with a formula filter
method: GET
url: https://api.airtable.com/v0/{base_id}/{table_id}
params:
base_id: { type: string, required: true }
table_id: { type: string, required: true }
filterByFormula: { type: string, required: true, description: "Airtable formula (e.g. SEARCH('term', {Name}))" }
maxRecords: { type: integer, default: 25 }
trust_level: auto
- name: create_records_batch
description: Create multiple records at once (up to 10)
method: POST
url: https://api.airtable.com/v0/{base_id}/{table_id}
params:
base_id: { type: string, required: true }
table_id: { type: string, required: true }
body:
records: { type: object, required: true, description: "Array of {fields: {...}} objects (max 10)" }
trust_level: confirm
sync:
table: airtable_records
schedule: every_15m
mapping: {}

83
connectors/bigquery.yaml Normal file
View File

@@ -0,0 +1,83 @@
# BigQuery connector — Google Cloud data warehouse.
# Created: 2026-03-30
name: bigquery
display_name: Google BigQuery
type: database
icon: database
auth:
method: bearer
credentials:
- name: GCP_SERVICE_ACCOUNT_KEY
description: GCP service account JSON key (base64-encoded or file path)
required: true
- name: GCP_PROJECT_ID
description: Google Cloud project ID
required: true
actions:
- name: execute_query
description: Execute a SQL query against BigQuery
method: LOCAL
params:
query: { type: string, required: true, description: "Standard SQL query" }
project_id: { type: string, description: "GCP project ID (overrides default)" }
max_results: { type: integer, default: 100 }
use_legacy_sql: { type: boolean, default: false }
trust_level: confirm
- name: list_datasets
description: List datasets in the project
method: LOCAL
params:
project_id: { type: string }
trust_level: auto
- name: list_tables
description: List tables in a dataset
method: LOCAL
params:
dataset_id: { type: string, required: true }
project_id: { type: string }
trust_level: auto
- name: describe_table
description: Get schema for a table
method: LOCAL
params:
dataset_id: { type: string, required: true }
table_id: { type: string, required: true }
project_id: { type: string }
trust_level: auto
- name: preview_table
description: Preview rows from a table
method: LOCAL
params:
dataset_id: { type: string, required: true }
table_id: { type: string, required: true }
limit: { type: integer, default: 20 }
trust_level: auto
- name: list_jobs
description: List recent query jobs
method: LOCAL
params:
project_id: { type: string }
max_results: { type: integer, default: 20 }
state_filter: { type: string, enum: [done, pending, running] }
trust_level: auto
- name: get_job
description: Get details of a specific job
method: LOCAL
params:
job_id: { type: string, required: true }
project_id: { type: string }
trust_level: auto
sync:
table: null
schedule: manual
mapping: {}

113
connectors/confluence.yaml Normal file
View File

@@ -0,0 +1,113 @@
# Confluence connector — Atlassian wiki & documentation platform.
# Created: 2026-03-30
name: confluence
display_name: Confluence
type: knowledge
icon: file-text
auth:
method: basic
credentials:
- name: CONFLUENCE_BASE_URL
description: Confluence instance URL (e.g. https://yourorg.atlassian.net/wiki)
required: true
- name: CONFLUENCE_EMAIL
description: Atlassian account email
required: true
- name: CONFLUENCE_API_TOKEN
description: Atlassian API token
required: true
actions:
- name: search
description: Search Confluence content using CQL
method: GET
url: "{CONFLUENCE_BASE_URL}/rest/api/content/search"
params:
cql: { type: string, required: true, description: "CQL query (e.g. type=page AND text~'search term')" }
limit: { type: integer, default: 25 }
expand: { type: string, default: "space,version,ancestors" }
trust_level: auto
- name: list_spaces
description: List all accessible spaces
method: GET
url: "{CONFLUENCE_BASE_URL}/rest/api/space"
params:
limit: { type: integer, default: 25 }
type: { type: string, enum: [global, personal] }
trust_level: auto
- name: get_page
description: Get a specific page by ID
method: GET
url: "{CONFLUENCE_BASE_URL}/rest/api/content/{page_id}"
params:
page_id: { type: string, required: true }
expand: { type: string, default: "body.storage,version,space,ancestors" }
trust_level: auto
- name: get_page_children
description: List child pages of a page
method: GET
url: "{CONFLUENCE_BASE_URL}/rest/api/content/{page_id}/child/page"
params:
page_id: { type: string, required: true }
limit: { type: integer, default: 25 }
trust_level: auto
- name: list_space_pages
description: List pages in a specific space
method: GET
url: "{CONFLUENCE_BASE_URL}/rest/api/content"
params:
spaceKey: { type: string, required: true, description: "Space key (e.g. TEAM)" }
limit: { type: integer, default: 25 }
trust_level: auto
- name: create_page
description: Create a new Confluence page
method: POST
url: "{CONFLUENCE_BASE_URL}/rest/api/content"
body:
type: { type: string, default: "page" }
title: { type: string, required: true }
space: { type: object, required: true, description: "{key: 'SPACEKEY'}" }
body: { type: object, required: true, description: "{storage: {value: '<p>HTML content</p>', representation: 'storage'}}" }
ancestors: { type: object, description: "[{id: 'parent_page_id'}]" }
trust_level: confirm
- name: update_page
description: Update an existing Confluence page
method: PUT
url: "{CONFLUENCE_BASE_URL}/rest/api/content/{page_id}"
params:
page_id: { type: string, required: true }
body:
version: { type: object, required: true, description: "{number: current_version + 1}" }
title: { type: string, required: true }
type: { type: string, default: "page" }
body: { type: object, required: true, description: "{storage: {value: '<p>Updated HTML</p>', representation: 'storage'}}" }
trust_level: confirm
- name: get_page_comments
description: Get comments on a page
method: GET
url: "{CONFLUENCE_BASE_URL}/rest/api/content/{page_id}/child/comment"
params:
page_id: { type: string, required: true }
limit: { type: integer, default: 25 }
expand: { type: string, default: "body.storage,version" }
trust_level: auto
sync:
table: confluence_pages
schedule: every_30m
mapping:
id: id
title: title
space: space.key
url: _links.webui
version: version.number
updated: version.when

72
connectors/csv.yaml Normal file
View File

@@ -0,0 +1,72 @@
# CSV connector — import local CSV/Excel/JSON/Parquet files into pocket.db.
# Created: 2026-03-27
# Updated: 2026-03-30 — Added JSON/Parquet import, preview, query, export, and stats actions.
name: csv
display_name: File Import
type: file
icon: file-spreadsheet
auth:
method: none
credentials: []
actions:
- name: import_file
description: Import a CSV, Excel, JSON, or Parquet file into a pocket table
method: LOCAL
params:
file_path: { type: string, required: true, description: "Path to the file (CSV, XLSX, JSON, Parquet)" }
table_name: { type: string, description: "Target table name (defaults to filename)" }
delimiter: { type: string, default: ",", description: "Column delimiter (CSV only)" }
has_header: { type: boolean, default: true }
encoding: { type: string, default: "utf-8" }
sheet_name: { type: string, description: "Sheet name for Excel files (defaults to first sheet)" }
trust_level: auto
- name: list_tables
description: List imported tables in this pocket
method: LOCAL
trust_level: auto
- name: preview_table
description: Preview rows from an imported table
method: LOCAL
params:
table_name: { type: string, required: true }
limit: { type: integer, default: 20 }
trust_level: auto
- name: table_stats
description: Get row count, column types, and basic stats for a table
method: LOCAL
params:
table_name: { type: string, required: true }
trust_level: auto
- name: query_table
description: Run a SQL query against imported tables
method: LOCAL
params:
query: { type: string, required: true, description: "SQL query (e.g. SELECT * FROM my_table WHERE amount > 100)" }
trust_level: auto
- name: drop_table
description: Remove an imported table from the pocket
method: LOCAL
params:
table_name: { type: string, required: true }
trust_level: confirm
- name: export_table
description: Export a table to a CSV or JSON file
method: LOCAL
params:
table_name: { type: string, required: true }
output_path: { type: string, required: true, description: "Path for the exported file" }
format: { type: string, enum: [csv, json], default: csv }
trust_level: auto
sync:
table: null
schedule: manual
mapping: {}

114
connectors/datadog.yaml Normal file
View File

@@ -0,0 +1,114 @@
# Datadog connector — monitoring, APM, and observability.
# Created: 2026-03-30
name: datadog
display_name: Datadog
type: observability
icon: activity
auth:
method: api_key
credentials:
- name: DD_API_KEY
description: Datadog API key
required: true
- name: DD_APP_KEY
description: Datadog application key
required: true
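# NOTE(review): DD_SITE is declared here, but every action URL below hard-codes
# api.datadoghq.com — confirm that non-US sites (e.g. datadoghq.eu) are routed correctly.
- name: DD_SITE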
description: Datadog site (e.g. datadoghq.com, datadoghq.eu, us5.datadoghq.com)
required: false
actions:
- name: list_monitors
description: List configured monitors and their statuses
method: GET
url: https://api.datadoghq.com/api/v1/monitor
params:
page_size: { type: integer, default: 25 }
group_states: { type: string, default: "alert,warn,no data" }
trust_level: auto
- name: search_monitors
description: Search monitors by query
method: GET
url: https://api.datadoghq.com/api/v1/monitor/search
params:
query: { type: string, required: true, description: "Search query (e.g. tag:env:production status:alert)" }
per_page: { type: integer, default: 25 }
trust_level: auto
- name: query_metrics
description: Query time-series metrics
method: GET
url: https://api.datadoghq.com/api/v1/query
params:
query: { type: string, required: true, description: "Datadog metrics query (e.g. avg:system.cpu.user{env:production})" }
from: { type: integer, required: true, description: "Start timestamp (epoch seconds)" }
to: { type: integer, required: true, description: "End timestamp (epoch seconds)" }
trust_level: auto
- name: list_events
description: List recent events
method: GET
url: https://api.datadoghq.com/api/v1/events
params:
start: { type: integer, required: true, description: "Start timestamp (epoch seconds)" }
end: { type: integer, required: true, description: "End timestamp (epoch seconds)" }
priority: { type: string, enum: [normal, low] }
trust_level: auto
- name: list_dashboards
description: List all dashboards
method: GET
url: https://api.datadoghq.com/api/v1/dashboard
trust_level: auto
- name: list_hosts
description: List infrastructure hosts
method: GET
url: https://api.datadoghq.com/api/v1/hosts
params:
count: { type: integer, default: 25 }
filter: { type: string, description: "Filter by hostname, tag, etc." }
trust_level: auto
- name: list_downtimes
description: List scheduled downtimes
method: GET
url: https://api.datadoghq.com/api/v1/downtime
trust_level: auto
- name: search_logs
description: Search logs
method: POST
url: https://api.datadoghq.com/api/v2/logs/events/search
body:
filter:
type: object
required: true
description: "{query: '@service:web-app status:error', from: 'now-1h', to: 'now'}"
page: { type: object, default: { limit: 25 } }
trust_level: auto
- name: mute_monitor
description: Mute a monitor
method: POST
url: https://api.datadoghq.com/api/v1/monitor/{monitor_id}/mute
params:
monitor_id: { type: integer, required: true }
body:
end: { type: integer, description: "End timestamp for the mute (epoch seconds)" }
scope: { type: string, description: "Scope to mute (e.g. env:staging)" }
trust_level: confirm
sync:
table: datadog_monitors
schedule: every_5m
mapping:
id: id
name: name
type: type
status: overall_state
query: query
updated: modified

68
connectors/drive.yaml Normal file
View File

@@ -0,0 +1,68 @@
# Google Drive connector — live file search, content fetch, and revision history.
# Created: 2026-04-16
# Pairs with src/pocketpaw/connectors/drive/ for the SourceAdapter surface.
# Auth is OAuth 2.0 (bearer token) — either from the credential broker at
# dispatch time or from GOOGLE_OAUTH_TOKEN for local dev.
name: drive
display_name: Google Drive
type: knowledge
icon: cloud
auth:
method: bearer
credentials:
- name: GOOGLE_OAUTH_TOKEN
description: OAuth access token with https://www.googleapis.com/auth/drive.readonly (plus drive.file for writes)
required: true
actions:
- name: list_files
description: List or browse files in Drive, newest first
method: GET
url: https://www.googleapis.com/drive/v3/files
params:
q: { type: string, description: "Drive search query (e.g. \"name contains 'forecast'\")" }
page_size: { type: integer, default: 20, description: "Max results (1-100)" }
order_by: { type: string, default: "modifiedTime desc" }
trust_level: auto
- name: search_files
description: Full-text search across Drive file contents and metadata
method: GET
url: https://www.googleapis.com/drive/v3/files
params:
query: { type: string, required: true, description: "Free-text search term; wrapped into fullText contains" }
page_size: { type: integer, default: 20 }
trust_level: auto
- name: get_file_content
description: Fetch the content of a single file, exporting Google Docs to PDF
method: GET
url: https://www.googleapis.com/drive/v3/files/{file_id}
params:
file_id: { type: string, required: true, description: "Drive file ID" }
revision_id: { type: string, description: "Specific revision to fetch (defaults to latest)" }
trust_level: auto
- name: get_file_revisions
description: List the edit history of a file for point-in-time retrieval
method: GET
url: https://www.googleapis.com/drive/v3/files/{file_id}/revisions
params:
file_id: { type: string, required: true }
page_size: { type: integer, default: 50 }
trust_level: auto
sync:
# Drive is primarily live-federated via SourceAdapter — no batch sync table.
# The ingest path (DriveIngestAdapter, future) would populate drive_files.
table: drive_files
schedule: manual
mapping:
id: id
name: name
mime_type: mimeType
modified: modifiedTime
size: size
web_view_link: webViewLink

136
connectors/firebase.yaml Normal file
View File

@@ -0,0 +1,136 @@
# Firebase connector — wraps Firebase CLI (firebase-tools) for project management,
# Firestore, Auth, Hosting, Cloud Functions, Remote Config, and Extensions.
# Created: 2026-04-01
name: firebase
display_name: Firebase
type: cloud
icon: flame
auth:
method: none # Firebase CLI uses its own auth (firebase login)
credentials:
- name: FIREBASE_PROJECT
description: "Firebase project ID (optional, uses default if not set)"
required: false
actions:
# -- Project Management --
- name: list_projects
description: List all Firebase projects you have access to
method: LOCAL
trust_level: auto
- name: get_project
description: Get details of a specific Firebase project
method: LOCAL
params:
project_id: { type: string, required: true, description: "Firebase project ID" }
trust_level: auto
# -- Firestore --
- name: firestore_list_collections
description: List Firestore indexes and collection info for the project
method: LOCAL
params:
database: { type: string, description: "Database ID (defaults to (default))" }
trust_level: auto
- name: firestore_databases_list
description: List all Firestore databases in the project
method: LOCAL
trust_level: auto
- name: firestore_get
description: Get a Firestore document or collection at the given path
method: LOCAL
params:
path: { type: string, required: true, description: "Document path (e.g. users/uid123)" }
database: { type: string, description: "Database ID (defaults to (default))" }
trust_level: auto
- name: firestore_delete
description: Delete a Firestore document at the given path
method: LOCAL
params:
path: { type: string, required: true, description: "Document path to delete" }
recursive: { type: boolean, default: false, description: "Recursively delete subcollections" }
database: { type: string, description: "Database ID (defaults to (default))" }
trust_level: confirm
- name: firestore_export
description: Export Firestore data to a GCS bucket
method: LOCAL
params:
destination: { type: string, required: true, description: "GCS bucket URI (gs://bucket-name)" }
collection_ids: { type: string, description: "Comma-separated collection IDs to export" }
database: { type: string, description: "Database ID (defaults to (default))" }
trust_level: confirm
# -- Authentication --
- name: auth_list_users
description: Export user accounts from Firebase Auth as JSON or CSV
method: LOCAL
params:
format: { type: string, enum: [json, csv], default: json, description: "Output format" }
trust_level: auto
- name: auth_import_users
description: Import users into Firebase Auth from a data file
method: LOCAL
params:
data_file: { type: string, required: true, description: "Path to CSV or JSON user data file" }
trust_level: restricted
# -- Hosting --
- name: hosting_list_sites
description: List all Firebase Hosting sites in the project
method: LOCAL
trust_level: auto
- name: hosting_deploy
description: Deploy to Firebase Hosting
method: LOCAL
params:
site: { type: string, description: "Specific hosting site to deploy to" }
trust_level: restricted
# -- Cloud Functions --
- name: functions_list
description: List all deployed Cloud Functions in the project
method: LOCAL
trust_level: auto
- name: functions_log
description: View recent Cloud Functions logs
method: LOCAL
params:
function_name: { type: string, description: "Filter logs to a specific function" }
limit: { type: integer, default: 50, description: "Number of log entries to fetch" }
trust_level: auto
- name: functions_deploy
description: Deploy Cloud Functions to Firebase
method: LOCAL
params:
function_name: { type: string, description: "Deploy only a specific function" }
trust_level: restricted
# -- Remote Config --
- name: remoteconfig_get
description: Get the current Remote Config template for the project
method: LOCAL
params:
version_number: { type: string, description: "Specific version number to fetch" }
trust_level: auto
# -- Extensions --
- name: extensions_list
description: List installed Firebase Extensions in the project
method: LOCAL
trust_level: auto
sync:
table: null
schedule: manual
mapping: {}

111
connectors/freshdesk.yaml Normal file
View File

@@ -0,0 +1,111 @@
# Freshdesk connector — customer support and helpdesk.
# Created: 2026-03-30
name: freshdesk
display_name: Freshdesk
type: support
icon: life-buoy
auth:
method: api_key
credentials:
- name: FRESHDESK_DOMAIN
description: Freshdesk domain (e.g. yourcompany in yourcompany.freshdesk.com)
required: true
- name: FRESHDESK_API_KEY
description: Freshdesk API key (Profile Settings → Your API Key)
required: true
actions:
- name: list_tickets
description: List support tickets with optional filters
method: GET
url: "https://{FRESHDESK_DOMAIN}.freshdesk.com/api/v2/tickets"
params:
filter: { type: string, enum: [new_and_my_open, watching, spam, deleted], default: "new_and_my_open" }
order_by: { type: string, enum: [created_at, due_by, updated_at], default: updated_at }
order_type: { type: string, enum: [asc, desc], default: desc }
per_page: { type: integer, default: 30 }
trust_level: auto
- name: search_tickets
description: Search tickets using Freshdesk query language
method: GET
url: "https://{FRESHDESK_DOMAIN}.freshdesk.com/api/v2/search/tickets"
params:
query: { type: string, required: true, description: "Search query (e.g. \"status:2 AND priority:3\")" }
trust_level: auto
- name: get_ticket
description: Get ticket details including conversations
method: GET
url: "https://{FRESHDESK_DOMAIN}.freshdesk.com/api/v2/tickets/{ticket_id}"
params:
ticket_id: { type: integer, required: true }
include: { type: string, default: "conversations,requester,stats" }
trust_level: auto
- name: create_ticket
description: Create a new support ticket
method: POST
url: "https://{FRESHDESK_DOMAIN}.freshdesk.com/api/v2/tickets"
body:
subject: { type: string, required: true }
description: { type: string, required: true }
email: { type: string, required: true }
priority: { type: integer, enum: [1, 2, 3, 4], default: 1, description: "1=Low, 2=Medium, 3=High, 4=Urgent" }
status: { type: integer, enum: [2, 3, 4, 5], default: 2, description: "2=Open, 3=Pending, 4=Resolved, 5=Closed" }
type: { type: string, description: "Ticket type (e.g. Incident, Problem, Request)" }
trust_level: confirm
- name: reply_to_ticket
description: Add a reply to a ticket
method: POST
url: "https://{FRESHDESK_DOMAIN}.freshdesk.com/api/v2/tickets/{ticket_id}/reply"
params:
ticket_id: { type: integer, required: true }
body:
body: { type: string, required: true, description: "Reply content (HTML)" }
trust_level: confirm
- name: update_ticket
description: Update ticket properties
method: PUT
url: "https://{FRESHDESK_DOMAIN}.freshdesk.com/api/v2/tickets/{ticket_id}"
params:
ticket_id: { type: integer, required: true }
body:
status: { type: integer }
priority: { type: integer }
agent_id: { type: integer }
group_id: { type: integer }
trust_level: confirm
- name: list_agents
description: List helpdesk agents
method: GET
url: "https://{FRESHDESK_DOMAIN}.freshdesk.com/api/v2/agents"
params:
per_page: { type: integer, default: 50 }
trust_level: auto
- name: list_contacts
description: List customer contacts
method: GET
url: "https://{FRESHDESK_DOMAIN}.freshdesk.com/api/v2/contacts"
params:
per_page: { type: integer, default: 30 }
trust_level: auto
sync:
table: freshdesk_tickets
schedule: every_15m
mapping:
id: id
subject: subject
status: status
priority: priority
requester: requester_id
agent: responder_id
created: created_at
updated: updated_at

162
connectors/gcp.yaml Normal file
View File

@@ -0,0 +1,162 @@
# GCP connector — Google Cloud Platform via gcloud CLI.
# Created: 2026-04-01
# Wraps gcloud CLI (Google Cloud SDK) for projects, storage, pubsub,
# cloud run, secrets, logging, compute, and IAM operations.
name: gcp
display_name: Google Cloud Platform
type: cloud
icon: cloud
auth:
method: none # gcloud CLI uses its own auth (gcloud auth login)
credentials:
- name: GCP_PROJECT
description: "GCP project ID (optional, uses default if not set)"
required: false
- name: GCP_REGION
description: "Default region (e.g., us-central1)"
required: false
actions:
# -- Projects --
- name: list_projects
description: List all accessible GCP projects
method: LOCAL
trust_level: auto
- name: get_project
description: Get details of a specific GCP project
method: LOCAL
params:
project_id: { type: string, required: true, description: "GCP project ID" }
trust_level: auto
# -- Cloud Storage --
- name: storage_list_buckets
description: List Cloud Storage buckets in the project
method: LOCAL
trust_level: auto
- name: storage_list_objects
description: List objects in a Cloud Storage bucket
method: LOCAL
params:
bucket: { type: string, required: true, description: "Bucket name (without gs:// prefix)" }
prefix: { type: string, description: "Object name prefix filter" }
trust_level: auto
- name: storage_get_object
description: Read the contents of a Cloud Storage object
method: LOCAL
params:
bucket: { type: string, required: true, description: "Bucket name" }
path: { type: string, required: true, description: "Object path within the bucket" }
trust_level: auto
- name: storage_copy
description: Copy a file to/from Cloud Storage
method: LOCAL
params:
src: { type: string, required: true, description: "Source path (local or gs://bucket/path)" }
dest: { type: string, required: true, description: "Destination path (local or gs://bucket/path)" }
trust_level: confirm
- name: storage_delete
description: Delete an object from Cloud Storage
method: LOCAL
params:
bucket: { type: string, required: true, description: "Bucket name" }
path: { type: string, required: true, description: "Object path to delete" }
trust_level: restricted
# -- Pub/Sub --
- name: pubsub_list_topics
description: List Pub/Sub topics in the project
method: LOCAL
trust_level: auto
- name: pubsub_list_subscriptions
description: List Pub/Sub subscriptions in the project
method: LOCAL
trust_level: auto
- name: pubsub_publish
description: Publish a message to a Pub/Sub topic
method: LOCAL
params:
topic: { type: string, required: true, description: "Topic name or full resource path" }
message: { type: string, required: true, description: "Message body to publish" }
trust_level: confirm
# -- Cloud Run --
- name: run_list_services
description: List Cloud Run services
method: LOCAL
trust_level: auto
- name: run_describe_service
description: Get details of a Cloud Run service
method: LOCAL
params:
name: { type: string, required: true, description: "Service name" }
trust_level: auto
- name: run_list_revisions
description: List Cloud Run revisions
method: LOCAL
trust_level: auto
# -- Secret Manager --
- name: secrets_list
description: List secrets in Secret Manager
method: LOCAL
trust_level: auto
- name: secrets_get
description: Access the latest version of a secret
method: LOCAL
params:
name: { type: string, required: true, description: "Secret name" }
trust_level: confirm
- name: secrets_create
description: Create a new secret in Secret Manager
method: LOCAL
params:
name: { type: string, required: true, description: "Secret name to create" }
trust_level: restricted
# -- Logging --
- name: logs_read
description: Read log entries from Cloud Logging
method: LOCAL
params:
filter: { type: string, description: "Log filter expression (e.g., resource.type=cloud_run_revision)" }
limit: { type: integer, default: 50, description: "Maximum number of entries to return" }
trust_level: auto
# -- Compute Engine --
- name: compute_list_instances
description: List Compute Engine VM instances
method: LOCAL
trust_level: auto
- name: compute_describe_instance
description: Get details of a Compute Engine VM instance
method: LOCAL
params:
name: { type: string, required: true, description: "Instance name" }
zone: { type: string, description: "Zone (e.g., us-central1-a)" }
trust_level: auto
# -- IAM --
- name: iam_list_accounts
description: List IAM service accounts in the project
method: LOCAL
trust_level: auto
sync:
table: null
schedule: manual
mapping: {}

136
connectors/github.yaml Normal file
View File

@@ -0,0 +1,136 @@
# GitHub connector — repositories, issues, PRs, and code search.
# Created: 2026-03-30
name: github
display_name: GitHub
type: developer
icon: git-branch
auth:
method: bearer
headers:
Accept: application/vnd.github+json
X-GitHub-Api-Version: "2022-11-28"
credentials:
- name: GITHUB_TOKEN
description: GitHub personal access token (classic or fine-grained)
required: true
actions:
- name: list_repos
description: List repositories for the authenticated user
method: GET
url: https://api.github.com/user/repos
params:
sort: { type: string, enum: [created, updated, pushed, full_name], default: updated }
per_page: { type: integer, default: 100 }
page: { type: integer, default: 1, description: "Page number for pagination" }
type: { type: string, enum: [all, owner, public, private, member], default: all }
trust_level: auto
- name: list_org_repos
description: List repositories for an organization
method: GET
url: https://api.github.com/orgs/{org}/repos
params:
org: { type: string, required: true, description: "Organization name" }
per_page: { type: integer, default: 100 }
page: { type: integer, default: 1, description: "Page number for pagination" }
sort: { type: string, enum: [created, updated, pushed, full_name], default: updated }
trust_level: auto
- name: list_issues
description: List issues for a repository
method: GET
url: https://api.github.com/repos/{owner}/{repo}/issues
params:
owner: { type: string, required: true }
repo: { type: string, required: true }
state: { type: string, enum: [open, closed, all], default: open }
labels: { type: string, description: "Comma-separated label names" }
per_page: { type: integer, default: 25 }
trust_level: auto
- name: list_pull_requests
description: List pull requests for a repository
method: GET
url: https://api.github.com/repos/{owner}/{repo}/pulls
params:
owner: { type: string, required: true }
repo: { type: string, required: true }
state: { type: string, enum: [open, closed, all], default: open }
per_page: { type: integer, default: 25 }
trust_level: auto
- name: search_code
description: Search code across GitHub repositories
method: GET
url: https://api.github.com/search/code
params:
q: { type: string, required: true, description: "Search query (e.g. 'filename:config.yaml org:myorg')" }
per_page: { type: integer, default: 20 }
trust_level: auto
- name: search_issues
description: Search issues and PRs across repositories
method: GET
url: https://api.github.com/search/issues
params:
q: { type: string, required: true, description: "Search query (e.g. 'is:issue is:open label:bug repo:owner/repo')" }
per_page: { type: integer, default: 20 }
trust_level: auto
- name: get_repo
description: Get repository details
method: GET
url: https://api.github.com/repos/{owner}/{repo}
params:
owner: { type: string, required: true }
repo: { type: string, required: true }
trust_level: auto
- name: create_issue
description: Create a new issue
method: POST
url: https://api.github.com/repos/{owner}/{repo}/issues
params:
owner: { type: string, required: true }
repo: { type: string, required: true }
body:
title: { type: string, required: true }
body: { type: string }
labels: { type: object, description: "Array of label names" }
assignees: { type: object, description: "Array of usernames" }
trust_level: confirm
- name: list_actions_runs
description: List recent GitHub Actions workflow runs
method: GET
url: https://api.github.com/repos/{owner}/{repo}/actions/runs
params:
owner: { type: string, required: true }
repo: { type: string, required: true }
status: { type: string, enum: [completed, in_progress, queued, failure, success] }
per_page: { type: integer, default: 10 }
trust_level: auto
- name: list_releases
description: List releases for a repository
method: GET
url: https://api.github.com/repos/{owner}/{repo}/releases
params:
owner: { type: string, required: true }
repo: { type: string, required: true }
per_page: { type: integer, default: 10 }
trust_level: auto
sync:
table: github_repos
schedule: every_30m
mapping:
id: id
name: full_name
description: description
stars: stargazers_count
language: language
updated: updated_at

108
connectors/gitlab.yaml Normal file
View File

@@ -0,0 +1,108 @@
# GitLab connector — repositories, issues, merge requests, and CI/CD.
# Created: 2026-03-30
name: gitlab
display_name: GitLab
type: developer
icon: git-merge
auth:
method: bearer
credentials:
- name: GITLAB_TOKEN
description: GitLab personal access token
required: true
- name: GITLAB_BASE_URL
description: GitLab instance URL (default https://gitlab.com for cloud)
required: false
actions:
- name: list_projects
description: List projects accessible to the authenticated user
method: GET
url: https://gitlab.com/api/v4/projects
params:
membership: { type: boolean, default: true }
order_by: { type: string, enum: [id, name, created_at, updated_at, last_activity_at], default: last_activity_at }
per_page: { type: integer, default: 25 }
trust_level: auto
- name: list_issues
description: List issues for a project
method: GET
url: https://gitlab.com/api/v4/projects/{project_id}/issues
params:
project_id: { type: string, required: true, description: "Project ID or URL-encoded path" }
state: { type: string, enum: [opened, closed, all], default: opened }
per_page: { type: integer, default: 25 }
trust_level: auto
- name: list_merge_requests
description: List merge requests for a project
method: GET
url: https://gitlab.com/api/v4/projects/{project_id}/merge_requests
params:
project_id: { type: string, required: true }
state: { type: string, enum: [opened, closed, merged, all], default: opened }
per_page: { type: integer, default: 25 }
trust_level: auto
- name: list_pipelines
description: List CI/CD pipelines for a project
method: GET
url: https://gitlab.com/api/v4/projects/{project_id}/pipelines
params:
project_id: { type: string, required: true }
status: { type: string, enum: [running, pending, success, failed, canceled, skipped] }
per_page: { type: integer, default: 20 }
trust_level: auto
- name: get_pipeline_jobs
description: List jobs in a specific pipeline
method: GET
url: https://gitlab.com/api/v4/projects/{project_id}/pipelines/{pipeline_id}/jobs
params:
project_id: { type: string, required: true }
pipeline_id: { type: integer, required: true }
trust_level: auto
- name: search_projects
description: Search for projects by name
method: GET
url: https://gitlab.com/api/v4/projects
params:
search: { type: string, required: true }
per_page: { type: integer, default: 20 }
trust_level: auto
- name: create_issue
description: Create a new project issue
method: POST
url: https://gitlab.com/api/v4/projects/{project_id}/issues
params:
project_id: { type: string, required: true }
body:
title: { type: string, required: true }
description: { type: string }
labels: { type: string, description: "Comma-separated label names" }
assignee_ids: { type: object, description: "Array of user IDs" }
trust_level: confirm
- name: list_environments
description: List deployment environments
method: GET
url: https://gitlab.com/api/v4/projects/{project_id}/environments
params:
project_id: { type: string, required: true }
per_page: { type: integer, default: 20 }
trust_level: auto
sync:
table: gitlab_projects
schedule: every_30m
mapping:
id: id
name: path_with_namespace
description: description
stars: star_count
updated: last_activity_at

100
connectors/hubspot.yaml Normal file
View File

@@ -0,0 +1,100 @@
# HubSpot connector — CRM, marketing, and sales data.
# Created: 2026-03-30
name: hubspot
display_name: HubSpot
type: crm
icon: target
auth:
method: bearer
credentials:
- name: HUBSPOT_ACCESS_TOKEN
description: HubSpot private app access token
required: true
actions:
- name: list_contacts
description: List CRM contacts
method: GET
url: https://api.hubapi.com/crm/v3/objects/contacts
params:
limit: { type: integer, default: 20 }
properties: { type: string, default: "firstname,lastname,email,phone,company,lifecyclestage" }
trust_level: auto
- name: list_companies
description: List CRM companies
method: GET
url: https://api.hubapi.com/crm/v3/objects/companies
params:
limit: { type: integer, default: 20 }
properties: { type: string, default: "name,domain,industry,annualrevenue,numberofemployees" }
trust_level: auto
- name: list_deals
description: List sales deals
method: GET
url: https://api.hubapi.com/crm/v3/objects/deals
params:
limit: { type: integer, default: 20 }
properties: { type: string, default: "dealname,amount,dealstage,closedate,pipeline" }
trust_level: auto
- name: list_tickets
description: List support tickets
method: GET
url: https://api.hubapi.com/crm/v3/objects/tickets
params:
limit: { type: integer, default: 20 }
properties: { type: string, default: "subject,content,hs_pipeline_stage,hs_ticket_priority,createdate" }
trust_level: auto
- name: search_contacts
description: Search contacts by query
method: POST
url: https://api.hubapi.com/crm/v3/objects/contacts/search
body:
query: { type: string, required: true, description: "Search term (name, email, etc.)" }
limit: { type: integer, default: 10 }
trust_level: auto
- name: create_contact
description: Create a new CRM contact
method: POST
url: https://api.hubapi.com/crm/v3/objects/contacts
body:
properties:
type: object
required: true
description: "Contact properties: {firstname, lastname, email, phone, company}"
trust_level: confirm
- name: create_deal
description: Create a new sales deal
method: POST
url: https://api.hubapi.com/crm/v3/objects/deals
body:
properties:
type: object
required: true
description: "Deal properties: {dealname, amount, dealstage, pipeline, closedate}"
trust_level: confirm
- name: list_owners
description: List HubSpot users/owners
method: GET
url: https://api.hubapi.com/crm/v3/owners
params:
limit: { type: integer, default: 50 }
trust_level: auto
sync:
table: hubspot_contacts
schedule: every_15m
mapping:
id: id
email: properties.email
first_name: properties.firstname
last_name: properties.lastname
company: properties.company

112
connectors/intercom.yaml Normal file
View File

@@ -0,0 +1,112 @@
# Intercom connector — customer messaging and engagement platform.
# Created: 2026-03-30
name: intercom
display_name: Intercom
type: support
icon: message-circle
auth:
method: bearer
credentials:
- name: INTERCOM_ACCESS_TOKEN
description: Intercom access token (Settings → Integrations → Developer Hub)
required: true
actions:
- name: list_conversations
description: List recent conversations
method: GET
url: https://api.intercom.io/conversations
params:
per_page: { type: integer, default: 20 }
sort_field: { type: string, default: "updated_at" }
sort_order: { type: string, enum: [ascending, descending], default: descending }
trust_level: auto
- name: search_conversations
description: Search conversations by query
method: POST
url: https://api.intercom.io/conversations/search
body:
query:
type: object
required: true
description: "{field: 'source.body', operator: '~', value: 'search term'}"
pagination: { type: object, default: { per_page: 20 } }
trust_level: auto
- name: get_conversation
description: Get a specific conversation with messages
method: GET
url: https://api.intercom.io/conversations/{conversation_id}
params:
conversation_id: { type: string, required: true }
trust_level: auto
- name: list_contacts
description: List contacts (users and leads)
method: GET
url: https://api.intercom.io/contacts
params:
per_page: { type: integer, default: 25 }
trust_level: auto
- name: search_contacts
description: Search contacts by email, name, or other fields
method: POST
url: https://api.intercom.io/contacts/search
body:
query:
type: object
required: true
description: "{field: 'email', operator: '=', value: 'user@example.com'}"
trust_level: auto
- name: reply_to_conversation
description: Send a reply to a conversation
method: POST
url: https://api.intercom.io/conversations/{conversation_id}/reply
params:
conversation_id: { type: string, required: true }
body:
message_type: { type: string, default: "comment" }
type: { type: string, default: "admin" }
admin_id: { type: string, required: true }
body: { type: string, required: true }
trust_level: confirm
- name: create_contact
description: Create a new contact
method: POST
url: https://api.intercom.io/contacts
body:
role: { type: string, enum: [user, lead], default: "user" }
email: { type: string, required: true }
name: { type: string }
phone: { type: string }
custom_attributes: { type: object }
trust_level: confirm
- name: list_tags
description: List all tags
method: GET
url: https://api.intercom.io/tags
trust_level: auto
- name: list_segments
description: List customer segments
method: GET
url: https://api.intercom.io/segments
trust_level: auto
sync:
table: intercom_conversations
schedule: every_15m
mapping:
id: id
title: source.subject
state: state
assignee: assignee.name
created: created_at
updated: updated_at

115
connectors/jira.yaml Normal file
View File

@@ -0,0 +1,115 @@
# Jira connector — Atlassian project & issue tracking.
# Created: 2026-03-30
name: jira
display_name: Jira
type: project-management
icon: kanban
auth:
method: basic
credentials:
- name: JIRA_BASE_URL
description: Jira instance URL (e.g. https://yourorg.atlassian.net)
required: true
- name: JIRA_EMAIL
description: Atlassian account email
required: true
- name: JIRA_API_TOKEN
description: Atlassian API token (create at id.atlassian.com/manage-profile/security/api-tokens)
required: true
actions:
- name: search_issues
description: Search issues using JQL
method: GET
url: "{JIRA_BASE_URL}/rest/api/3/search"
params:
jql: { type: string, required: true, description: "JQL query (e.g. project = PROJ AND status != Done)" }
maxResults: { type: integer, default: 25 }
fields: { type: string, default: "summary,status,assignee,priority,created,updated,issuetype" }
trust_level: auto
- name: get_issue
description: Get a specific issue by key
method: GET
url: "{JIRA_BASE_URL}/rest/api/3/issue/{issue_key}"
params:
issue_key: { type: string, required: true, description: "Issue key (e.g. PROJ-123)" }
trust_level: auto
- name: list_projects
description: List all accessible projects
method: GET
url: "{JIRA_BASE_URL}/rest/api/3/project"
params:
maxResults: { type: integer, default: 50 }
trust_level: auto
- name: list_sprints
description: List sprints for a board
method: GET
url: "{JIRA_BASE_URL}/rest/agile/1.0/board/{board_id}/sprint"
params:
board_id: { type: integer, required: true }
state: { type: string, enum: [active, future, closed], default: active }
trust_level: auto
- name: create_issue
description: Create a new Jira issue
method: POST
url: "{JIRA_BASE_URL}/rest/api/3/issue"
body:
fields:
type: object
required: true
description: "{project: {key: 'PROJ'}, summary: '...', issuetype: {name: 'Task'}, description: {...}}"
trust_level: confirm
- name: transition_issue
description: Move an issue to a new status
method: POST
url: "{JIRA_BASE_URL}/rest/api/3/issue/{issue_key}/transitions"
params:
issue_key: { type: string, required: true }
body:
transition:
type: object
required: true
description: "{id: 'transition_id'}"
trust_level: confirm
- name: add_comment
description: Add a comment to an issue
method: POST
url: "{JIRA_BASE_URL}/rest/api/3/issue/{issue_key}/comment"
params:
issue_key: { type: string, required: true }
body:
body:
type: object
required: true
description: "Atlassian Document Format comment body"
trust_level: confirm
- name: assign_issue
description: Assign an issue to a user
method: PUT
url: "{JIRA_BASE_URL}/rest/api/3/issue/{issue_key}/assignee"
params:
issue_key: { type: string, required: true }
body:
accountId: { type: string, required: true, description: "Atlassian account ID of the assignee" }
trust_level: confirm
sync:
table: jira_issues
schedule: every_15m
mapping:
id: id
key: key
summary: fields.summary
status: fields.status.name
assignee: fields.assignee.displayName
priority: fields.priority.name
type: fields.issuetype.name

109
connectors/linear.yaml Normal file
View File

@@ -0,0 +1,109 @@
# Linear connector — modern issue tracking & project management.
# Created: 2026-03-30
name: linear
display_name: Linear
type: project-management
icon: triangle
auth:
method: bearer
credentials:
- name: LINEAR_API_KEY
description: Linear API key (Settings → API → Personal API keys)
required: true
actions:
- name: list_issues
description: List issues assigned to you or filtered by team/project
method: POST
url: https://api.linear.app/graphql
body:
query:
type: string
default: "{ issues(first: 25, orderBy: updatedAt) { nodes { id identifier title state { name } assignee { name } priority priorityLabel createdAt updatedAt } } }"
trust_level: auto
- name: list_my_issues
description: List issues assigned to the authenticated user
method: POST
url: https://api.linear.app/graphql
body:
query:
type: string
default: "{ viewer { assignedIssues(first: 25, orderBy: updatedAt) { nodes { id identifier title state { name } priority priorityLabel project { name } createdAt } } } }"
trust_level: auto
- name: list_teams
description: List all teams in the workspace
method: POST
url: https://api.linear.app/graphql
body:
query:
type: string
default: "{ teams { nodes { id name key description } } }"
trust_level: auto
- name: list_projects
description: List all projects
method: POST
url: https://api.linear.app/graphql
body:
query:
type: string
default: "{ projects(first: 50, orderBy: updatedAt) { nodes { id name state startDate targetDate progress lead { name } } } }"
trust_level: auto
- name: search_issues
description: Search issues by query string
method: POST
url: https://api.linear.app/graphql
body:
query:
type: string
required: true
description: 'GraphQL query using issueSearch, e.g. { issueSearch(query: "bug") { nodes { identifier title state { name } } } }'
trust_level: auto
- name: create_issue
description: Create a new Linear issue
method: POST
url: https://api.linear.app/graphql
body:
query:
type: string
required: true
description: 'Mutation: mutation { issueCreate(input: {teamId: "...", title: "...", description: "..."}) { issue { id identifier title } } }'
trust_level: confirm
- name: update_issue_state
description: Update issue status/state
method: POST
url: https://api.linear.app/graphql
body:
query:
type: string
required: true
description: 'Mutation: mutation { issueUpdate(id: "...", input: {stateId: "..."}) { issue { id identifier state { name } } } }'
trust_level: confirm
- name: list_cycles
description: List active and upcoming cycles (sprints)
method: POST
url: https://api.linear.app/graphql
body:
query:
type: string
default: '{ cycles(first: 10, orderBy: createdAt, filter: { completedAt: { null: true } }) { nodes { id name number startsAt endsAt progress { total completed } } } }'
trust_level: auto
sync:
table: linear_issues
schedule: every_15m
mapping:
id: id
identifier: identifier
title: title
status: state.name
assignee: assignee.name
priority: priorityLabel

121
connectors/mongodb.yaml Normal file
View File

@@ -0,0 +1,121 @@
# MongoDB connector — document database.
# Created: 2026-03-30
# NOTE: This YAML defines the connector metadata (icon, credentials, display).
# Actual execution is handled by the native MongoDBAdapter, not the YAML REST engine.
name: mongodb
display_name: MongoDB
type: database
icon: database
auth:
method: none
credentials:
- name: MONGO_URI
description: Full connection URI (mongodb://... or mongodb+srv://...). If provided, host/port/user/pass are ignored.
required: false
- name: MONGO_HOST
description: MongoDB host (default localhost)
required: false
- name: MONGO_PORT
description: MongoDB port (default 27017)
required: false
- name: MONGO_DATABASE
description: Database name
required: true
- name: MONGO_USER
description: Username (if auth enabled)
required: false
- name: MONGO_PASSWORD
description: Password
required: false
actions:
- name: list_collections
description: List all collections in the database
method: LOCAL
trust_level: auto
- name: find
description: Query documents in a collection
method: LOCAL
params:
collection: { type: string, required: true }
filter: { type: object, description: "MongoDB query filter (JSON)" }
limit: { type: integer, default: 20 }
sort: { type: object, description: "Sort spec, e.g. {\"created\": -1}" }
trust_level: auto
- name: find_one
description: Get a single document by filter or _id
method: LOCAL
params:
collection: { type: string, required: true }
filter: { type: object, required: true }
trust_level: auto
- name: count
description: Count documents in a collection
method: LOCAL
params:
collection: { type: string, required: true }
filter: { type: object }
trust_level: auto
- name: distinct
description: Get distinct values for a field
method: LOCAL
params:
collection: { type: string, required: true }
field: { type: string, required: true }
trust_level: auto
- name: aggregate
description: Run an aggregation pipeline
method: LOCAL
params:
collection: { type: string, required: true }
pipeline: { type: object, required: true, description: "Aggregation pipeline array" }
trust_level: confirm
- name: collection_stats
description: Get stats for all collections (doc count, size)
method: LOCAL
trust_level: auto
- name: indexes
description: List indexes on a collection
method: LOCAL
params:
collection: { type: string, required: true }
trust_level: auto
- name: insert_one
description: Insert a document
method: LOCAL
params:
collection: { type: string, required: true }
document: { type: object, required: true }
trust_level: confirm
- name: update_many
description: Update documents matching a filter
method: LOCAL
params:
collection: { type: string, required: true }
filter: { type: object, required: true }
update: { type: object, required: true }
trust_level: confirm
- name: delete_many
description: Delete documents matching a filter
method: LOCAL
params:
collection: { type: string, required: true }
filter: { type: object, required: true }
trust_level: restricted
sync:
table: null
schedule: manual
mapping: {}

View File

@@ -0,0 +1,95 @@
# Microsoft Teams connector (data source) — search messages, channels, and teams.
# Created: 2026-03-30
# NOTE: This is the Teams *data connector* for querying workspace data.
# The Teams *channel adapter* (bus/adapters/teams_adapter.py) handles real-time messaging.
name: ms_teams_data
display_name: Microsoft Teams (Data)
type: communication
icon: users
auth:
method: bearer
credentials:
- name: MS_ACCESS_TOKEN
description: Microsoft Graph API access token
required: true
actions:
- name: list_teams
description: List teams the user has joined
method: GET
url: https://graph.microsoft.com/v1.0/me/joinedTeams
trust_level: auto
- name: list_channels
description: List channels in a team
method: GET
url: https://graph.microsoft.com/v1.0/teams/{team_id}/channels
params:
team_id: { type: string, required: true }
trust_level: auto
- name: channel_messages
description: Get recent messages from a channel
method: GET
url: https://graph.microsoft.com/v1.0/teams/{team_id}/channels/{channel_id}/messages
params:
team_id: { type: string, required: true }
channel_id: { type: string, required: true }
trust_level: auto
- name: list_chats
description: List recent 1:1 and group chats
method: GET
url: https://graph.microsoft.com/v1.0/me/chats
params:
$top: { type: integer, default: 25 }
trust_level: auto
- name: chat_messages
description: Get messages from a chat
method: GET
url: https://graph.microsoft.com/v1.0/chats/{chat_id}/messages
params:
chat_id: { type: string, required: true }
$top: { type: integer, default: 25 }
trust_level: auto
- name: search_messages
description: Search across all Teams messages
method: POST
url: https://graph.microsoft.com/v1.0/search/query
body:
requests:
type: object
required: true
description: "[{entityTypes: ['chatMessage'], query: {queryString: 'search term'}}]"
trust_level: auto
- name: list_team_members
description: List members of a team
method: GET
url: https://graph.microsoft.com/v1.0/teams/{team_id}/members
params:
team_id: { type: string, required: true }
trust_level: auto
- name: send_channel_message
description: Send a message to a Teams channel
method: POST
url: https://graph.microsoft.com/v1.0/teams/{team_id}/channels/{channel_id}/messages
params:
team_id: { type: string, required: true }
channel_id: { type: string, required: true }
body:
body:
type: object
required: true
description: "{contentType: 'text', content: 'Message text'}"
trust_level: confirm
sync:
table: teams_messages
schedule: manual
mapping: {}

105
connectors/notion.yaml Normal file
View File

@@ -0,0 +1,105 @@
# Notion connector — workspace pages, databases, and knowledge base.
# Created: 2026-03-30
name: notion
display_name: Notion
type: knowledge
icon: book-open
auth:
method: bearer
headers:
Notion-Version: "2022-06-28"
credentials:
- name: NOTION_API_KEY
description: Notion integration token (internal integration secret)
required: true
actions:
- name: search
description: Search across all Notion pages and databases
method: POST
url: https://api.notion.com/v1/search
body:
query: { type: string, description: "Search term" }
filter: { type: object, description: "{property: 'object', value: 'page'|'database'}" }
page_size: { type: integer, default: 10 }
trust_level: auto
- name: list_databases
description: List all databases shared with the integration
method: POST
url: https://api.notion.com/v1/search
body:
filter: { type: object, default: { property: "object", value: "database" } }
page_size: { type: integer, default: 25 }
trust_level: auto
- name: query_database
description: Query a Notion database with filters and sorts
method: POST
url: https://api.notion.com/v1/databases/{database_id}/query
params:
database_id: { type: string, required: true, description: "Database UUID" }
body:
filter: { type: object, description: "Notion filter object" }
sorts: { type: object, description: "Sort criteria" }
page_size: { type: integer, default: 25 }
trust_level: auto
- name: get_page
description: Retrieve a Notion page and its properties
method: GET
url: https://api.notion.com/v1/pages/{page_id}
params:
page_id: { type: string, required: true, description: "Page UUID" }
trust_level: auto
- name: get_page_content
description: Get the block content (body) of a page
method: GET
url: https://api.notion.com/v1/blocks/{block_id}/children
params:
block_id: { type: string, required: true, description: "Page or block UUID" }
page_size: { type: integer, default: 100 }
trust_level: auto
- name: create_page
description: Create a new page in a database or as a child of another page
method: POST
url: https://api.notion.com/v1/pages
body:
parent: { type: object, required: true, description: "{database_id: '...'} or {page_id: '...'}" }
properties: { type: object, required: true, description: "Page properties matching the database schema" }
children: { type: object, description: "Block content for the page body" }
trust_level: confirm
- name: update_page
description: Update page properties
method: PATCH
url: https://api.notion.com/v1/pages/{page_id}
params:
page_id: { type: string, required: true }
body:
properties: { type: object, required: true, description: "Properties to update" }
trust_level: confirm
- name: append_blocks
description: Append content blocks to a page
method: PATCH
url: https://api.notion.com/v1/blocks/{block_id}/children
params:
block_id: { type: string, required: true, description: "Parent page/block UUID" }
body:
children: { type: object, required: true, description: "Array of block objects to append" }
trust_level: confirm
sync:
table: notion_pages
schedule: every_30m
mapping:
id: id
title: properties.title
url: url
created: created_time
updated: last_edited_time

121
connectors/pagerduty.yaml Normal file
View File

@@ -0,0 +1,121 @@
# PagerDuty connector — incident management and on-call scheduling.
# Created: 2026-03-30
name: pagerduty
display_name: PagerDuty
type: incident-management
icon: alert-triangle
auth:
method: bearer
headers:
Accept: application/vnd.pagerduty+json;version=2
Content-Type: application/json
credentials:
- name: PAGERDUTY_API_KEY
description: PagerDuty REST API key (v2)
required: true
actions:
- name: list_incidents
description: List recent incidents
method: GET
url: https://api.pagerduty.com/incidents
params:
statuses[]: { type: string, enum: [triggered, acknowledged, resolved], default: triggered }
sort_by: { type: string, default: "created_at:desc" }
limit: { type: integer, default: 25 }
trust_level: auto
- name: get_incident
description: Get details of a specific incident
method: GET
url: https://api.pagerduty.com/incidents/{incident_id}
params:
incident_id: { type: string, required: true }
trust_level: auto
- name: list_oncalls
description: List who is currently on call
method: GET
url: https://api.pagerduty.com/oncalls
params:
limit: { type: integer, default: 25 }
trust_level: auto
- name: list_services
description: List monitored services
method: GET
url: https://api.pagerduty.com/services
params:
limit: { type: integer, default: 25 }
trust_level: auto
- name: list_escalation_policies
description: List escalation policies
method: GET
url: https://api.pagerduty.com/escalation_policies
params:
limit: { type: integer, default: 25 }
trust_level: auto
- name: acknowledge_incident
description: Acknowledge an open incident
method: PUT
url: https://api.pagerduty.com/incidents/{incident_id}
params:
incident_id: { type: string, required: true }
body:
incident:
type: object
required: true
description: "{type: 'incident_reference', status: 'acknowledged'}"
trust_level: confirm
- name: resolve_incident
description: Resolve an incident
method: PUT
url: https://api.pagerduty.com/incidents/{incident_id}
params:
incident_id: { type: string, required: true }
body:
incident:
type: object
required: true
description: "{type: 'incident_reference', status: 'resolved'}"
trust_level: confirm
- name: create_incident
description: Trigger a new incident
method: POST
url: https://api.pagerduty.com/incidents
body:
incident:
type: object
required: true
description: "{type: 'incident', title: '...', service: {id: '...', type: 'service_reference'}, urgency: 'high'}"
trust_level: restricted
- name: add_note
description: Add a note to an incident
method: POST
url: https://api.pagerduty.com/incidents/{incident_id}/notes
params:
incident_id: { type: string, required: true }
body:
note:
type: object
required: true
description: "{content: 'Note text'}"
trust_level: confirm
sync:
table: pagerduty_incidents
schedule: every_5m
mapping:
id: id
title: title
status: status
urgency: urgency
service: service.summary
created: created_at

View File

@@ -0,0 +1,92 @@
# PostgreSQL connector — relational database queries.
# Created: 2026-03-30
name: postgresql
display_name: PostgreSQL
type: database
icon: server
auth:
method: none
credentials:
- name: PG_HOST
description: Database host (e.g. localhost or db.example.com)
required: true
- name: PG_PORT
description: Database port
required: false
- name: PG_DATABASE
description: Database name
required: true
- name: PG_USER
description: Database user
required: true
- name: PG_PASSWORD
description: Database password
required: true
- name: PG_SSL
description: Enable SSL (true/false)
required: false
actions:
- name: execute_query
description: Execute a SQL query (SELECT, INSERT, UPDATE, etc.)
method: LOCAL
params:
query: { type: string, required: true, description: "SQL query to execute" }
limit: { type: integer, default: 100, description: "Row limit for SELECT queries" }
trust_level: confirm
- name: list_tables
description: List all tables in the current database
method: LOCAL
params:
schema: { type: string, default: "public" }
trust_level: auto
- name: describe_table
description: Show column definitions for a table
method: LOCAL
params:
table: { type: string, required: true }
schema: { type: string, default: "public" }
trust_level: auto
- name: preview_table
description: Preview rows from a table
method: LOCAL
params:
table: { type: string, required: true }
schema: { type: string, default: "public" }
limit: { type: integer, default: 20 }
trust_level: auto
- name: list_schemas
description: List all schemas in the database
method: LOCAL
trust_level: auto
- name: table_stats
description: Get row counts and size estimates for tables
method: LOCAL
params:
schema: { type: string, default: "public" }
trust_level: auto
- name: list_indexes
description: List indexes on a table
method: LOCAL
params:
table: { type: string, required: true }
schema: { type: string, default: "public" }
trust_level: auto
- name: active_queries
description: List currently running queries
method: LOCAL
trust_level: auto
sync:
table: null
schedule: manual
mapping: {}

122
connectors/quickbooks.yaml Normal file
View File

@@ -0,0 +1,122 @@
# QuickBooks Online connector — accounting, invoices, and financial data.
# Created: 2026-03-30
name: quickbooks
display_name: QuickBooks Online
type: accounting
icon: receipt
auth:
method: bearer
credentials:
- name: QBO_ACCESS_TOKEN
description: QuickBooks OAuth 2.0 access token
required: true
- name: QBO_REALM_ID
description: QuickBooks company/realm ID
required: true
- name: QBO_ENVIRONMENT
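description: Environment (sandbox or production); note that the action URLs in this file are hard-coded to the production host quickbooks.api.intuit.com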
required: false
actions:
- name: list_invoices
description: List invoices
method: GET
url: "https://quickbooks.api.intuit.com/v3/company/{QBO_REALM_ID}/query"
params:
query: { type: string, default: "SELECT * FROM Invoice WHERE Balance > '0' ORDERBY DueDate" }
max_results: { type: integer, default: 25 }
trust_level: auto
- name: list_customers
description: List customers
method: GET
url: "https://quickbooks.api.intuit.com/v3/company/{QBO_REALM_ID}/query"
params:
query: { type: string, default: "SELECT * FROM Customer WHERE Active = true ORDERBY DisplayName" }
max_results: { type: integer, default: 50 }
trust_level: auto
- name: list_bills
description: List vendor bills
method: GET
url: "https://quickbooks.api.intuit.com/v3/company/{QBO_REALM_ID}/query"
params:
query: { type: string, default: "SELECT * FROM Bill WHERE Balance > '0' ORDERBY DueDate" }
max_results: { type: integer, default: 25 }
trust_level: auto
- name: list_expenses
description: List recent expenses/purchases
method: GET
url: "https://quickbooks.api.intuit.com/v3/company/{QBO_REALM_ID}/query"
params:
query: { type: string, default: "SELECT * FROM Purchase ORDERBY TxnDate DESC" }
max_results: { type: integer, default: 25 }
trust_level: auto
- name: list_accounts
description: List chart of accounts
method: GET
url: "https://quickbooks.api.intuit.com/v3/company/{QBO_REALM_ID}/query"
params:
query: { type: string, default: "SELECT * FROM Account WHERE Active = true ORDERBY AccountType" }
trust_level: auto
- name: profit_loss
description: Get Profit & Loss report
method: GET
url: "https://quickbooks.api.intuit.com/v3/company/{QBO_REALM_ID}/reports/ProfitAndLoss"
params:
start_date: { type: string, description: "Start date (YYYY-MM-DD)" }
end_date: { type: string, description: "End date (YYYY-MM-DD)" }
summarize_column_by: { type: string, enum: [Total, Month, Week, Days], default: Total }
trust_level: auto
- name: balance_sheet
description: Get Balance Sheet report
method: GET
url: "https://quickbooks.api.intuit.com/v3/company/{QBO_REALM_ID}/reports/BalanceSheet"
params:
date: { type: string, description: "As-of date (YYYY-MM-DD)" }
summarize_column_by: { type: string, enum: [Total, Month, Week], default: Total }
trust_level: auto
- name: query
description: Run a custom QuickBooks query
method: GET
url: "https://quickbooks.api.intuit.com/v3/company/{QBO_REALM_ID}/query"
params:
query: { type: string, required: true, description: "QuickBooks SQL-like query (e.g. SELECT * FROM Invoice WHERE TotalAmt > '1000')" }
trust_level: confirm
- name: list_vendors
description: List vendors
method: GET
url: "https://quickbooks.api.intuit.com/v3/company/{QBO_REALM_ID}/query"
params:
query: { type: string, default: "SELECT * FROM Vendor WHERE Active = true ORDERBY DisplayName" }
trust_level: auto
- name: create_invoice
description: Create a new invoice
method: POST
url: "https://quickbooks.api.intuit.com/v3/company/{QBO_REALM_ID}/invoice"
body:
CustomerRef: { type: object, required: true, description: "{value: 'customer_id'}" }
Line: { type: object, required: true, description: "Array of line items" }
DueDate: { type: string }
trust_level: confirm
sync:
table: qbo_invoices
schedule: every_30m
mapping:
id: Id
doc_number: DocNumber
customer: CustomerRef.name
total: TotalAmt
balance: Balance
due_date: DueDate
status: Balance

View File

@@ -0,0 +1,71 @@
# Generic REST connector — connect any REST API with user-defined endpoints.
# Created: 2026-03-27
# Updated: 2026-03-30 — Added PUT, PATCH, DELETE methods and header support.
name: rest_generic
display_name: REST API
type: api
icon: globe
auth:
method: bearer
credentials:
- name: BASE_URL
description: Base URL for the API (e.g. https://api.example.com)
required: true
- name: API_TOKEN
description: Bearer token or API key
required: false
- name: CUSTOM_HEADER_NAME
description: Custom header name (e.g. X-Api-Key) — if your API uses non-Bearer auth
required: false
- name: CUSTOM_HEADER_VALUE
description: Custom header value
required: false
actions:
- name: get_endpoint
description: Make a GET request to any endpoint
method: GET
params:
path: { type: string, required: true, description: "API path (e.g. /users)" }
query: { type: object, description: "Query parameters as key=value" }
trust_level: auto
- name: post_endpoint
description: Make a POST request with JSON body
method: POST
params:
path: { type: string, required: true }
body:
data: { type: object, description: "JSON request body" }
trust_level: confirm
- name: put_endpoint
description: Make a PUT request to replace a resource
method: PUT
params:
path: { type: string, required: true }
body:
data: { type: object, description: "JSON request body" }
trust_level: confirm
- name: patch_endpoint
description: Make a PATCH request to partially update a resource
method: PATCH
params:
path: { type: string, required: true }
body:
data: { type: object, description: "JSON request body" }
trust_level: confirm
- name: delete_endpoint
description: Make a DELETE request to remove a resource
method: DELETE
params:
path: { type: string, required: true }
trust_level: restricted
sync:
table: api_data
schedule: manual
mapping: {}

114
connectors/salesforce.yaml Normal file
View File

@@ -0,0 +1,114 @@
# Salesforce connector — enterprise CRM data integration.
# Created: 2026-03-30
name: salesforce
display_name: Salesforce
type: crm
icon: cloud
auth:
method: bearer
credentials:
- name: SF_ACCESS_TOKEN
description: Salesforce OAuth access token
required: true
- name: SF_INSTANCE_URL
description: Salesforce instance URL (e.g. https://yourorg.salesforce.com)
required: true
actions:
- name: query_soql
description: Run a SOQL query against Salesforce
method: GET
url: "{SF_INSTANCE_URL}/services/data/v59.0/query"
params:
q: { type: string, required: true, description: "SOQL query (e.g. SELECT Id, Name FROM Account LIMIT 10)" }
trust_level: auto
- name: list_accounts
description: List Salesforce accounts (companies)
method: GET
url: "{SF_INSTANCE_URL}/services/data/v59.0/query"
params:
q: { type: string, default: "SELECT Id, Name, Industry, Website, AnnualRevenue, NumberOfEmployees FROM Account ORDER BY LastModifiedDate DESC LIMIT 25" }
trust_level: auto
- name: list_contacts
description: List Salesforce contacts
method: GET
url: "{SF_INSTANCE_URL}/services/data/v59.0/query"
params:
q: { type: string, default: "SELECT Id, FirstName, LastName, Email, Phone, AccountId, Title FROM Contact ORDER BY LastModifiedDate DESC LIMIT 25" }
trust_level: auto
- name: list_opportunities
description: List open sales opportunities
method: GET
url: "{SF_INSTANCE_URL}/services/data/v59.0/query"
params:
q: { type: string, default: "SELECT Id, Name, StageName, Amount, CloseDate, AccountId, Probability FROM Opportunity WHERE IsClosed = false ORDER BY CloseDate ASC LIMIT 25" }
trust_level: auto
- name: list_leads
description: List recent leads
method: GET
url: "{SF_INSTANCE_URL}/services/data/v59.0/query"
params:
q: { type: string, default: "SELECT Id, FirstName, LastName, Email, Company, Status, LeadSource FROM Lead WHERE IsConverted = false ORDER BY CreatedDate DESC LIMIT 25" }
trust_level: auto
- name: list_cases
description: List support cases
method: GET
url: "{SF_INSTANCE_URL}/services/data/v59.0/query"
params:
q: { type: string, default: "SELECT Id, CaseNumber, Subject, Status, Priority, ContactId, AccountId FROM Case WHERE IsClosed = false ORDER BY CreatedDate DESC LIMIT 25" }
trust_level: auto
- name: create_lead
description: Create a new lead in Salesforce
method: POST
url: "{SF_INSTANCE_URL}/services/data/v59.0/sobjects/Lead"
body:
FirstName: { type: string }
LastName: { type: string, required: true }
Email: { type: string }
Company: { type: string, required: true }
Phone: { type: string }
LeadSource: { type: string }
trust_level: confirm
- name: create_contact
description: Create a new contact
method: POST
url: "{SF_INSTANCE_URL}/services/data/v59.0/sobjects/Contact"
body:
FirstName: { type: string }
LastName: { type: string, required: true }
Email: { type: string }
Phone: { type: string }
AccountId: { type: string }
Title: { type: string }
trust_level: confirm
- name: update_opportunity_stage
description: Update the stage of an opportunity
method: PATCH
url: "{SF_INSTANCE_URL}/services/data/v59.0/sobjects/Opportunity/{opportunity_id}"
params:
opportunity_id: { type: string, required: true, description: "Opportunity record ID" }
body:
StageName: { type: string, required: true }
Amount: { type: number }
CloseDate: { type: string }
trust_level: confirm
sync:
table: salesforce_accounts
schedule: every_15m
mapping:
id: Id
name: Name
industry: Industry
website: Website
revenue: AnnualRevenue

128
connectors/servicenow.yaml Normal file
View File

@@ -0,0 +1,128 @@
# ServiceNow connector — IT service management (ITSM) and operations.
# Created: 2026-03-30
name: servicenow
display_name: ServiceNow
type: itsm
icon: settings
auth:
method: basic
credentials:
- name: SNOW_INSTANCE_URL
description: ServiceNow instance URL (e.g. https://yourorg.service-now.com)
required: true
- name: SNOW_USERNAME
description: ServiceNow username
required: true
- name: SNOW_PASSWORD
description: ServiceNow password
required: true
actions:
- name: list_incidents
description: List IT incidents
method: GET
url: "{SNOW_INSTANCE_URL}/api/now/table/incident"
params:
sysparm_limit: { type: integer, default: 25 }
sysparm_query: { type: string, default: "active=true^ORDERBYDESCsys_updated_on", description: "ServiceNow encoded query" }
sysparm_display_value: { type: boolean, default: true }
sysparm_fields: { type: string, default: "number,short_description,state,priority,assigned_to,category,opened_at,sys_updated_on" }
trust_level: auto
- name: get_incident
description: Get a specific incident by sys_id
method: GET
url: "{SNOW_INSTANCE_URL}/api/now/table/incident/{sys_id}"
params:
sys_id: { type: string, required: true, description: "Incident sys_id (record ID); to find an incident by number, use list_incidents with sysparm_query=number=INC0012345" }
sysparm_display_value: { type: boolean, default: true }
trust_level: auto
- name: list_change_requests
description: List change requests
method: GET
url: "{SNOW_INSTANCE_URL}/api/now/table/change_request"
params:
sysparm_limit: { type: integer, default: 25 }
sysparm_query: { type: string, default: "active=true^ORDERBYDESCsys_updated_on" }
sysparm_display_value: { type: boolean, default: true }
sysparm_fields: { type: string, default: "number,short_description,state,type,risk,assigned_to,start_date,end_date" }
trust_level: auto
- name: list_service_requests
description: List service requests (catalog items)
method: GET
url: "{SNOW_INSTANCE_URL}/api/now/table/sc_request"
params:
sysparm_limit: { type: integer, default: 25 }
sysparm_query: { type: string, default: "active=true^ORDERBYDESCsys_updated_on" }
sysparm_display_value: { type: boolean, default: true }
trust_level: auto
- name: list_problems
description: List problem records
method: GET
url: "{SNOW_INSTANCE_URL}/api/now/table/problem"
params:
sysparm_limit: { type: integer, default: 25 }
sysparm_query: { type: string, default: "active=true^ORDERBYDESCsys_updated_on" }
sysparm_display_value: { type: boolean, default: true }
trust_level: auto
- name: create_incident
description: Create a new incident
method: POST
url: "{SNOW_INSTANCE_URL}/api/now/table/incident"
body:
short_description: { type: string, required: true }
description: { type: string }
urgency: { type: integer, enum: [1, 2, 3], default: 2, description: "1=High, 2=Medium, 3=Low" }
impact: { type: integer, enum: [1, 2, 3], default: 2 }
category: { type: string }
assignment_group: { type: string }
trust_level: confirm
- name: update_incident
description: Update an incident
method: PATCH
url: "{SNOW_INSTANCE_URL}/api/now/table/incident/{sys_id}"
params:
sys_id: { type: string, required: true }
body:
state: { type: integer, description: "1=New, 2=InProgress, 3=OnHold, 6=Resolved, 7=Closed" }
work_notes: { type: string }
assigned_to: { type: string }
trust_level: confirm
- name: list_cmdb_servers
description: List CMDB server configuration items
method: GET
url: "{SNOW_INSTANCE_URL}/api/now/table/cmdb_ci_server"
params:
sysparm_limit: { type: integer, default: 25 }
sysparm_display_value: { type: boolean, default: true }
sysparm_fields: { type: string, default: "name,ip_address,os,operational_status,environment" }
trust_level: auto
- name: search_knowledge
description: Search the knowledge base
method: GET
url: "{SNOW_INSTANCE_URL}/api/now/table/kb_knowledge"
params:
sysparm_query: { type: string, required: true, description: "Encoded query (e.g. short_descriptionLIKEpassword reset)" }
sysparm_limit: { type: integer, default: 10 }
trust_level: auto
sync:
table: snow_incidents
schedule: every_15m
mapping:
id: sys_id
number: number
description: short_description
state: state
priority: priority
assigned_to: assigned_to
updated: sys_updated_on

116
connectors/sharepoint.yaml Normal file
View File

@@ -0,0 +1,116 @@
# SharePoint connector — Microsoft 365 document management and intranet.
# Created: 2026-03-30
name: sharepoint
display_name: SharePoint
type: knowledge
icon: folder-open
auth:
method: bearer
credentials:
- name: MS_ACCESS_TOKEN
description: Microsoft Graph API access token (delegated or application)
required: true
- name: MS_TENANT_ID
description: Azure AD tenant ID (optional, for token refresh)
required: false
actions:
- name: list_sites
description: List SharePoint sites accessible to the user
method: GET
url: https://graph.microsoft.com/v1.0/sites
params:
search: { type: string, description: "Search term to filter sites" }
trust_level: auto
- name: get_site
description: Get a specific SharePoint site
method: GET
url: https://graph.microsoft.com/v1.0/sites/{site_id}
params:
site_id: { type: string, required: true, description: "Site ID or hostname:path format" }
trust_level: auto
- name: list_drives
description: List document libraries (drives) in a site
method: GET
url: https://graph.microsoft.com/v1.0/sites/{site_id}/drives
params:
site_id: { type: string, required: true }
trust_level: auto
- name: list_files
description: List files and folders in a drive or folder
method: GET
url: https://graph.microsoft.com/v1.0/sites/{site_id}/drives/{drive_id}/root/children
params:
site_id: { type: string, required: true }
drive_id: { type: string, required: true }
folder_path: { type: string, default: "root", description: "Folder path or 'root'" }
trust_level: auto
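# NOTE(review): Microsoft Graph exposes /search/query as POST only (with a `requests` body, as in search_content below); confirm this GET action works or convert it to POST.
- name: search_files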
description: Search for files across SharePoint
method: GET
url: https://graph.microsoft.com/v1.0/search/query
params:
query: { type: string, required: true, description: "Search query" }
trust_level: auto
- name: get_file_content
description: Download a file's content
method: GET
url: https://graph.microsoft.com/v1.0/sites/{site_id}/drives/{drive_id}/items/{item_id}/content
params:
site_id: { type: string, required: true }
drive_id: { type: string, required: true }
item_id: { type: string, required: true }
trust_level: auto
- name: list_lists
description: List SharePoint lists in a site
method: GET
url: https://graph.microsoft.com/v1.0/sites/{site_id}/lists
params:
site_id: { type: string, required: true }
trust_level: auto
- name: list_items
description: Get items from a SharePoint list
method: GET
url: https://graph.microsoft.com/v1.0/sites/{site_id}/lists/{list_id}/items
params:
site_id: { type: string, required: true }
list_id: { type: string, required: true }
expand: { type: string, default: "fields" }
trust_level: auto
- name: search_content
description: Full-text search across all SharePoint content
method: POST
url: https://graph.microsoft.com/v1.0/search/query
body:
requests:
type: object
required: true
description: "[{entityTypes: ['driveItem', 'listItem', 'site'], query: {queryString: '...'}}]"
trust_level: auto
- name: list_recent_files
description: List recently accessed files
method: GET
url: https://graph.microsoft.com/v1.0/me/drive/recent
trust_level: auto
sync:
table: sharepoint_files
schedule: every_30m
mapping:
id: id
name: name
path: parentReference.path
size: size
modified_by: lastModifiedBy.user.displayName
updated: lastModifiedDateTime

107
connectors/shopify.yaml Normal file
View File

@@ -0,0 +1,107 @@
# Shopify connector — e-commerce store data (orders, products, customers).
# Created: 2026-03-30
name: shopify
display_name: Shopify
type: ecommerce
icon: shopping-bag
auth:
method: bearer
credentials:
- name: SHOPIFY_STORE_URL
description: Shopify store domain, without the https:// scheme (e.g. your-store.myshopify.com)
required: true
- name: SHOPIFY_ACCESS_TOKEN
description: Shopify Admin API access token
required: true
actions:
- name: list_orders
description: List recent orders
method: GET
url: "https://{SHOPIFY_STORE_URL}/admin/api/2024-01/orders.json"
params:
status: { type: string, enum: [open, closed, cancelled, any], default: any }
limit: { type: integer, default: 25 }
financial_status: { type: string, enum: [authorized, pending, paid, refunded, voided, any] }
trust_level: auto
- name: list_products
description: List products in the store
method: GET
url: "https://{SHOPIFY_STORE_URL}/admin/api/2024-01/products.json"
params:
limit: { type: integer, default: 25 }
status: { type: string, enum: [active, archived, draft] }
collection_id: { type: string }
trust_level: auto
- name: list_customers
description: List customers
method: GET
url: "https://{SHOPIFY_STORE_URL}/admin/api/2024-01/customers.json"
params:
limit: { type: integer, default: 25 }
trust_level: auto
- name: search_customers
description: Search customers by query
method: GET
url: "https://{SHOPIFY_STORE_URL}/admin/api/2024-01/customers/search.json"
params:
query: { type: string, required: true, description: "Search query (email, name, etc.)" }
limit: { type: integer, default: 25 }
trust_level: auto
- name: get_order
description: Get detailed order information
method: GET
url: "https://{SHOPIFY_STORE_URL}/admin/api/2024-01/orders/{order_id}.json"
params:
order_id: { type: string, required: true }
trust_level: auto
- name: list_inventory
description: List inventory levels
method: GET
url: "https://{SHOPIFY_STORE_URL}/admin/api/2024-01/inventory_levels.json"
params:
location_ids: { type: string, required: true, description: "Comma-separated location IDs" }
limit: { type: integer, default: 50 }
trust_level: auto
- name: get_shop_info
description: Get store information and settings
method: GET
url: "https://{SHOPIFY_STORE_URL}/admin/api/2024-01/shop.json"
trust_level: auto
- name: list_collections
description: List product collections
method: GET
url: "https://{SHOPIFY_STORE_URL}/admin/api/2024-01/custom_collections.json"
params:
limit: { type: integer, default: 25 }
trust_level: auto
- name: count_orders
description: Get order count with optional filters
method: GET
url: "https://{SHOPIFY_STORE_URL}/admin/api/2024-01/orders/count.json"
params:
status: { type: string, enum: [open, closed, any], default: any }
financial_status: { type: string, enum: [authorized, pending, paid, refunded, voided, any] }
created_at_min: { type: string, description: "ISO 8601 date" }
trust_level: auto
sync:
table: shopify_orders
schedule: every_15m
mapping:
id: id
order_number: order_number
total_price: total_price
status: financial_status
customer_email: email
created: created_at

View File

@@ -0,0 +1,91 @@
# Slack connector (data source) — search messages, channels, and users.
# Created: 2026-03-30
# NOTE: This is the Slack *data connector* for querying workspace data.
# The Slack *channel adapter* (bus/adapters/slack_adapter.py) handles real-time messaging.
name: slack_data
display_name: Slack (Data)
type: communication
icon: hash
auth:
method: bearer
credentials:
- name: SLACK_BOT_TOKEN
description: Slack Bot User OAuth Token (xoxb-...)
required: true
actions:
- name: search_messages
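description: Search messages across all channels (Slack's search.messages requires a user token, xoxp-..., with the search:read scope; bot tokens are rejected)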
method: GET
url: https://slack.com/api/search.messages
params:
query: { type: string, required: true, description: "Search query (supports Slack search operators)" }
count: { type: integer, default: 20 }
sort: { type: string, enum: [score, timestamp], default: score }
trust_level: auto
- name: list_channels
description: List public and private channels
method: GET
url: https://slack.com/api/conversations.list
params:
types: { type: string, default: "public_channel,private_channel" }
limit: { type: integer, default: 50 }
exclude_archived: { type: boolean, default: true }
trust_level: auto
- name: channel_history
description: Get recent messages from a channel
method: GET
url: https://slack.com/api/conversations.history
params:
channel: { type: string, required: true, description: "Channel ID" }
limit: { type: integer, default: 25 }
trust_level: auto
- name: list_users
description: List workspace members
method: GET
url: https://slack.com/api/users.list
params:
limit: { type: integer, default: 100 }
trust_level: auto
- name: get_user_info
description: Get detailed info about a user
method: GET
url: https://slack.com/api/users.info
params:
user: { type: string, required: true, description: "User ID" }
trust_level: auto
- name: channel_info
description: Get detailed channel information
method: GET
url: https://slack.com/api/conversations.info
params:
channel: { type: string, required: true }
trust_level: auto
- name: list_pins
description: List pinned messages in a channel
method: GET
url: https://slack.com/api/pins.list
params:
channel: { type: string, required: true }
trust_level: auto
- name: list_bookmarks
description: List bookmarks in a channel
method: GET
url: https://slack.com/api/bookmarks.list
params:
channel_id: { type: string, required: true }
trust_level: auto
sync:
table: slack_messages
schedule: manual
mapping: {}

102
connectors/snowflake.yaml Normal file
View File

@@ -0,0 +1,102 @@
# Snowflake connector — cloud data warehouse queries and management.
# Created: 2026-03-30
name: snowflake
display_name: Snowflake
type: database
icon: snowflake
auth:
method: basic
credentials:
- name: SNOWFLAKE_ACCOUNT
description: Snowflake account identifier (e.g. xy12345.us-east-1)
required: true
- name: SNOWFLAKE_USER
description: Snowflake username
required: true
- name: SNOWFLAKE_PASSWORD
description: Snowflake password
required: true
- name: SNOWFLAKE_WAREHOUSE
description: Default warehouse name
required: false
- name: SNOWFLAKE_DATABASE
description: Default database name
required: false
- name: SNOWFLAKE_SCHEMA
description: Default schema (e.g. PUBLIC)
required: false
actions:
- name: execute_query
description: Execute a SQL query against Snowflake
method: LOCAL
params:
query: { type: string, required: true, description: "SQL query to execute" }
warehouse: { type: string, description: "Warehouse to use (overrides default)" }
database: { type: string, description: "Database to use" }
schema: { type: string, description: "Schema to use" }
limit: { type: integer, default: 100 }
trust_level: confirm
- name: list_databases
description: List all accessible databases
method: LOCAL
params:
query: { type: string, default: "SHOW DATABASES" }
trust_level: auto
- name: list_schemas
description: List schemas in a database
method: LOCAL
params:
database: { type: string, required: true }
query: { type: string, default: "SHOW SCHEMAS IN DATABASE {database}" }
trust_level: auto
- name: list_tables
description: List tables in a schema
method: LOCAL
params:
database: { type: string, required: true }
schema: { type: string, default: "PUBLIC" }
query: { type: string, default: "SHOW TABLES IN {database}.{schema}" }
trust_level: auto
- name: describe_table
description: Get column definitions for a table
method: LOCAL
params:
table: { type: string, required: true, description: "Fully qualified table name (db.schema.table)" }
query: { type: string, default: "DESCRIBE TABLE {table}" }
trust_level: auto
- name: preview_table
description: Preview rows from a table
method: LOCAL
params:
table: { type: string, required: true }
limit: { type: integer, default: 20 }
query: { type: string, default: "SELECT * FROM {table} LIMIT {limit}" }
trust_level: auto
- name: list_warehouses
description: List available warehouses
method: LOCAL
params:
query: { type: string, default: "SHOW WAREHOUSES" }
trust_level: auto
- name: query_history
description: View recent query history
method: LOCAL
params:
limit: { type: integer, default: 20 }
query: { type: string, default: "SELECT query_id, query_text, database_name, schema_name, warehouse_name, execution_status, total_elapsed_time, rows_produced, start_time FROM table(information_schema.query_history()) ORDER BY start_time DESC LIMIT {limit}" }
trust_level: auto
sync:
table: null
schedule: manual
mapping: {}

155
connectors/stripe.yaml Normal file
View File

@@ -0,0 +1,155 @@
# Stripe connector — payment data integration.
# Created: 2026-03-27
# Updated: 2026-03-30 — Added subscriptions, balance transactions, payouts, disputes, refunds.
name: stripe
display_name: Stripe
type: payment
icon: credit-card
auth:
method: api_key
credentials:
- name: STRIPE_API_KEY
description: Stripe secret key (sk_...)
required: true
actions:
- name: list_invoices
description: List recent invoices
method: GET
url: https://api.stripe.com/v1/invoices
content_type: form
params:
limit: { type: integer, default: 25 }
status: { type: string, enum: [draft, open, paid, void, uncollectible] }
customer: { type: string, description: "Filter by customer ID" }
trust_level: auto
- name: list_customers
description: List customers
method: GET
url: https://api.stripe.com/v1/customers
content_type: form
params:
limit: { type: integer, default: 25 }
email: { type: string, description: "Filter by email" }
trust_level: auto
- name: list_subscriptions
description: List active subscriptions
method: GET
url: https://api.stripe.com/v1/subscriptions
content_type: form
params:
limit: { type: integer, default: 25 }
status: { type: string, enum: [active, past_due, canceled, unpaid, trialing, all], default: active }
customer: { type: string, description: "Filter by customer ID" }
trust_level: auto
- name: list_charges
description: List recent charges/payments
method: GET
url: https://api.stripe.com/v1/charges
content_type: form
params:
limit: { type: integer, default: 25 }
customer: { type: string }
trust_level: auto
- name: list_balance_transactions
description: List balance transactions (funds movement)
method: GET
url: https://api.stripe.com/v1/balance_transactions
content_type: form
params:
limit: { type: integer, default: 25 }
type: { type: string, enum: [charge, refund, adjustment, payout, transfer] }
trust_level: auto
- name: get_balance
description: Get current account balance
method: GET
url: https://api.stripe.com/v1/balance
trust_level: auto
- name: list_payouts
description: List payouts to your bank account
method: GET
url: https://api.stripe.com/v1/payouts
content_type: form
params:
limit: { type: integer, default: 25 }
status: { type: string, enum: [pending, paid, failed, canceled] }
trust_level: auto
- name: list_disputes
description: List payment disputes
method: GET
url: https://api.stripe.com/v1/disputes
content_type: form
params:
limit: { type: integer, default: 25 }
trust_level: auto
- name: list_refunds
description: List refunds
method: GET
url: https://api.stripe.com/v1/refunds
content_type: form
params:
limit: { type: integer, default: 25 }
charge: { type: string, description: "Filter by charge ID" }
trust_level: auto
- name: list_products
description: List products in your catalog
method: GET
url: https://api.stripe.com/v1/products
content_type: form
params:
limit: { type: integer, default: 25 }
active: { type: boolean }
trust_level: auto
- name: list_prices
description: List prices for products
method: GET
url: https://api.stripe.com/v1/prices
content_type: form
params:
limit: { type: integer, default: 25 }
product: { type: string, description: "Filter by product ID" }
active: { type: boolean }
trust_level: auto
- name: create_invoice
description: Create a new invoice
method: POST
url: https://api.stripe.com/v1/invoices
content_type: form
body:
customer: { type: string, required: true }
description: { type: string }
days_until_due: { type: integer, default: 30 }
trust_level: confirm
- name: create_refund
description: Create a refund for a charge
method: POST
url: https://api.stripe.com/v1/refunds
content_type: form
body:
charge: { type: string, required: true, description: "Charge ID to refund" }
amount: { type: integer, description: "Amount in cents (partial refund). Omit for full refund." }
reason: { type: string, enum: [duplicate, fraudulent, requested_by_customer] }
trust_level: restricted
sync:
table: stripe_invoices
schedule: every_15m
mapping:
id: id
amount: amount_due
status: status
customer: customer
created: created

124
connectors/zendesk.yaml Normal file
View File

@@ -0,0 +1,124 @@
# Zendesk connector — customer support tickets and knowledge base.
# Created: 2026-03-30
name: zendesk
display_name: Zendesk
type: support
icon: headphones
auth:
method: basic
credentials:
- name: ZENDESK_SUBDOMAIN
description: Zendesk subdomain (e.g. yourcompany in yourcompany.zendesk.com)
required: true
- name: ZENDESK_EMAIL
description: Agent email address
required: true
- name: ZENDESK_API_TOKEN
description: Zendesk API token (Admin → Channels → API)
required: true
actions:
- name: list_tickets
description: List recent support tickets
method: GET
url: "https://{ZENDESK_SUBDOMAIN}.zendesk.com/api/v2/tickets"
params:
sort_by: { type: string, enum: [created_at, updated_at, priority, status], default: updated_at }
sort_order: { type: string, enum: [asc, desc], default: desc }
per_page: { type: integer, default: 25 }
trust_level: auto
- name: search_tickets
description: Search tickets by query
method: GET
url: "https://{ZENDESK_SUBDOMAIN}.zendesk.com/api/v2/search"
params:
query: { type: string, required: true, description: "Search query (e.g. status:open priority:high)" }
per_page: { type: integer, default: 25 }
trust_level: auto
- name: get_ticket
description: Get a specific ticket by ID (use list_ticket_comments to fetch its comments)
method: GET
url: "https://{ZENDESK_SUBDOMAIN}.zendesk.com/api/v2/tickets/{ticket_id}"
params:
ticket_id: { type: integer, required: true }
trust_level: auto
- name: list_ticket_comments
description: Get all comments/replies on a ticket
method: GET
url: "https://{ZENDESK_SUBDOMAIN}.zendesk.com/api/v2/tickets/{ticket_id}/comments"
params:
ticket_id: { type: integer, required: true }
per_page: { type: integer, default: 50 }
trust_level: auto
- name: create_ticket
description: Create a new support ticket
method: POST
url: "https://{ZENDESK_SUBDOMAIN}.zendesk.com/api/v2/tickets"
body:
ticket:
type: object
required: true
description: "{subject: '...', description: '...', priority: 'normal', type: 'incident', tags: [...]}"
trust_level: confirm
- name: update_ticket
description: Update a ticket (status, priority, assignee, etc.)
method: PUT
url: "https://{ZENDESK_SUBDOMAIN}.zendesk.com/api/v2/tickets/{ticket_id}"
params:
ticket_id: { type: integer, required: true }
body:
ticket:
type: object
required: true
description: "{status: 'pending', priority: 'high', assignee_id: 123}"
trust_level: confirm
- name: add_ticket_comment
description: Add a comment/reply to a ticket
method: PUT
url: "https://{ZENDESK_SUBDOMAIN}.zendesk.com/api/v2/tickets/{ticket_id}"
params:
ticket_id: { type: integer, required: true }
body:
ticket:
type: object
required: true
description: "{comment: {body: '...', public: true}}"
trust_level: confirm
- name: list_users
description: List Zendesk users (agents and end-users)
method: GET
url: "https://{ZENDESK_SUBDOMAIN}.zendesk.com/api/v2/users"
params:
role: { type: string, enum: [end-user, agent, admin] }
per_page: { type: integer, default: 25 }
trust_level: auto
- name: ticket_metrics
description: Get ticket metrics and SLA data
method: GET
url: "https://{ZENDESK_SUBDOMAIN}.zendesk.com/api/v2/tickets/{ticket_id}/metrics"
params:
ticket_id: { type: integer, required: true }
trust_level: auto
sync:
table: zendesk_tickets
schedule: every_15m
mapping:
id: id
subject: subject
status: status
priority: priority
requester: requester_id
assignee: assignee_id
created: created_at
updated: updated_at

146
docs/CONNECTORS.md Normal file
View File

@@ -0,0 +1,146 @@
# Connectors — Data Source Integration
Connectors bring external data into PocketPaw Pockets. Each service is defined in a YAML file — the engine reads the definition and handles auth, execution, and sync.
## Quick Start
```bash
# List available connectors
paw connectors list
# Connect Stripe to a pocket
paw connect stripe --pocket "My Business"
# Check connection status
paw connectors status
```
## How It Works
```
Your Service (Stripe, Shopify, CSV, etc.)
        ↓
Connector YAML (defines endpoints, auth, sync)
        ↓
DirectREST Engine (reads YAML, makes API calls)
        ↓
pocket.db (data lands in SQLite tables)
        ↓
Pocket widgets auto-update with fresh data
```
## Writing a Connector YAML
Each connector is a YAML file in `connectors/`. Here's the structure:
```yaml
# connectors/my_service.yaml
name: my_service
display_name: My Service
type: payment # category for grouping
icon: credit-card # lucide icon name
auth:
method: api_key # api_key | oauth | basic | bearer | none
credentials:
- name: MY_API_KEY
description: API key from My Service dashboard
required: true
actions:
- name: list_items
description: Get all items
method: GET
url: https://api.myservice.com/v1/items
params:
limit: { type: integer, default: 10 }
status: { type: string, enum: [active, archived] }
trust_level: auto # auto | confirm | restricted
- name: create_item
description: Create a new item
method: POST
url: https://api.myservice.com/v1/items
body:
name: { type: string, required: true }
price: { type: number }
trust_level: confirm # requires user approval
sync:
table: my_service_items # target table in pocket.db
schedule: every_15m # polling interval
mapping: # field mapping
id: id
name: name
price: price
created: created_at
```
## Auth Methods
| Method | When to Use | Example |
|--------|-------------|---------|
| `api_key` | Service provides a static API key | Stripe, Tavily |
| `oauth` | Service uses OAuth 2.0 flow | Google, Spotify |
| `bearer` | Token-based auth (API key in Authorization header) | Generic REST APIs |
| `basic` | Username + password auth | Legacy APIs |
| `none` | Public API, no auth needed | Reddit (read-only) |
## Trust Levels
Each action has a trust level that controls how much human oversight the agent needs:
| Level | Behavior | Use For |
|-------|----------|---------|
| `auto` | Agent executes without asking | Read-only operations (list, search) |
| `confirm` | Agent asks user before executing | Write operations (create, update, delete) |
| `restricted` | Requires admin approval | Destructive or financial operations |
## Using with Existing Integrations
PocketPaw already has built-in integrations for Google Workspace, Spotify, and Reddit. These work as **agent tools** (one-off actions via chat). Connectors add **continuous data sync** on top:
| Integration | As Tool (built-in) | As Connector (YAML) |
|-------------|-------------------|---------------------|
| Gmail | "Search my emails for invoices" → one-off result | Sync inbox every 15m → `gmail_messages` table → Pocket widget |
| Google Calendar | "Create a meeting tomorrow" → done | Sync events daily → `calendar_events` table → schedule widget |
| Stripe | (not built-in yet) | Sync invoices → `stripe_invoices` table → revenue dashboard |
| CSV | (not built-in yet) | Import file → custom table → data visualization |
Tools and connectors complement each other. Tools are for actions. Connectors are for data.
## Built-in Connectors
| Connector | File | Auth | Syncs |
|-----------|------|------|-------|
| **Stripe** | `connectors/stripe.yaml` | API key | Invoices, customers |
| **CSV Import** | `connectors/csv.yaml` | None | Any CSV/Excel file |
| **REST API** | `connectors/rest_generic.yaml` | Bearer token | Any REST endpoint |
## Architecture
```
ConnectorProtocol (Python async interface)
├── DirectRESTAdapter ← YAML-defined REST APIs (primary)
├── ComposioAdapter ← 250+ apps with managed OAuth (planned)
└── CuratedMCPAdapter ← Whitelisted MCP servers (planned)
```
The `ConnectorRegistry` auto-discovers YAML files from the `connectors/` directory and manages adapter instances per pocket.
## Adding a New Connector
1. Create `connectors/your_service.yaml` following the schema above
2. Test it: `paw connect your_service --pocket "Test"`
3. The agent can now use it: "Connect my Shopify to this pocket"
That's it. No Python code needed — just YAML.
## Security
- Credentials are never stored in YAML files or pocket.db
- Auth tokens flow through the credential store (Infisical planned)
- Each pocket has isolated connector access
- Trust levels enforce human oversight for write operations
- All connector actions are logged to the audit trail

View File

@@ -0,0 +1,179 @@
# EE Cloud Module — Strip & Rebuild Design
**Date**: 2026-04-04
**Scope**: `ee/cloud/` only — strip and rebuild with clean architecture
**Consumer**: paw-enterprise (SvelteKit/Tauri desktop client)
**Runtime**: headless mode (`pocketpaw serve`), no dashboard dependency
## Context
The ee/cloud module (2400 LOC, 26 files) was built incrementally with hotfixes. It provides multi-tenant workspace, group chat, pockets, sessions, and agent management backed by MongoDB (Beanie ODM) and real-time via Socket.IO.
**Problems**: no service layer, no validation, global state, Socket.IO tightly coupled to ASGI, swallowed exceptions, circular imports, zero tests.
**Decision**: gut it, keep the Beanie models (cleaned up), rewrite all logic with domain-driven subpackages.
## Architecture: Domain Subpackages
```
ee/cloud/
├── auth/ # register, login, profile, JWT
│ ├── router.py
│ ├── service.py
│ └── schemas.py
├── workspace/ # workspaces, members, invites, SMTP
│ ├── router.py
│ ├── service.py
│ └── schemas.py
├── chat/ # groups, DMs, messages, reactions, threads, WebSocket
│ ├── router.py
│ ├── service.py
│ ├── schemas.py
│ └── ws.py
├── pockets/ # pockets, widgets, sharing via links, agents
│ ├── router.py
│ ├── service.py
│ └── schemas.py
├── sessions/ # session CRUD, runtime proxy, pocket auto-link
│ ├── router.py
│ ├── service.py
│ └── schemas.py
├── agents/ # agent discovery, CRUD
│ ├── router.py
│ ├── service.py
│ └── schemas.py
├── shared/ # cross-cutting concerns
│ ├── deps.py # current_user, workspace_id, require_role()
│ ├── db.py # MongoDB connection + Beanie init
│ ├── errors.py # CloudError hierarchy + exception handler
│ ├── events.py # internal async pub/sub for side effects
│ └── permissions.py # role checks, pocket access, share link validation
├── models/ # existing Beanie models (cleaned up)
└── __init__.py # mount all routers
```
## Data Model Changes
| Model | Changes |
|---|---|
| User | No change (fastapi-users BeanieBaseUser) |
| Workspace | Add `deleted_at` soft-delete, enforce seat limits at model level |
| Group | Add `last_message_at`, `message_count` counter |
| Message | Add `edited_at`, index on `(group_id, created_at)` for cursor pagination |
| Room | **Merge into Group** — DM is `type: "dm"` with 2 members |
| Pocket | Add `share_link_token`, `share_link_access` (view/comment/edit), `visibility` (private/workspace/public), `shared_with` (explicit user grants) |
| Session | Add `deleted_at` soft-delete |
| Invite | Add `revoked` flag, cleanup index on `expires_at` |
| Notification | Add `expires_at` for auto-cleanup |
| Comment, FileObj, Agent | No change |
**Session ↔ Pocket linking**: sessions auto-attach to pockets. Creating a pocket with `session_id` links the session. `Session.pocket_id` set on attachment.
## WebSocket Architecture (replacing Socket.IO)
Single endpoint: `ws://host/ws/cloud?token=<JWT>`
**Protocol** — typed JSON messages:
### Client → Server
- `message.send` — send message to group (content, reply_to)
- `message.edit` — edit own message
- `message.delete` — soft-delete message
- `message.react` — add/remove reaction
- `typing.start` / `typing.stop` — scoped to group, auto-expire 5s
- `presence.update` — online/away status
- `read.ack` — mark messages read up to ID
### Server → Client
- `message.new` — new message in group
- `message.edited` — message edited
- `message.deleted` — message deleted
- `message.reaction` — reaction added/removed
- `typing` — typing indicator
- `presence` — user online/offline/away
- `read.receipt` — read receipt
- `error` — error with code + message
**Design decisions**:
- Pydantic validation on every inbound message
- Group membership verified on every send
- Connection manager: `user_id → set[WebSocket]` (multi-tab/device)
- 30s grace period on disconnect before marking offline
- Graceful degradation: REST endpoints work without WebSocket
## Error Handling
```python
CloudError(status_code, code, message)
├── NotFound # 404 — "group.not_found"
├── Forbidden # 403 — "workspace.not_member"
├── ConflictError # 409 — "workspace.slug_taken"
├── ValidationError # 422 — "message.too_long"
└── SeatLimitError # 402 — "workspace.seat_limit_reached"
```
Single exception handler returns: `{ "error": { "code": "...", "message": "..." } }`
## Internal Event Bus
| Event | Triggers |
|---|---|
| `invite.accepted` | notification + auto-add to default groups |
| `message.sent` | notifications for mentions, update group `last_message_at` |
| `pocket.shared` | notification for recipient |
| `member.removed` | cleanup group memberships, revoke pocket access |
| `session.created` | link to pocket if `pocket_id` provided |
Simple async callback registry, in-process.
## Permissions
- **Workspace roles**: owner > admin > member
- **Pocket access**: owner / edit / comment / view (explicit grants or share links)
- **Group access**: member check, public groups allow self-join
- **Share links**: token validated for expiry, revocation, access level
- **DMs**: any workspace member can DM any other member
## API Endpoints
### auth — `/api/v1/auth`
- POST `/register`, `/login`, `/logout`
- GET/PATCH `/me`
- POST `/password/reset`, `/password/reset/confirm`
### workspace — `/api/v1/workspaces`
- CRUD: POST/GET/PATCH/DELETE `/`, `/{id}`
- Members: GET/PATCH/DELETE `/{id}/members`, `/{id}/members/{uid}`
- Invites: POST `/{id}/invites`, GET/POST `/invites/{token}`, DELETE `/{id}/invites/{invite_id}`
### chat — `/api/v1/chat`
- Groups: POST/GET `/groups`, GET/PATCH `/{id}`, POST `/{id}/archive`, `/{id}/join`, `/{id}/leave`
- Members: POST/DELETE `/{id}/members`, `/{id}/members/{uid}`
- Agents: POST/PATCH/DELETE `/{id}/agents`, `/{id}/agents/{aid}`
- Messages: GET/POST `/{id}/messages`, PATCH/DELETE `/messages/{id}`
- Reactions: POST `/messages/{id}/react`
- Threads: GET `/messages/{id}/thread`
- Pins: POST/DELETE `/{id}/pin/{mid}`
- Search: GET `/{id}/search`
- DMs: POST `/dm/{user_id}`
### pockets — `/api/v1/pockets`
- CRUD: POST/GET/PATCH/DELETE `/`, `/{id}`
- Widgets: POST/PATCH/DELETE `/{id}/widgets`, `/{id}/widgets/{wid}`, POST `/{id}/widgets/reorder`
- Team: POST/DELETE `/{id}/team`, `/{id}/team/{uid}`
- Agents: POST/DELETE `/{id}/agents`, `/{id}/agents/{aid}`
- Sharing: POST/PATCH/DELETE `/{id}/share`, GET `/shared/{token}`
- Sessions: POST/GET `/{id}/sessions`
### sessions — `/api/v1/sessions`
- CRUD: POST/GET/PATCH/DELETE `/`, `/{id}`
- History: GET `/{id}/history`
- Touch: POST `/{id}/touch`
### agents — `/api/v1/agents`
- CRUD: POST/GET/PATCH/DELETE `/`, `/{id}`
- By slug: GET `/uname/{slug}`
- Discovery: POST `/discover`
### WebSocket — `/ws/cloud`
- JWT auth on connect, typed JSON protocol as described above

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,243 @@
# Enterprise Agent Chat Endpoint — Design
**Status:** Draft
**Date:** 2026-04-23
**Owner:** prakash@snctm.com
**Module:** `backend/ee/cloud/chat/`
## Context
The OSS chat endpoint at `backend/src/pocketpaw/api/v1/chat.py` (`POST /chat`, `POST /chat/stream`, `POST /chat/stop`) is a stateless bridge from a user message to the AgentLoop. It has no notion of workspace, group, DM, pocket, presence, or ripple (dynamic UI) output, and it intentionally must stay that way for the single-user local product.
The enterprise cloud surface (`backend/ee/cloud/chat/router.py`) already provides workspace-scoped REST + WebSocket for groups, DMs, messages, reactions, threads, and pins, but it does **not** yet have a dedicated agent-generation endpoint. Agent replies inside the cloud flow today go through `ee/cloud/shared/agent_bridge.py` without scope-aware context, pocket-scoped tools, or structured ripple output.
## Goal
Add a fully separate enterprise agent chat endpoint that:
1. Lives entirely under the enterprise auth + license stack (`require_license`, `current_user_id`, `current_workspace_id`, scope-specific membership guards).
2. Is scope-aware for DM, group, and pocket contexts.
3. Streams rich output (chunks, tool events, thinking, ripple UI blocks) over SSE to the caller.
4. Broadcasts the finished assistant message (and agent typing state) to other scope members over the existing `/ws/cloud` WebSocket.
5. Mounts pocket-scoped tools for pocket runs, without leaking them to other scopes.
6. Shares the underlying AgentLoop engine with OSS — we wrap, we do not fork.
7. Routes soul observation and self-evaluation to the **target agent's** soul, fixing the current bug where the default PocketPaw soul is updated no matter which agent was actually addressed.
Non-goals:
- Changing `api/v1/chat.py` in any way.
- Live chunk-by-chunk broadcast to non-caller members (deferred; finished-message broadcast only for now).
- New WebSocket handler flows from the client (existing `/ws/cloud` is purely additive).
## Architecture
```
paw-enterprise (desktop)
│ POST /cloud/chat/{scope}/{scope_id}/agent (SSE, Bearer JWT)
┌──────────────────────────────────────────────────────────┐
│ ee/cloud/chat/agent_router.py (new) │
│ • auth: current_user_id, current_workspace_id, │
│ require_license, scope-specific guard │
│ • resolves ScopeContext (dm | group | pocket) │
│ • persists user message via MessageService │
│ • broadcasts "message.new" on /ws/cloud │
│ • spawns agent run via CloudAgentBridge │
│ • streams SSE back to caller (chunks, tool_*, ripple, │
│ stream_end) │
│ • on stream_end: persists assistant message, broadcasts │
│ "message.new" and "agent.typing" events │
└──────────┬───────────────────────────────────────────────┘
┌──────────────────────────────────────────────────────────┐
│ ee/cloud/chat/agent_service.py (new) │
│ • ScopeContext builder (DM / group / pocket) │
│ • Toolset assembler (base + pocket-scoped) │
│ • Participant / presence context block for system prompt│
│ • Ripple-block pass-through (no stripping) │
└──────────┬───────────────────────────────────────────────┘
┌──────────────────────────────────────────────────────────┐
│ ee/cloud/shared/agent_bridge.py (existing, extended) │
│ • wraps AgentLoop for a single cloud-scoped run │
│ • accepts ScopeContext + runtime toolset │
│ • emits AgentEvents; router adapts to SSE + WS │
└──────────────────────────────────────────────────────────┘
```
Key decisions:
- **One parametric route, three guards.** `POST /cloud/chat/{scope}/{scope_id}/agent` with `scope ∈ {dm, group, pocket}` dispatches to a scope-specific resolver. Less duplication than three separate routes; guards are chosen by scope in the resolver.
- **Separate endpoint, shared engine.** Agent backends, memory, tracing, and the message bus are all reused through `CloudAgentBridge`; only the cloud-specific context and toolset assembly is new.
- **Scope is explicit in the URL**, not inferred from a group type, so pocket-specific tool loading and presence semantics are unambiguous.
- **`/ws/cloud` stays the broadcast channel.** New outbound event types are added; no new inbound WS handler flows.
## Endpoint surface
### `POST /cloud/chat/{scope}/{scope_id}/agent`
SSE response. Auth: Bearer JWT. License: required. Membership: required for the resolved scope.
Request body (`CloudAgentChatRequest`):
```python
class CloudAgentChatRequest(BaseModel):
content: str
attachments: list[Attachment] = []
reply_to: str | None = None
mentions: list[str] = [] # user/agent ids explicitly addressed
agent_id: str | None = None # required for group scope when >1 agent member
client_message_id: str | None = None # idempotency key for the user message
```
SSE event sequence (in order, with optional events interleaved):
| Event | Data | When |
|-------|------|------|
| `message.persisted` | `{user_message_id, client_message_id}` | Immediately after the user message is written. |
| `stream_start` | `{run_id, agent_id, scope, scope_id}` | Agent run begins. `run_id` is a server-generated UUID used as the cancellation and trace key. |
| `thinking` | `{content}` | Backend emits thinking events. |
| `tool_start` | `{tool, input}` | Tool invocation starts. |
| `tool_result` | `{tool, output}` | Tool invocation completes. |
| `chunk` | `{content, type: "text"}` | Streamed text chunk. |
| `ripple` | `{spec}` | A complete ripple UI JSON block, emitted as a single event. Never split across chunks. |
| `pocket_created` | `{spec, session_id, pocket_cloud_id}` | Pocket scope only. |
| `pocket_mutation` | `{mutation}` | Pocket scope only. |
| `ask_user_question` | `{question, options}` | Agent requests clarification. |
| `stream_end` | `{assistant_message_id, usage, cancelled: bool}` | Run complete. |
| `error` | `{code, message}` | Run failed; stream closes after this event. |
### `POST /cloud/chat/{scope}/{scope_id}/agent/stop`
Cancels the in-flight run for the caller in the given scope. Mirrors OSS `/chat/stop`. Returns `{status: "ok"}` or 404 if no run is active.
### Existing `/ws/cloud` — additive events
Broadcast to all scope members **except the caller**:
| Event | Data | When |
|-------|------|------|
| `agent.typing` | `{scope, scope_id, agent_id, active: bool}` | Active on `stream_start`; inactive on `stream_end`/`error`. |
| `message.new` | `Message` document (existing shape) | Emitted once at `stream_end` with the fully-assembled assistant message, including any ripple blocks as structured content. |
| `message.failed` | `{scope, scope_id, agent_id, client_message_id, code}` | Emitted if the agent run errors before producing a persistable assistant message. |
Rationale for "caller gets chunks, others get finished message": avoids N clients rendering half-streamed ripple JSON, keeps broadcast volume sane, and matches Slack/Discord UX where remote viewers see finished bot messages. Live chunk broadcasting can be added later as opt-in.
## ScopeContext, presence & tools
`ee/cloud/chat/agent_service.py` resolves a `ScopeContext` per request:
| Scope | Resolution | Participants loaded | Presence | Tools mounted |
|-------|-----------|---------------------|----------|---------------|
| `dm` | `Group` where `type=dm` and caller is a member | The two users (or user+agent). | Online/offline of peer from WS manager. | Base toolset. |
| `group` | `Group` where caller is a member + license check | All group members + group agents. | Roster + typing state. | Base toolset. Group-level integrations reserved for later. |
| `pocket` | `Pocket` where caller has access | Pocket collaborators. | Pocket-scoped presence. | Base + pocket tools from `Pocket.tool_specs`. |
"Base toolset" means whatever `AgentLoop` currently exposes for its configured backend — cloud scopes do not subtract from it.
### Pocket tools
Each `Pocket` document declares a `tool_specs: list[dict]` field. Each entry identifies either:
- A built-in cloud tool by id.
- A workspace-registered MCP tool by id.
- An inline declarative tool.
`agent_service.assemble_toolset(scope_ctx)` merges the base toolset with pocket tools into the `AgentLoop` invocation for that single run. No global registry mutation — tools are scoped to the run.
### Presence and participant context
A new `CloudContextProvider` assembles a compact block for the system prompt via `AgentContextBuilder`:
```
<scope>{dm|group|pocket} {scope_id}</scope>
<participants>{compact roster}</participants>
<recent_activity>{typing state, recent joiners}</recent_activity>
```
This gives the agent the minimum situational awareness to address participants by name and tailor tone (DM vs group) without bloating the prompt.
## Soul routing (per-agent, not global)
**Current bug:** every turn on the AgentLoop path calls the process-global `SoulManager` (`AgentLoop._soul_manager`, registered as the module singleton `pocketpaw.soul.manager._manager`) in `_soul_observe_and_emit`. That soul represents the default PocketPaw agent. When a cloud user chats with a specific agent, the *default PocketPaw soul* observes the turn and evolves — the target agent's soul never updates. `AgentPool.observe(agent_id, ...)` exists for per-agent observation but is bypassed by the AgentLoop fast path.
**Fix:** the cloud agent chat run must route soul observation and self-evaluation to the **target agent's** soul, and must **not** touch the global PocketPaw soul (unless the target agent happens to be the default PocketPaw agent itself).
### Design
1. `ScopeContext.resolve_target_agent()` determines the single agent that is producing the reply for this run:
- `dm` with an agent peer → that agent.
- `group` → `request.agent_id` (required when >1 agent is a member; defaulted when exactly one agent is a member).
- `pocket` → the pocket's primary agent, or `request.agent_id` if the pocket has multiple agents.
2. `CloudAgentBridge` accepts `target_agent_id` and passes it to the run. It sets a per-run flag `suppress_global_soul_observe=True` on the AgentLoop invocation so the AgentLoop's global observation branch is skipped for this turn.
3. After `stream_end`, the bridge calls `AgentPool.observe(target_agent_id, user_input, assistant_output)` — which loads/creates a **per-agent `SoulManager` keyed by `agent_id`** and runs observe + self-evaluate against that soul file.
4. Per-agent soul files live at `~/.pocketpaw/souls/{agent_id}.soul` (local) and are persisted via the workspace-scoped soul store for cloud-managed agents. The default PocketPaw soul stays at its current path.
5. The bootstrap provider used for system-prompt assembly also switches per-run to the target agent's `SoulManager.bootstrap_provider`, so the agent's own identity, OCEAN, and memory are in the prompt — not the default PocketPaw soul's.
### AgentLoop changes (minimal, OSS-safe)
- Add an optional `suppress_global_soul_observe: bool` field on the per-run context (already threaded through `InboundMessage.metadata` or a new typed run config). When true, `_soul_observe_and_emit` is skipped for that turn.
- OSS behavior unchanged: the flag defaults to false, so `uv run pocketpaw` keeps updating the default PocketPaw soul exactly as before.
### Tests
- Cloud chat with agent A (not default) → A's soul file is updated; default PocketPaw soul file is byte-identical before/after.
- Cloud chat with the default PocketPaw agent → default soul updates as today.
- Group with two agents, `agent_id=B` in request → only B's soul updates.
- Pocket with primary agent C → C's soul updates.
## Error handling
- **Auth / license / membership:** rejected with 401/403/402 *before* opening the SSE stream. An auth error must never be streamed.
- **In-stream failures:** any exception inside the bridge emits an `error` SSE event with a `CloudError` code (see `ee/cloud/shared/errors.py`), then closes the stream. The user message is already persisted; the assistant message is **not** persisted on failure — avoids half-baked replies in history. A `message.failed` WS event is broadcast so other members see the attempt didn't land.
- **Cancellation:** `/stop` sets the run's cancel event, the bridge unsubscribes from the bus, and a `stream_end` with `{cancelled: true}` is emitted. No assistant message persisted. A final `agent.typing` inactive event is broadcast.
- **Concurrent runs per scope:** a new request for the same `(scope, scope_id, user_id)` cancels the prior in-flight run, mirroring OSS behavior. Tracked in an in-process `dict[(scope, scope_id, user_id), CancelEvent]`.
## Testing
- **Unit:**
- `ScopeContext` resolution for dm/group/pocket (including non-member, archived group, inaccessible pocket).
- Toolset assembly for pocket scope (base + pocket tools merged, duplicates deduped).
- `CloudError` → SSE `error` event mapping.
- Concurrent-run cancellation for the same `(scope, scope_id, user_id)`.
- **Integration** (FastAPI TestClient + beanie test DB):
- Full SSE round-trip per scope using a fake `AgentBackend` that yields a scripted event sequence including a ripple block. Assert order: `message.persisted` → `stream_start` → chunks → `ripple` → `stream_end`.
- Verify `message.new` broadcast on a second connected WS member at `stream_end`.
- Verify `agent.typing` active/inactive bracket the run.
- **Negative tests:** license disabled → 402 before stream; non-member → 403 before stream; invalid JWT → 401 before stream.
- **No change** to `backend/tests/test_api_chat.py`.
## File plan
New:
- `backend/ee/cloud/chat/agent_router.py` — SSE endpoint + `/stop`.
- `backend/ee/cloud/chat/agent_service.py` — `ScopeContext`, toolset assembly, `CloudContextProvider`.
- `backend/ee/cloud/chat/agent_schemas.py` — `CloudAgentChatRequest`, SSE event payload models.
- `backend/tests/ee/cloud/chat/test_agent_router.py`
- `backend/tests/ee/cloud/chat/test_agent_service.py`
Modified:
- `backend/ee/cloud/shared/agent_bridge.py` — accept `ScopeContext` + runtime toolset + `target_agent_id`; set `suppress_global_soul_observe=True`; call `AgentPool.observe(target_agent_id, …)` on stream_end; swap bootstrap provider to target agent's soul for the run.
- `backend/ee/cloud/chat/router.py` — include the new `agent_router`.
- `backend/ee/cloud/models/pocket.py` — add `tool_specs: list[dict]` field if not already present.
- `backend/src/pocketpaw/agents/loop.py` — honor `suppress_global_soul_observe` per-run flag; default false so OSS behavior is unchanged.
- `backend/src/pocketpaw/agents/pool.py` — ensure `observe(agent_id, …)` loads/creates a per-agent `SoulManager` keyed by `agent_id` with its own soul file path.
Unchanged:
- `backend/src/pocketpaw/api/v1/chat.py` — OSS path remains pristine.
- `backend/ee/cloud/chat/ws.py` — only additive new event types.
## Open items deferred
These are explicitly out of scope for this iteration and will be revisited:
- Live chunk broadcast to non-caller members.
- Multi-agent turn-taking inside a group (which agent replies when).
- Persisting tool traces as structured sub-documents on the assistant `Message`.
- Rate limiting per `(workspace, user)`.

View File

@@ -0,0 +1,247 @@
# agent — Agent configuration and metadata storage for workspace-scoped AI agents
> This module defines the data models for storing agent configurations in the PocketPaw enterprise cloud, including both the agent's core metadata (name, workspace, ownership) and its behavioral configuration (model, system prompt, tools, personality traits via the SOUL framework). It exists as a separate model layer to cleanly separate agent *configuration* from agent *execution*, enabling other services to query, update, and orchestrate agents without coupling to runtime concerns. The module is foundational to the agent management system and integrates with higher-level services like AgentService and GroupService that depend on these schemas.
**Categories:** agent management, data model layer, MongoDB document, schema definition, configuration storage
**Concepts:** Agent, AgentConfig, TimestampedDocument, Beanie ODM, Pydantic BaseModel, workspace scoping, multi-tenancy, LLM backend abstraction, SOUL framework, Big Five personality (OCEAN)
**Words:** 1617 | **Version:** 1
---
## Purpose
The `agent` module provides the **data model layer** for agent configurations in the PocketPaw enterprise cloud. Its core responsibility is to define what an agent *is* (its identity, capabilities, and behavioral settings) separately from what an agent *does* (execution, invocation, state management).
### Why Separate Configuration from Execution?
This separation of concerns is critical because:
- **Agents are long-lived declarative objects**: An agent's configuration is created once and referenced many times across multiple execution contexts, users, and workspaces.
- **Configuration drives behavior without coupling**: Services that invoke agents (group_service, service, agent_bridge) need to query and apply agent config without importing execution logic.
- **Clear ownership and audit trail**: Configuration changes are tracked separately from runtime logs, enabling better governance and debugging.
### Role in System Architecture
This module sits at the **data model layer** and serves as the single source of truth for agent definitions. It is consumed by:
1. **Service layer** (service, group_service) — reads agent config to determine how to invoke agents
2. **Bridge layer** (agent_bridge) — translates agent config into backend-specific execution parameters
3. **API routes** (imported via __init__) — expose agents for CRUD operations via REST
The module depends only on `base` (for TimestampedDocument), keeping its scope tight and reusable.
## Key Classes and Methods
### AgentConfig (Pydantic BaseModel)
**Purpose**: A reusable configuration schema that encapsulates all behavioral parameters for how an agent should operate.
**Key Fields**:
- **Backend Integration**
- `backend: str = "claude_agent_sdk"` — specifies which LLM backend to use (extensible for future backends like GPT, Llama, etc.)
- `model: str = ""` — the specific model identifier; empty string means "use backend's default"
- `system_prompt: str = ""` — the system message sent to the LLM to shape behavior
- `tools: list[str]` — list of tool/function names the agent can invoke (e.g., ["search", "calculator"])
- **Generation Parameters** (standard LLM hyperparameters)
- `temperature: float = 0.7` — creativity vs. determinism (0–2 range)
- `max_tokens: int = 4096` — response length limit
- `trust_level: int = 3` — custom constraint for permission/capability escalation (1–5 scale)
- **SOUL Framework Integration** (personality and values)
- `soul_enabled: bool = True` — feature flag for SOUL personality system
- `soul_persona: str = ""` — a high-level persona description (e.g., "helpful researcher", "strict auditor")
- `soul_archetype: str = ""` — optional classification into predefined archetypes
- `soul_values: list[str]` — explicit values the agent should prioritize (default: ["helpfulness", "accuracy"])
- `soul_ocean: dict[str, float]` — the Big Five personality traits (OCEAN model) scored 0–1
- `openness`: curiosity and creative thinking
- `conscientiousness`: attention to detail and reliability
- `extraversion`: sociability and proactiveness
- `agreeableness`: cooperation and empathy
- `neuroticism`: emotional instability (lower means more emotionally stable)
**Design**: AgentConfig is a pure Pydantic BaseModel (not a document), which means it's always embedded in an Agent document and never stored independently. This ensures agent config and agent metadata are always co-located.
### Agent (TimestampedDocument)
**Purpose**: The persistent MongoDB document representing a single agent definition in a workspace. Combines metadata with configuration.
**Key Fields**:
- **Identity & Scope**
- `workspace: Indexed(str)` — which workspace owns this agent (critical for multi-tenancy)
- `name: str` — human-readable agent name
- `slug: str` — URL-friendly unique identifier (typically `workspace:agent-name`)
- `owner: str` — User ID of the agent creator/owner (for access control)
- **Presentation**
- `avatar: str = ""` — URL or emoji for UI representation
- `visibility: str = "private"` — enum: "private" (owner only), "workspace" (all workspace members), or "public" (system-wide)
- **Behavior**
- `config: AgentConfig = Field(default_factory=AgentConfig)` — embedded configuration object
- **Timestamps** (inherited from TimestampedDocument)
- `created_at`, `updated_at` — automatic audit trail
**MongoDB Settings**:
```python
class Settings:
name = "agents" # collection name
indexes = [
[('workspace', 1), ('slug', 1)] # compound index for efficient scoped queries
]
```
This compound index optimizes the common query pattern: *"find agent by workspace and slug"* — enabling fast lookups when resolving agent references in group workflows.
## How It Works
### Data Flow
1. **Creation**: A user creates an agent via an API endpoint, which validates the input against Agent/AgentConfig Pydantic schemas and stores it in MongoDB.
2. **Configuration Retrieval**: When a service (e.g., GroupService) needs to execute an agent, it queries the `agents` collection by `(workspace, slug)` using the compound index.
3. **Configuration Application**: The retrieved AgentConfig is passed to agent_bridge, which translates it into backend-specific parameters (e.g., Claude SDK initialization).
4. **Update**: Configuration changes are applied with automatic timestamp updates via TimestampedDocument's middleware.
### Validation & Constraints
- **Trust Level**: Bounded to 1–5 to prevent invalid escalation levels
- **Temperature**: Bounded to 0–2 (standard LLM range)
- **Max Tokens**: Minimum 1 token to prevent empty generations
- **Visibility**: Regex pattern enforces exactly three allowed values
- **Workspace Scoping**: Every agent is bound to a workspace via the indexed field, ensuring isolation in multi-tenant deployments
### Edge Cases
- **Empty model field**: When `model: ""`, the bridge layer interprets this as "use backend's default model" — enabling version-agnostic config
- **Default SOUL values**: If `soul_ocean` is not provided, the OCEAN traits fall back to preset default scores (0.7, 0.85, 0.5, 0.8, 0.2)
- **Disabled SOUL**: When `soul_enabled: False`, higher layers should ignore all soul_* fields, treating the agent as a pure LLM without personality constraints
## Authorization and Security
Access control is **not enforced in this module** — it's enforced at the API and service layers:
- **Query Filtering**: Services that fetch agents filter by workspace and visibility before returning config to users
- **Ownership Tracking**: The `owner` field records the creator and can be checked by services to allow owner-only updates
- **Visibility Levels**:
- `private`: Only the owner can access
- `workspace`: Any workspace member can access
- `public`: Any authenticated user can access (system-wide)
The schema itself has no permission logic — it's a pure data container. Permission enforcement happens in service layers (service, group_service) before they query or return Agent documents.
## Dependencies and Integration
### Upstream Dependencies
- **base** (`ee.cloud.models.base`)
- Provides `TimestampedDocument` — a MongoDB-aware base class with automatic `createdAt`/`updatedAt` fields
- Implies the use of Beanie ODM for MongoDB integration
- **Beanie** (`beanie.Indexed`)
- Indexed wrapper for MongoDB field indexing — the `Indexed(str)` annotation tells Beanie to create a database index on the workspace field
- **Pydantic**
- BaseModel for schema validation and serialization
- Field constraints (ge, le, pattern) for runtime validation
### Downstream Dependencies
- **service** — Reads Agent config to expose CRUD operations via REST and coordinates agent execution
- **group_service** — Queries agents by workspace/slug to resolve references in group definitions and orchestrate multi-agent workflows
- **agent_bridge** — Consumes AgentConfig and translates it into backend-specific parameters (e.g., Claude SDK arguments)
- **__init__** — Re-exports Agent and AgentConfig for easy importing across the codebase
### Data Flow Example
```
Client API Request
service.create_agent(Agent) ← validates against Agent schema
MongoDB agents collection ← stored with timestamps
group_service.resolve_agent(workspace, slug)
query agents collection using indexed (workspace, slug)
agent_bridge.prepare_execution(agent.config)
Backend-specific LLM client initialization
```
## Design Decisions
### 1. Configuration as Embedded Document (Not Reference)
**Decision**: AgentConfig is embedded in Agent, not stored separately.
**Rationale**:
- Agent configuration and metadata are always updated together and accessed together
- Avoids extra database lookups
- Ensures configuration consistency — no possibility of a dangling config reference
- Simpler schema semantics: an Agent is self-contained
### 2. SOUL Framework Integration at the Model Layer
**Decision**: Personality and values configuration is stored at the model layer, not hidden in a service or config file.
**Rationale**:
- SOUL traits are part of the agent's persistent identity, not runtime state
- Enables auditing: you can see when and how an agent's persona changed
- Allows different agents in the same workspace to have different personalities
- Separates concerns: the model layer says *what* personality to use; the bridge layer says *how* to apply it
### 3. Workspace Scoping at the Schema Level
**Decision**: Every agent is indexed by workspace.
**Rationale**:
- Multi-tenancy is a first-class concern in PocketPaw's cloud layer; scoping it in the schema ensures it can't be accidentally bypassed
- The compound index (workspace, slug) makes the most common query pattern fast
- Prevents accidental cross-workspace access
### 4. Visibility Enum as a String Pattern (Not an Enum Class)
**Decision**: `visibility: str = Field(pattern="^(private|workspace|public)$")` instead of `visibility: VisibilityEnum`
**Rationale**:
- Simpler schema — avoids needing a separate Enum class
- JSON serialization is straightforward (string vs. enum)
- Easier for frontend integration and API documentation
- Pydantic validates the pattern at runtime
### 5. Optional/Empty Model Field
**Decision**: `model: str = ""` (empty string means "use backend default") instead of `model: str | None`
**Rationale**:
- JSON schema compatibility: empty string is cleaner than null for APIs
- Explicit vs. implicit: empty string is a clear "no preference" signal
- Reduces null checks in consuming code
### 6. Trust Level as Custom Constraint
**Decision**: `trust_level: int` (1–5) rather than a backend-native parameter.
**Rationale**:
- PocketPaw-specific: trust_level is not a standard LLM parameter; it's a custom permission/capability escalation mechanism
- Allows fine-grained control over what actions an agent can take (e.g., level 5 can delete, level 1 can only read)
- Decoupled from backend: each backend interprets trust_level independently
## Common Patterns in This Module
- **Stateless Document Schema**: Agent and AgentConfig are pure data models with no methods; all business logic lives in service layers
- **Pydantic Validation**: Constraints (ge, le, pattern) ensure invalid configs cannot be persisted
- **MongoDB Indexing**: Compound index on (workspace, slug) optimizes the scoped query pattern
- **Embedding Pattern**: Config is embedded in Agent, not referenced, ensuring atomicity
- **Extensible Backend**: The backend field enables future support for multiple LLM providers
- **Default Factory**: SOUL values use lambda defaults to avoid mutable default issues
---
## Related
- [base-foundational-document-model-with-automatic-timestamp-management-for-mongodb](base-foundational-document-model-with-automatic-timestamp-management-for-mongodb.md)
- [untitled](untitled.md)
- [eecloudworkspace-router-re-export-for-fastapi-workspace-endpoints](eecloudworkspace-router-re-export-for-fastapi-workspace-endpoints.md)

View File

@@ -0,0 +1,187 @@
# auth/__init__ — Central re-export hub for authentication and user management
> This module serves as the public API facade for the entire authentication domain, re-exporting core authentication utilities, user management classes, security backends, and routing. It exists to provide a clean, stable interface that shields downstream code from internal restructuring while maintaining backward compatibility. Within the system architecture, it acts as the single entry point for all auth-related functionality needed by other domains.
**Categories:** Authentication & Authorization, API Gateway Layer, Facade & Re-export Pattern, Security Infrastructure
**Concepts:** FastAPI dependency injection, JWT (JSON Web Token) authentication, Beanie ODM, FastAPI-Users framework, cookie_backend, bearer_backend, UserManager, current_active_user, current_optional_user, Pydantic models (UserRead, UserCreate)
**Words:** 1421 | **Version:** 1
---
## Purpose
This `__init__.py` module is a **re-export facade** that consolidates the authentication domain's public interface. Rather than forcing downstream modules to navigate the internal structure of the `ee.cloud.auth` package, this module collects everything important from two primary sub-modules (`core` and `router`) and exposes it under a single import namespace.
**Why it exists:**
- **Backward compatibility**: As the auth domain evolves internally, existing code importing from `ee.cloud.auth` continues to work without modification
- **Clear API boundary**: Explicitly defines what is "public" (re-exported) versus what is "private" (not exported). The `# noqa: F401` comments tell linters these imports are intentional despite appearing unused
- **Simplified imports**: Callers can write `from ee.cloud.auth import current_active_user` instead of `from ee.cloud.auth.core import current_active_user`
- **Single responsibility**: This file documents the contract of the auth domain at a glance
**Role in system architecture:**
The authentication domain is foundational—it manages user identity, credentials, session management, and authorization primitives. Every other domain (workspace, user, group, notification, etc.) depends on it to identify who is making requests and whether they have permission to proceed. This `__init__.py` ensures that critical abstractions like `current_active_user`, `fastapi_users`, and security backends are discoverable and stable.
## Key Classes and Methods
### Dependency Injection Helpers
**`current_active_user`**
- A FastAPI dependency that extracts the authenticated user from the current request context
- Used in route handlers as a function parameter; FastAPI automatically calls it and injects the result
- Raises an exception if no valid authentication token is present (enforces required auth)
**`current_optional_user`**
- A FastAPI dependency similar to `current_active_user`, but allows anonymous requests
- Returns `None` if no authentication token is present, otherwise returns the user object
- Useful for endpoints that support both authenticated and unauthenticated access
### User Management
**`UserManager`**
- The core service class responsible for user lifecycle operations: creation, retrieval, updates, password changes, verification
- Implements business logic for user validation, password hashing, and status transitions
- Likely uses Beanie ODM to persist users to the database
**`UserRead` and `UserCreate`**
- Pydantic models for serialization/deserialization
- `UserRead`: the shape of user data returned to clients (excludes passwords)
- `UserCreate`: the shape of data clients send when creating a new user (includes password)
**`seed_admin`**
- A utility function for initial system setup that creates the first admin user
- Called once during application bootstrap; ensures an admin account exists so operators are not locked out of a fresh deployment
### Security Infrastructure
**`fastapi_users`**
- A pre-configured FastAPI-Users instance that bridges the auth system to HTTP
- Provides standard routes like `/register`, `/login`, `/logout` and handles protocol details
- Integrates with the user database and security backends
**`get_jwt_strategy`, `get_user_manager`, `get_user_db`**
- FastAPI dependencies that provide access to core auth components
- `get_jwt_strategy`: returns the JWT token generation/validation logic
- `get_user_manager`: returns the UserManager instance for the current request
- `get_user_db`: returns the database accessor for users
- These are usually internal dependencies; rarely called directly by application code
**`cookie_backend` and `bearer_backend`**
- Two authentication backends supporting different credential formats
- `cookie_backend`: reads auth tokens from HTTP cookies (browser-friendly)
- `bearer_backend`: reads auth tokens from the `Authorization: Bearer <token>` header (API-friendly)
- Both backends produce valid sessions; a client can use either strategy
### Configuration
**`SECRET`**
- The cryptographic key used to sign and verify JWTs
- Must be kept confidential; compromise of `SECRET` allows forgery of any valid token
- Typically loaded from environment variables at startup
**`TOKEN_LIFETIME`**
- The duration (in seconds or timedelta) for which a JWT remains valid after issuance
- Represents the security vs. convenience trade-off: a short lifetime requires frequent re-authentication, while a long lifetime extends the window during which a stolen token remains usable
### Routing
**`router`**
- A FastAPI `APIRouter` instance that mounts all authentication endpoints
- Typically includes login, logout, registration, password reset, and token refresh routes
- Imported directly and included in the main FastAPI app's routing configuration
## How It Works
**Import-time behavior:**
When `ee.cloud.auth` is first imported, this `__init__.py` executes, loading and re-exporting symbols from `core` and `router`. The `# noqa: F401` comments suppress linter warnings about unused imports—they are unused *locally* but used by *importers*.
**Typical authentication flow:**
1. A client makes an HTTP request with credentials (username/password) or a token (JWT in bearer header or cookie)
2. A route handler declares a parameter such as `current_user: User = Depends(current_active_user)` to request the dependency
3. FastAPI calls this dependency function, which validates the token/credentials against the auth backends
4. If valid, `current_user` is injected with the user object; the route handler executes with access to that user
5. If invalid, an HTTP 401/403 response is returned before the route handler runs
**Database integration:**
The `UserManager` and `get_user_db` work with a Beanie ODM backend (based on the import graph), persisting users to MongoDB. Password hashing is applied transparently—raw passwords are never stored.
**Token lifecycle:**
1. On login, `fastapi_users` creates a JWT signed with `SECRET` whose expiration is derived from `TOKEN_LIFETIME`
2. The JWT is returned in the response (via cookie or body, depending on the backend)
3. Subsequent requests include this JWT
4. The `bearer_backend` or `cookie_backend` validates the JWT signature and expiration
5. If the token is expired, the client must re-authenticate (login again)
## Authorization and Security
**Authentication vs. Authorization:**
This module handles *authentication* (who are you?) but delegates *authorization* (what can you do?) to other domains. For example, workspace membership, role assignments, and resource permissions are likely determined by the `workspace`, `group`, and `permission` modules, which query this auth domain to learn the current user's identity.
**Token security:**
- Tokens are cryptographically signed with `SECRET`; forgery requires knowledge of the secret
- Tokens have a finite lifetime (`TOKEN_LIFETIME`); stolen tokens eventually expire
- Tokens should be transmitted over HTTPS to prevent interception
- The `cookie_backend` supports HttpOnly cookies, preventing JavaScript from accessing tokens (mitigates XSS token theft)
- The `bearer_backend` is stateless; the server doesn't maintain a session store, relying entirely on token signatures
**Access patterns:**
- `current_active_user` enforces authentication; endpoints using it require a valid token
- `current_optional_user` allows anonymous access; endpoints using it can serve both authenticated and unauthenticated clients
- Both return the User object, which other modules can then use to check permissions (e.g., does the user belong to this workspace?)
## Dependencies and Integration
**What this module depends on:**
- **`ee.cloud.auth.core`**: The concrete implementation of authentication logic, including `UserManager`, security backends, and JWT strategy
- **`ee.cloud.auth.router`**: FastAPI routes for login, registration, logout, etc.
- **External: FastAPI-Users library**: Provides the base `fastapi_users` instance and authentication patterns
- **External: Beanie ODM**: Likely used by `UserManager` to persist users to MongoDB
- **External: a JWT library (FastAPI-Users builds on PyJWT)**: JWT creation/validation
**What depends on this module:**
The import graph shows that other domains like `errors`, `workspace`, `license`, `user`, `group`, `invite`, `message`, `notification`, `pocket`, and `session` depend on auth. They import `current_active_user`, `current_optional_user`, or `UserManager` to:
- Inject the current user into route handlers
- Look up user metadata
- Validate that an action is performed by an authenticated principal
- Enforce workspace-scoped or role-based authorization
**Example integration:**
The `workspace` module might import `current_active_user` to ensure only authenticated users can create workspaces, then check workspace membership separately to enforce resource isolation.
## Design Decisions
**1. Facade pattern via re-exports**
Instead of keeping all exports in separate internal modules and requiring deep imports, this `__init__.py` collects them. Trade-off: slightly more code in `__init__.py`, but significantly improved external API clarity and refactor tolerance.
**2. Dual authentication backends (cookie + bearer)**
Supporting both cookies and bearer tokens allows the system to serve multiple client types (browsers, SPAs, native apps, server-to-server) from a single backend. Backends are plugged into `fastapi_users`; switching or adding backends requires only configuration, not code changes—good extensibility.
**3. Separation of concerns: core vs. router**
The `core` module encapsulates the business logic and data models; the `router` module adds HTTP semantics (request/response serialization, status codes, error messages). This separation makes the auth logic testable without HTTP, and allows multiple HTTP transports (REST, GraphQL, WebSocket) to reuse the same core logic if needed.
**4. Dependency injection for `UserManager`, JWT strategy, etc.**
Rather than exposing these as singletons or module-level variables, they are injected via FastAPI dependencies. This enables:
- Testing with mock implementations
- Per-request customization (e.g., different strategies for different clients)
- Lazy initialization and resource cleanup
**5. No explicit token revocation list**
Both backends are stateless—there is no server-side session store or revocation list. Once a token is issued, it's valid until expiration. This is appropriate for a distributed, scalable system but means logout cannot immediately invalidate tokens (the client must discard the token, and the server cannot force it). Some systems add a short-lived in-memory revocation cache for stronger logout guarantees.
---
## Related
- [untitled](untitled.md)
- [workspace-data-model-for-organization-workspaces-in-multi-tenant-enterprise-depl](workspace-data-model-for-organization-workspaces-in-multi-tenant-enterprise-depl.md)
- [license-enterprise-license-validation-and-feature-gating-for-cloud-deployments](license-enterprise-license-validation-and-feature-gating-for-cloud-deployments.md)
- [deps-fastapi-dependency-injection-layer-for-cloud-router-authentication-and-auth](deps-fastapi-dependency-injection-layer-for-cloud-router-authentication-and-auth.md)
- [core-enterprise-jwt-authentication-with-cookie-and-bearer-transport-for-fastapi](core-enterprise-jwt-authentication-with-cookie-and-bearer-transport-for-fastapi.md)
- [agent-agent-configuration-and-metadata-storage-for-workspace-scoped-ai-agents](agent-agent-configuration-and-metadata-storage-for-workspace-scoped-ai-agents.md)
- [comment-threaded-comments-on-pockets-and-widgets-with-workspace-isolation](comment-threaded-comments-on-pockets-and-widgets-with-workspace-isolation.md)
- [file-cloud-storage-metadata-document-for-managing-file-references](file-cloud-storage-metadata-document-for-managing-file-references.md)
- [group-multi-user-chat-channels-with-ai-agent-participants](group-multi-user-chat-channels-with-ai-agent-participants.md)
- [invite-workspace-membership-invitation-document-model](invite-workspace-membership-invitation-document-model.md)
- [message-data-model-for-group-chat-messages-with-mentions-reactions-and-threading](message-data-model-for-group-chat-messages-with-mentions-reactions-and-threading.md)
- [notification-in-app-notification-data-model-and-persistence-for-user-workspace-e](notification-in-app-notification-data-model-and-persistence-for-user-workspace-e.md)
- [pocket-data-models-for-pocket-workspaces-with-widgets-teams-and-collaborative-ag](pocket-data-models-for-pocket-workspaces-with-widgets-teams-and-collaborative-ag.md)
- [session-cloud-tracked-chat-session-document-model-for-pocket-scoped-conversation](session-cloud-tracked-chat-session-document-model-for-pocket-scoped-conversation.md)

View File

@@ -0,0 +1,51 @@
# AuthService: Business Logic Layer for Authentication and User Profile Management
> AuthService is a stateless FastAPI service that encapsulates authentication and user profile management business logic. It provides three main operations: retrieving user profiles, updating mutable profile fields, and managing active workspace selection.
**Categories:** Authentication & Authorization, User Management, Business Logic Layer
**Concepts:** AuthService, User Profile, ProfileUpdateRequest, Active Workspace, User Model, Email Verification, Avatar, HTTPException, Stateless Service, FastAPI
**Words:** 207 | **Version:** 2
---
## Overview
AuthService is a stateless service class that handles core authentication and user profile business logic for the cloud platform. It operates as an abstraction layer between API endpoints and data models.
## Core Methods
### get_profile
Retrieves the current user's complete profile information and returns it as a dictionary.
**Returns:**
- `id`: User identifier (string)
- `email`: User email address
- `name`: User's full name
- `image`: User's avatar URL
- `emailVerified`: Boolean indicating email verification status
- `activeWorkspace`: Currently active workspace identifier
- `workspaces`: Array of workspace objects containing workspace ID and user role
### update_profile
Updates mutable user profile fields and persists changes to the database.
**Mutable Fields:**
- `full_name`: User's display name
- `avatar`: User's profile image
- `status`: User status indicator
All fields are optional and only updated if provided (non-null values). Returns the updated profile using `get_profile()` after persistence.
### set_active_workspace
Sets the user's active workspace context.
**Validation:**
- Raises `HTTPException` with status code 400 if `workspace_id` is empty or missing
- Persists the change to the database
## Architecture
All methods are implemented as static methods, making the service stateless and enabling straightforward testing and composition. The service depends on the `User` model and `ProfileUpdateRequest` schema for type definitions.

View File

@@ -0,0 +1,132 @@
# backend_adapter — Adapter that makes PocketPaw's agent backends usable as knowledge base CompilerBackends
> This module provides `PocketPawCompilerBackend`, an adapter class that implements the knowledge_base compiler protocol by delegating to PocketPaw's pluggable agent backend registry (Claude SDK, OpenAI, etc.). It exists to decouple the standalone knowledge-base package from PocketPaw's specific LLM infrastructure, allowing KB compilation to automatically use whatever agent backend is currently active in the system. This bridges the gap between the generic knowledge_base.compiler.CompilerBackend interface and PocketPaw's concrete backend implementations.
**Categories:** Knowledge Base — Integration Layer, Adapter/Bridge Pattern, LLM Backend Abstraction, Agent Infrastructure
**Concepts:** PocketPawCompilerBackend, adapter pattern, facade pattern, CompilerBackend protocol, agent registry, get_backend_class, Settings, async streaming, lazy initialization, event-driven architecture
**Words:** 1266 | **Version:** 1
---
## Purpose
This module solves a critical architectural problem: the `knowledge_base` package is designed to be standalone and backend-agnostic, but it needs to call large language models (LLMs) during KB compilation (e.g., to generate structured JSON from prompts). Rather than embedding specific LLM dependencies into knowledge_base itself, PocketPaw uses an **adapter pattern** to bridge the two systems.
`PocketPawCompilerBackend` implements the `knowledge_base.compiler.CompilerBackend` protocol—a simple async interface requiring a `complete(prompt, system_prompt)` method—and delegates all actual LLM work to PocketPaw's agent registry. This allows KB compilation to respect PocketPaw's runtime configuration: whichever backend is active (Claude SDK, OpenAI, custom) automatically becomes the KB compiler's backend.
**In the system architecture**: Knowledge base lives in `/ee/cloud/kb/` as a relatively isolated subsystem. KB compilation operations (triggered by `router` or `knowledge` modules) import this adapter, which then reaches into PocketPaw's agent infrastructure. This allows PocketPaw to manage all LLM backend state in one place (the registry) while letting knowledge base remain decoupled.
## Key Classes and Methods
### PocketPawCompilerBackend
**Purpose**: Adapter class that makes PocketPaw's agent backends conform to the knowledge_base.compiler.CompilerBackend protocol.
**Key Methods**:
#### `__init__(backend_name: str = "", model: str = "")`
Initializes the adapter with optional overrides. If `backend_name` is provided, it overrides the default backend from settings. If `model` is provided, it updates the corresponding model setting (e.g., `claude_sdk_model` or `openai_model`). These parameters allow callers to request a specific backend or model without globally changing PocketPaw's configuration.
**Business logic**: Stores backend name and model as instance state so that `complete()` can apply these overrides when instantiating the actual backend.
#### `async def complete(prompt: str, system_prompt: str = "") -> str`
The core method implementing the CompilerBackend protocol. It orchestrates the full LLM call: loading settings, resolving the backend class, instantiating it, streaming its response, and cleaning up.
**Control flow**:
1. **Settings Resolution**: Loads PocketPaw's configuration via `Settings.load()`. Uses the provided `self._backend_name` if set; otherwise falls back to `settings.agent_backend` (the system's active backend).
2. **Model Override** (if `self._model` is set): Updates the appropriate model field in settings based on backend name. For example, if backend is "claude", sets `settings.claude_sdk_model`. This allows the caller to change models without mutating global config.
3. **Backend Resolution**: Calls `get_backend_class(backend_name)` to retrieve the backend class from PocketPaw's registry (e.g., `ClaudeBackend`, `OpenAIBackend`). If the backend isn't registered, logs a warning and returns an empty string (safe failure).
4. **Agent Instantiation**: Creates an instance of the backend class, passing the modified settings. This backend instance is responsible for authentication, HTTP setup, and LLM communication.
5. **Streaming and Aggregation**: Calls `agent.run(prompt, system_prompt=sys_prompt)` which returns an async generator of events. The method iterates over events, extracting message chunks (events where `type == "message"`). It stops when it receives a `"done"` event.
6. **Default System Prompt**: If no system_prompt is provided, uses a hardcoded default: `"You are a knowledge compiler. Output only valid JSON."` This guides the LLM to output structured data suitable for KB compilation.
7. **Cleanup**: The `finally` block ensures `await agent.stop()` is called, allowing backends to close connections, free resources, or log telemetry.
**Business logic**: The method treats streaming responses as chunks and concatenates them into a single string. This is idiomatic for LLM APIs that return tokens incrementally. The aggregated response is stripped of whitespace before returning.
## How It Works
**Data flow for a KB compilation request**:
```
router or knowledge module
Calls: PocketPawCompilerBackend(backend_name, model).complete(prompt, system_prompt)
complete() loads PocketPaw settings and resolves the backend from registry
Instantiates the backend (e.g., ClaudeBackend, OpenAIBackend) with merged settings
AsyncIO streams LLM response via agent.run()
Aggregates chunks into single string
Returns complete response (valid JSON, typically)
```
**Key design observations**:
- **Lazy initialization**: The backend class is resolved at runtime in `complete()`, not at `__init__()` time. This allows the registry to be populated after the adapter is instantiated, and lets the system swap backends dynamically.
- **Event-driven streaming**: Rather than awaiting a single response, the code iterates over async events. This is essential for long-running LLM calls—it can start processing output while the backend is still generating tokens. The code filters for `type == "message"` events, implying the backend emits multiple event types.
- **Graceful degradation**: If a backend isn't available, the method returns an empty string rather than raising an exception. Callers should handle empty responses (which `router` or `knowledge` presumably do).
- **Settings immutability at instance level**: The adapter takes `backend_name` and `model` as init parameters, but doesn't modify global settings. Each call to `complete()` loads settings fresh, applies overrides locally, and uses the modified settings only for that agent instance. This prevents cross-request state pollution.
## Authorization and Security
No explicit access controls are defined in this module. However, implicit security assumptions:
- **Backend registry access**: The call to `get_backend_class()` assumes the agent registry is available and populated. If authentication or registry ACLs are enforced elsewhere (in the agents subsystem), they would prevent unauthorized backends from being loaded.
- **Credentials delegation**: The module does not handle API keys or authentication. It passes settings to the backend class, which is responsible for using credentials (e.g., ANTHROPIC_API_KEY for Claude, openai.api_key for OpenAI) from PocketPaw's configuration.
- **Default system prompt injection**: The hardcoded system prompt (`"You are a knowledge compiler. Output only valid JSON."`) is benign but fixed. Callers can override it via the `system_prompt` parameter, so there's no prompt injection vulnerability from the default.
## Dependencies and Integration
**What this module depends on**:
- `pocketpaw.agents.registry.get_backend_class()`: Resolves backend classes by name. Indicates PocketPaw has a plugin architecture where backends are registered.
- `pocketpaw.config.Settings`: Loads PocketPaw's active configuration (backend name, model names, API keys, etc.). Suggests a centralized config system, likely environment-based or YAML-driven.
- `logging`: Standard Python logging for non-blocking warnings (e.g., backend not found).
**What depends on this module**:
- **`knowledge`**: Presumably imports `PocketPawCompilerBackend` to instantiate it when KB operations need LLM support (e.g., inferring KB structure from data).
- **`router`**: Likely uses this adapter to service HTTP endpoints that trigger KB compilation with a chosen backend.
**Integration pattern**: This module is a thin facade that translates between two independent protocols: the knowledge_base.compiler.CompilerBackend interface (async method signature) and PocketPaw's agent interface (streaming events, backend registry). It adds minimal logic, acting primarily as a bridge.
## Design Decisions
**1. Adapter Pattern (Facade Pattern variant)**
Rather than modifying knowledge_base to import PocketPaw directly (tight coupling), an adapter class was created. This allows knowledge_base to remain a standalone package; PocketPaw depends on knowledge_base, not vice versa.
**2. Lazy Backend Resolution**
Backend classes are resolved at call time (`complete()`) rather than init time (`__init__()`). This supports dynamic backend switching and defers the cost of looking up the backend to when it's actually needed.
**3. Streaming over Single Response**
The method consumes async events from `agent.run()` and concatenates chunks. This is more idiomatic for LLM APIs and allows responses to be processed incrementally (though this module concatenates the full response before returning).
**4. Per-Call Settings Override**
The `backend_name` and `model` init parameters are stored but applied only within `complete()`. They don't mutate PocketPaw's global settings. This keeps instances stateless from a global perspective and makes behavior predictable when multiple requests are in flight.
**5. Graceful Degradation**
If a backend is unavailable, the method returns `""` instead of raising. This allows callers to handle empty responses gracefully (e.g., fall back to a cached response, skip compilation). It's a tradeoff between fail-fast and resilience.
**6. Default System Prompt**
A fixed system prompt is provided if none is given. This ensures the LLM is primed to output JSON (a knowledge base compilation requirement) even if the caller doesn't specify one. It's a sensible default but not user-configurable at the module level.
---
## Related
- [untitled](untitled.md)

View File

@@ -0,0 +1,175 @@
# base — Foundational document model with automatic timestamp management for MongoDB persistence
> This module provides `TimestampedDocument`, a base class that extends Beanie's ODM `Document` to automatically manage `createdAt` and `updatedAt` timestamps on all database operations. It exists to eliminate boilerplate timestamp logic across the system and ensure consistent, UTC-based audit trails on all domain entities. It serves as the architectural foundation for all entity models in the pocketPaw system (agents, messages, workspaces, etc.), enabling automatic temporal tracking without requiring downstream classes to implement timestamp logic.
**Categories:** data model layer, temporal auditing, MongoDB persistence, foundational infrastructure, cross-cutting concerns
**Concepts:** TimestampedDocument, createdAt, updatedAt, Beanie ODM, before_event decorator, Insert event, Replace event, Save event, Update event, UTC timezone
**Words:** 1441 | **Version:** 1
---
## Purpose
The `base` module solves a fundamental data modeling problem: maintaining reliable, consistent audit timestamps across all entities in a MongoDB-backed system. Rather than requiring every model class to implement timestamp management independently, this module provides a reusable base class that automatically captures when documents are created and modified.
**Why it exists as a separate module:**
- **DRY principle**: Prevents timestamp logic duplication across 8 entity models (agent, comment, group, message, notification, pocket, session, workspace)
- **Centralized audit trail strategy**: Ensures all entities follow identical timestamp semantics (UTC-based, always maintained)
- **Extensibility foundation**: Other modules inherit from `TimestampedDocument` rather than raw Beanie `Document`, allowing future cross-cutting concerns to be added at this layer
- **Single source of truth**: Changes to timestamp behavior (e.g., timezone handling, precision) happen in one place
**Role in system architecture:**
This module occupies the **data model foundation layer**. It sits between the Beanie ODM framework (external dependency) and all domain entity models (agent, comment, group, etc.). Every persistent entity in the system inherits from `TimestampedDocument`, making this the lowest-level architectural contract that all models must satisfy.
## Key Classes and Methods
### TimestampedDocument
**Purpose**: Base class for all MongoDB documents that require automatic timestamp management.
**Fields**:
- `createdAt: datetime` — The UTC timestamp when the document was first inserted into the database. Set once at creation time and never modified afterward.
- `updatedAt: datetime` — The UTC timestamp of the most recent modification (insert, replace, save, or update). Updated on every write operation.
Both fields default to `datetime.now(UTC)` at instantiation time, but are overridden by the event handlers before any database operation.
**Methods**:
1. `_set_created()` (decorated with `@before_event(Insert)`)
- **When it runs**: Before any document is inserted for the first time
- **What it does**: Sets both `createdAt` and `updatedAt` to the current UTC time at the moment of insertion
- **Why both fields**: Ensures consistency; a newly created document has identical create and update timestamps initially
- **Design note**: This overwrites any `createdAt` value set during object instantiation, ensuring the timestamp reflects actual database insertion time, not object creation time
2. `_set_updated()` (decorated with `@before_event(Replace, Save, Update)`)
- **When it runs**: Before any document modification (full replacement, partial save, or atomic update)
- **What it does**: Sets only `updatedAt` to the current UTC time
- **Why only updatedAt**: Preserves `createdAt` unchanged; the original creation time must never shift
- **Event scope**: Catches all three modification paths (Replace, Save, Update), covering Beanie's full mutation API
**Settings**:
- `use_state_management = True` — Enables Beanie's internal state tracking, allowing the library to detect which fields have changed and optimize update operations
## How It Works
**Document lifecycle and timestamp flow**:
1. **Instantiation**: A subclass (e.g., `Agent`) creates an instance of itself, inheriting from `TimestampedDocument`.
```
agent = Agent(name="test")
# At this point: agent.createdAt and agent.updatedAt are set to now(UTC) by Field defaults
```
2. **First database write (Insert)**: When the document is inserted for the first time via `.insert()` or `.save()`, Beanie triggers the `Insert` event.
- `_set_created()` executes before the database operation
- Both `createdAt` and `updatedAt` are reset to the exact moment of insertion
- The document is written to MongoDB with both timestamps synchronized
3. **Subsequent modifications**: Any update operation (partial field change, full replace, or atomic update) triggers one of `Replace`, `Save`, or `Update` events.
- `_set_updated()` executes, refreshing only `updatedAt`
- `createdAt` remains unchanged (not touched by the event handler)
- MongoDB receives the updated document with the new `updatedAt` but original `createdAt`
**Why three separate events for updates**:
- **Replace**: Full document replacement (all fields overwritten)
- **Save**: Partial save in Beanie (specific fields saved)
- **Update**: Direct MongoDB update operations (atomic changes)
Together, these cover all mutation pathways in Beanie, ensuring `updatedAt` is refreshed regardless of which API the caller uses.
**Edge cases and guarantees**:
- **UTC timezone**: Using `UTC` ensures timestamps are never ambiguous or dependent on server timezone
- **Immutability of createdAt**: Once set, `createdAt` never changes, providing an immutable audit anchor
- **updatedAt always progresses**: Each modification advances `updatedAt` (assuming time moves forward), enabling last-modified sorting and cache invalidation
- **No manual intervention**: Timestamps are assigned by the Beanie event hooks immediately before each write, so callers never need to set them; note that this is enforced in the ODM (application) layer, not by MongoDB itself
## Authorization and Security
This module does not implement authorization logic directly. However, it provides an important **audit trail foundation**:
- **Temporal accountability**: The `createdAt` and `updatedAt` fields enable systems to answer "when was this entity modified?" which is essential for compliance logging, debugging, and temporal queries
- **Assumption of trust**: This module assumes all callers have already been authorized by upstream layers (e.g., API routers with authentication). It does not validate who is modifying what; it only records when modifications occur.
- **Immutable creation record**: The unchangeable `createdAt` field provides forensic value; even if data is modified later, the original creation timestamp persists.
Downstream authorization systems (not in this module) should use these fields to enforce policies like "only admins can modify documents older than 30 days" or "creator can only delete within 1 hour."
## Dependencies and Integration
**Direct dependencies**:
- **Beanie** (`Document`, `Insert`, `Replace`, `Save`, `Update`, `before_event`): MongoDB async ODM framework. This module tightly couples to Beanie's event system to intercept and modify documents before database operations.
- **Pydantic** (`Field`): Data validation and serialization. Used to define field defaults with factory functions.
- **Python standard library** (`datetime`, `UTC`): Timezone-aware UTC timestamps.
**Dependent modules** (8 documented imports):
1. **agent** — User-facing agents (e.g., AI assistants) inherit from `TimestampedDocument` to track creation and modification times
2. **comment** — Comments on entities (posts, tasks, etc.) need temporal ordering; inherits for `createdAt` sorting
3. **group** — Workspace/organization groups track membership changes; timestamps enable audit logs
4. **message** — Chat or notification messages require `createdAt` for chronological ordering in conversations
5. **notification** — Notifications need `updatedAt` to determine staleness and read status age
6. **pocket** — A core entity (possibly a workspace subdivision) with temporal tracking requirements
7. **session** — User sessions track login/logout and activity; timestamps are critical for session expiration and audit
8. **workspace** — Top-level organizational entity; creation and modification timestamps are foundational for workspace lifecycle management
**Integration pattern**:
```python
# Example from workspace.py or similar:
from ee.cloud.models.base import TimestampedDocument
class Workspace(TimestampedDocument):
name: str
# ... other fields ...
# Automatically gets createdAt and updatedAt tracking
```
Each dependent module adds its own domain-specific fields and methods while inheriting timestamp behavior automatically.
**System-wide implications**:
- All MongoDB queries can filter/sort by timestamp: `Workspace.find({"createdAt": {"$gte": start_date}})`
- API responses include temporal metadata for clients to track freshness
- Background jobs can identify stale entities (e.g., cleanup, archival)
- Audit systems have reliable temporal anchors for compliance reporting
## Design Decisions
**1. Automatic timestamp management via Beanie events**
- **Trade-off**: Developers cannot choose their own creation timestamp (by design); a `createdAt` value supplied before insertion is silently overwritten because `_set_created()` resets it on insert.
- **Rationale**: Prevents accidental or malicious timestamp manipulation; the timestamp is a property of the system, not the data itself.
- **Alternative considered**: Manual timestamp management (developer sets fields). Rejected because it's error-prone and doesn't scale across 8 models.
**2. UTC timezone exclusively**
- **Trade-off**: All timestamps are in UTC; display layers must handle timezone conversion for user-facing UI.
- **Rationale**: Eliminates ambiguity, simplifies comparisons, and aligns with international standards. A single source of truth for temporal ordering.
**3. Separate event handlers for Insert vs. Update**
- **Trade-off**: Code duplication (both set timestamps); conceptual distinction between creation and modification.
- **Rationale**: `createdAt` is immutable (set once at insertion); `updatedAt` is mutable (refreshed on every change). Separate handlers make this contract explicit and prevent future bugs if logic diverges.
**4. Field defaults via `Field(default_factory=...)`**
- **Trade-off**: Timestamps are set twice on insert (once by default_factory, then overridden by `_set_created`).
- **Rationale**: Ensures Pydantic validation passes (fields are never `None`), while the value actually written on insert is the more accurate one set by `_set_created`. The tiny performance cost is negligible.
**5. use_state_management = True**
- **Trade-off**: Beanie tracks field changes in memory, adding memory overhead.
- **Rationale**: Enables partial updates and optimizes queries. Without this, every `.save()` would perform a full document replacement, defeating the purpose of selective updates.
**6. Inheritance-based composition**
- **Trade-off**: All entities must inherit from `TimestampedDocument` (tight coupling to this class).
- **Rationale**: Simpler than mixins or composition; leverages Python's class hierarchy cleanly. Mixin or decorator approaches would require more boilerplate for developers to get timestamps working.
## Architectural Principles
- **Separation of concerns**: Timestamp management is isolated from business logic (which lives in subclasses)
- **DRY**: One implementation, many consumers
- **Immutable auditing**: Creation timestamp cannot change, ensuring forensic integrity
- **Eventual consistency ready**: Timestamps support distributed system concerns (causality, ordering)
---
## Related
- [agent-agent-configuration-and-metadata-storage-for-workspace-scoped-ai-agents](agent-agent-configuration-and-metadata-storage-for-workspace-scoped-ai-agents.md)
- [comment-threaded-comments-on-pockets-and-widgets-with-workspace-isolation](comment-threaded-comments-on-pockets-and-widgets-with-workspace-isolation.md)
- [group-multi-user-chat-channels-with-ai-agent-participants](group-multi-user-chat-channels-with-ai-agent-participants.md)
- [message-data-model-for-group-chat-messages-with-mentions-reactions-and-threading](message-data-model-for-group-chat-messages-with-mentions-reactions-and-threading.md)
- [notification-in-app-notification-data-model-and-persistence-for-user-workspace-e](notification-in-app-notification-data-model-and-persistence-for-user-workspace-e.md)
- [pocket-data-models-for-pocket-workspaces-with-widgets-teams-and-collaborative-ag](pocket-data-models-for-pocket-workspaces-with-widgets-teams-and-collaborative-ag.md)
- [session-cloud-tracked-chat-session-document-model-for-pocket-scoped-conversation](session-cloud-tracked-chat-session-document-model-for-pocket-scoped-conversation.md)
- [workspace-data-model-for-organization-workspaces-in-multi-tenant-enterprise-depl](workspace-data-model-for-organization-workspaces-in-multi-tenant-enterprise-depl.md)

View File

@@ -0,0 +1,174 @@
# chat/__init__.py — Entry point for chat domain with groups, messages, and WebSocket real-time capabilities
> This module serves as the public API gateway for the chat domain, re-exporting the FastAPI router that handles all chat-related HTTP endpoints and WebSocket connections. It exists to provide a clean, consolidated entry point that other parts of the system (primarily the main application server) can import to register chat functionality. By isolating the chat domain behind a single import, it enables modular architecture where chat features can be independently versioned, tested, and scaled.
**Categories:** chat domain, API gateway / facade, module initialization, real-time messaging infrastructure
**Concepts:** FastAPI router, module facade pattern, domain-driven design, workspace scoping, multi-tenancy, event-driven architecture, WebSocket real-time, license gating, session management, re-export pattern
**Words:** 1115 | **Version:** 1
---
## Purpose
The `chat/__init__.py` module is the **architectural boundary** between the chat domain and the rest of the PocketPaw system. Its primary purposes are:
1. **Module Aggregation**: Groups all chat-related functionality (groups, messages, real-time WebSocket, notifications, etc.) under a single coherent namespace.
2. **Router Registration Point**: Exposes the FastAPI `router` object that the main application server imports and includes in its route configuration.
3. **Dependency Isolation**: Acts as a facade, hiding the internal structure of the chat domain (service layers, database models, event handlers) from consumers.
4. **Feature Gating**: By controlling what's imported and exported here, the architecture enables optional feature loading and licensing controls (the import of `license` in the module suggests chat features may be license-gated).
### System Architecture Context
PocketPaw appears to be an enterprise chat/collaboration platform with:
- **Workspace scoping**: Multiple organizations/workspaces, each with isolated chat data
- **Real-time messaging**: WebSocket support for live updates on groups and messages
- **Multi-tenant design**: User and license management integrated with chat features
- **Event-driven architecture**: Event handlers suggest asynchronous processing of chat events (message creation, group updates, etc.)
- **Modular domain design**: Chat is one domain among many (workspace, user, notification, etc.), each with independent concerns
## Key Classes and Methods
This module is intentionally minimal—it does **not** define any classes or methods itself. Instead, it re-exports:
### `router` (imported from `ee.cloud.chat.router`)
- **Type**: FastAPI `APIRouter` instance
- **Purpose**: Contains all HTTP endpoints and WebSocket handlers for chat operations
- **Responsibility**: Routes incoming requests to appropriate service handlers, likely including:
- Group CRUD operations (create, read, update, delete groups)
- Message CRUD and retrieval (send, fetch, edit, delete messages)
- WebSocket connections for real-time message delivery
- Membership management (adding/removing users from groups)
- Invite handling (creating and accepting group invitations)
- Notification delivery to group members
- **Usage**: The main application (likely in a top-level `main.py` or similar) imports this router and registers it with the FastAPI app:
```python
from ee.cloud.chat import router
app.include_router(router, prefix="/api/chat")
```
## How It Works
### Module Loading and Initialization
1. **Import Time**: When any code imports from `ee.cloud.chat`, Python executes this `__init__.py` file.
2. **Router Import**: The `from ee.cloud.chat.router import router` line imports the pre-built FastAPI router.
3. **Noqa Comment**: The `# noqa: F401` comment tells linters to ignore the "unused import" warning, since `router` is imported for re-export, not used directly in this file.
4. **Sub-module Loading**: The import graph shows this module has access to many sub-modules (`errors`, `event_handlers`, `agent_bridge`, `group`, `message`, etc.), which are loaded when the chat domain initializes.
### Request Flow
When a client makes a chat-related request:
1. **Request arrives** at the main FastAPI application
2. **Router matches** the request path against endpoints in `chat/router.py`
3. **Endpoint handler** (in router or delegated to service layer) processes the request
4. **Service layer** (e.g., `GroupService`, `MessageService`) executes business logic
5. **Database layer** (likely using models from `group.py`, `message.py`) persists or retrieves data
6. **Event system** (via `event_handlers.py`) emits events (e.g., "message_created", "group_updated")
7. **WebSocket broadcasts** (if applicable) notify connected clients of updates via `ws_manager` or similar
8. **Response** is returned to client
### Real-time Flow
For WebSocket connections (real-time messages):
1. Client establishes WebSocket connection to a group endpoint
2. `router.py` endpoint accepts the connection and registers the client session
3. When a message is sent via HTTP or another WebSocket, an event is emitted
4. Event handler broadcasts the message to all connected clients in that group
5. Clients receive updates in real-time without polling
## Authorization and Security
While this `__init__.py` doesn't contain authorization logic directly, the import of `license` and the presence of `user`, `workspace`, and `session` in the import graph indicate:
- **License gating**: Chat features may be restricted to certain license tiers
- **Workspace isolation**: Users can only access groups/messages within their workspace
- **Session validation**: WebSocket and HTTP endpoints likely validate that the requestor has an active session
- **Membership verification**: Users can only send messages to groups they're members of (implied by `group` and `invite` modules)
- **Agent bridge**: The `agent_bridge` import suggests service accounts or agents may have special access for automation
## Dependencies and Integration
### What This Module Imports
```
errors → Exception types for chat domain (ChatNotFoundError, etc.)
router → Main FastAPI router (re-exported)
workspace → Workspace isolation and context
license → Feature gating and access control
user → User identity and authentication
deps → Shared dependencies (database sessions, config)
event_handlers → Async event processing (message broadcasts, notifications)
agent_bridge → Service account or agent interactions
core → Shared core utilities
agent → AI agent integration
comment → Comment functionality (possibly message threading)
file → File attachment support in messages
group → Group domain model and service
invite → Group invitation model and service
message → Message domain model and service
notification → Notification delivery (email, push, in-app)
pocket → Custom/proprietary feature
session → WebSocket session management
```
### Who Depends on This Module
The import graph shows "Imported by: none (within scanned set)", meaning no other scanned modules directly import from `chat/__init__.py`. However, in a complete PocketPaw deployment:
- **Main application server** imports `router` to register chat endpoints
- **WebSocket manager** may consume session management
- **Notification service** may listen to chat events
- **Analytics/audit** may observe chat events
## Design Decisions
### 1. **Minimal Init File (Facade Pattern)**
This `__init__.py` deliberately exports only `router`, not individual services or models. This:
- **Prevents tight coupling**: Consumers depend on the API (router), not implementation details
- **Enables internal refactoring**: The chat domain can reorganize services without breaking imports elsewhere
- **Provides a single entry point**: Simplifies integration and reduces import confusion
### 2. **Router-Centric Architecture**
All chat functionality is exposed through FastAPI endpoints, not as direct service imports. This:
- **Enforces HTTP semantics**: Every operation goes through request/response validation
- **Enables middleware**: Logging, rate limiting, auth can be applied globally
- **Supports REST principles**: Standard HTTP methods map to operations
### 3. **Event-Driven Real-time**
The inclusion of `event_handlers` and `session` suggests chat uses an event-driven model:
- **Decoupling**: Message senders don't need to know about WebSocket connections
- **Scalability**: Events can be queued and processed asynchronously
- **Consistency**: All state changes flow through events, ensuring consistency across connected clients
### 4. **License and Workspace Scoping**
Imports of `license` and `workspace` indicate:
- **Multi-tenancy**: Groups and messages are scoped to workspaces
- **Feature licensing**: Chat features can be restricted by subscription tier
- **Isolation**: Users in different workspaces cannot see each other's messages
### 5. **Integration Modules**
The presence of `agent_bridge`, `comment`, and `file` suggests:
- **Rich messaging**: Messages can contain files, mentions, threads (comments)
- **Automation**: Bots/agents can interact with groups and messages
- **Extensibility**: The domain is designed to accommodate future features
---
## Related
- [untitled](untitled.md)
- [workspace-data-model-for-organization-workspaces-in-multi-tenant-enterprise-depl](workspace-data-model-for-organization-workspaces-in-multi-tenant-enterprise-depl.md)
- [license-enterprise-license-validation-and-feature-gating-for-cloud-deployments](license-enterprise-license-validation-and-feature-gating-for-cloud-deployments.md)
- [deps-fastapi-dependency-injection-layer-for-cloud-router-authentication-and-auth](deps-fastapi-dependency-injection-layer-for-cloud-router-authentication-and-auth.md)
- [core-enterprise-jwt-authentication-with-cookie-and-bearer-transport-for-fastapi](core-enterprise-jwt-authentication-with-cookie-and-bearer-transport-for-fastapi.md)
- [agent-agent-configuration-and-metadata-storage-for-workspace-scoped-ai-agents](agent-agent-configuration-and-metadata-storage-for-workspace-scoped-ai-agents.md)
- [comment-threaded-comments-on-pockets-and-widgets-with-workspace-isolation](comment-threaded-comments-on-pockets-and-widgets-with-workspace-isolation.md)
- [file-cloud-storage-metadata-document-for-managing-file-references](file-cloud-storage-metadata-document-for-managing-file-references.md)
- [group-multi-user-chat-channels-with-ai-agent-participants](group-multi-user-chat-channels-with-ai-agent-participants.md)
- [invite-workspace-membership-invitation-document-model](invite-workspace-membership-invitation-document-model.md)
- [message-data-model-for-group-chat-messages-with-mentions-reactions-and-threading](message-data-model-for-group-chat-messages-with-mentions-reactions-and-threading.md)
- [notification-in-app-notification-data-model-and-persistence-for-user-workspace-e](notification-in-app-notification-data-model-and-persistence-for-user-workspace-e.md)
- [pocket-data-models-for-pocket-workspaces-with-widgets-teams-and-collaborative-ag](pocket-data-models-for-pocket-workspaces-with-widgets-teams-and-collaborative-ag.md)
- [session-cloud-tracked-chat-session-document-model-for-pocket-scoped-conversation](session-cloud-tracked-chat-session-document-model-for-pocket-scoped-conversation.md)

View File

@@ -0,0 +1,189 @@
# comment — Threaded comments on pockets and widgets with workspace isolation
> This module defines the data models for a collaborative commenting system that enables threaded discussions on pockets (content containers) and widgets within a workspace. It exists to provide a structured, queryable representation of comments with support for mentions, resolution status, and hierarchical replies. The module serves as the persistence layer for collaborative feedback and discussion features in the PocketPaw platform.
**Categories:** Data Model / Persistence, Collaboration Features, Multi-tenant Architecture, Core Domain Model
**Concepts:** Comment, CommentTarget, CommentAuthor, TimestampedDocument, Threaded comments, Polymorphic targeting, Workspace scoping, Multi-tenant isolation, Immutable snapshots, Beanie ODM
**Words:** 1567 | **Version:** 1
---
## Purpose
The `comment` module provides domain models for a **threaded commenting system** in PocketPaw, enabling users to collaborate through inline discussions. Unlike simple flat comments, this system supports:
- **Hierarchical threads**: Comments can be replies to other comments (via the `thread` field), creating conversation branches
- **Multi-target commenting**: Comments can be attached to pockets, widgets, or agents via a flexible `CommentTarget` structure
- **Workspace isolation**: Comments are scoped to workspaces, ensuring data boundaries in multi-tenant environments
- **User mentions**: The `mentions` field tracks @-mentions for notifications and visibility
- **Resolution workflows**: Comments can be marked as resolved with audit trails (`resolved_by`), supporting issue-tracking patterns
This module exists separately because commenting is a **cross-cutting concern** that appears in multiple feature areas (pockets, widgets, agents) and requires consistent handling. By centralizing the data model, the system ensures uniform behavior for comment creation, querying, and lifecycle management across all target types.
## Key Classes and Methods
### CommentTarget(BaseModel)
**Purpose**: Encapsulates the location where a comment is attached, supporting polymorphic targeting of different entity types.
**Fields**:
- `type: str` — Enum-like field (pattern `"^(pocket|widget|agent)$"`) indicating the target entity type. This drives different business logic in consuming services (e.g., pocket comments vs. widget comments may have different permission models).
- `pocket_id: str` — Always required; the pocket containing the target. Even widget comments reference their parent pocket for workspace-level scoping.
- `widget_id: str | None` — Optional; specified only when the comment targets a widget within a pocket. A None value indicates a pocket-level comment.
**Business logic**: This design enforces that all comments exist within a pocket context, simplifying queries like "all comments in pocket X" without requiring joins. The optional `widget_id` allows granular targeting without forcing a separate table.
### CommentAuthor(BaseModel)
**Purpose**: Immutable snapshot of the comment author at creation time, preserving author information even if the user is later deleted or renamed.
**Fields**:
- `id: str` — User identifier (typically maps to a User document ID)
- `name: str` — Display name at time of commenting
- `avatar: str` — Avatar URL or embedded image reference (defaults to empty string for users without avatars)
**Business logic**: Storing author as a nested object rather than a reference means the UI can render "Alice commented" even if Alice's profile is later deleted. This is a common pattern in collaborative systems to maintain comment readability.
### Comment(TimestampedDocument)
**Purpose**: The primary data model representing a single comment in the system, with full lifecycle metadata.
**Inheritance**: Extends `TimestampedDocument` (from `ee.cloud.models.base`), providing `createdAt` and `updatedAt` timestamps automatically.
**Key fields**:
- `workspace: Indexed(str)` — Workspace identifier, indexed for efficient filtering. All queries will include `workspace` in their predicates to enforce multi-tenant isolation.
- `target: CommentTarget` — Where this comment is attached (pocket, widget, or agent)
- `thread: str | None` — Parent comment ID if this is a reply; None for root-level comments. Creates the threaded hierarchy.
- `author: CommentAuthor` — Who wrote this comment (immutable snapshot)
- `body: str` — Comment text content; no length limit enforced at model level (validation likely in service layer)
- `mentions: list[str]` — List of user IDs mentioned via @-mention syntax; used for notification triggers
- `resolved: bool` — Whether this comment/issue has been addressed (defaults to False)
- `resolved_by: str | None` — User ID who marked it resolved (audit trail)
**Database settings**:
```python
class Settings:
name = "comments" # Collection name in MongoDB
indexes = [
[(("target.pocket_id", 1), ("created_at", -1))]
]
```
The compound index on `(target.pocket_id, created_at)` optimizes the common query pattern: "fetch all comments for pocket X, sorted newest first." The descending order on `created_at` avoids additional sorting overhead.
## How It Works
### Data Flow
1. **Comment Creation**: When a user submits a comment, a consuming service (e.g., `CommentService` or an API route) creates a `Comment` instance with:
- Current user's ID/name/avatar → `author`
- Current workspace ID → `workspace`
- Target coordinates (pocket_id, widget_id or agent type) → `target`
- User-supplied text → `body`
- Parsed @-mentions → `mentions`
- No `thread` (or optional parent comment ID if replying)
- `resolved = False` initially
2. **Storage**: Beanie ORM persists the document to MongoDB's `comments` collection, generating `_id` and timestamps.
3. **Retrieval patterns**:
- **Comments on a pocket**: `Comment.find(Comment.target.pocket_id == "pocket_123", Comment.workspace == "ws_456").sort(("created_at", -1))` — uses the indexed field
- **Thread replies**: `Comment.find(Comment.thread == "comment_parent_id")` — fetches all replies to a specific comment
- **User mentions**: `Comment.find(Comment.mentions.contains("user_789"))` — for notification systems
4. **Resolution workflow**: When an issue comment is resolved, a service updates the document:
```python
comment.resolved = True
comment.resolved_by = current_user_id
await comment.save() # Triggers updated_at update via TimestampedDocument
```
### Edge Cases
- **Deleted users**: Author snapshot preserves the name/avatar; `mentions` references may point to non-existent users (services must handle gracefully)
- **Deeply nested threads**: No depth limit is enforced; clients should implement UI truncation (e.g., show only 2 levels, "load more" for deeper replies)
- **Empty mentions**: `mentions` defaults to empty list; no validation prevents posting comments with body-text mentions that aren't in the list
- **Widget comments without widget_id**: Model allows `widget_id = None` but `type = "widget"`, creating ambiguous state (validation likely in service layer)
## Authorization and Security
This model layer does **not enforce authorization**; that responsibility belongs to consuming services (API routers or service classes). Typical patterns:
- **Read**: Users can see comments in workspaces they're members of
- **Create**: Users must be workspace members; rate-limiting likely applied in service layer
- **Resolve**: Typically restricted to comment author, pocket owner, or workspace admins
- **Delete**: Often restricted to author or admins; soft-delete pattern may be used (not visible in this model)
The `workspace` field is the **isolation boundary**—queries should always filter by workspace to prevent cross-workspace leakage. This is a responsibility of the consuming service/repository layer.
## Dependencies and Integration
### Incoming dependencies (what imports this module)
- `__init__` (package-level exports) — Makes `Comment`, `CommentTarget`, `CommentAuthor` available to other modules
- Implicit consumers: API routes, services, and tests that need to instantiate or query comments
### Outgoing dependencies (what this module imports)
- **`ee.cloud.models.base.TimestampedDocument`** — Base class providing `created_at` and `updated_at` automatic timestamps. This is a shared base used across PocketPaw documents, ensuring consistent timestamp handling.
- **`beanie.Indexed`** — ODM field-type wrapper (used as `Indexed(str)`) that marks a field for database indexing. Beanie is the async MongoDB ODM layer.
- **`pydantic.BaseModel`, `pydantic.Field`** — Validation and serialization; BaseModel defines `CommentTarget` and `CommentAuthor` as simple nested structures with schema validation.
### Downstream integration patterns
- **CommentService** (likely exists in service layer) — CRUD operations, thread resolution, mention parsing
- **Comment API routes** — FastAPI endpoints for POST (create), GET (list by pocket), PUT (resolve)
- **Notification system** — Subscribes to comment creation events; queries `mentions` to trigger alerts
- **Search/indexing** — May replicate comment data to Elasticsearch for full-text search
## Design Decisions
### 1. **Immutable author snapshot vs. user reference**
- **Choice**: Store author as nested `CommentAuthor` (name, avatar) rather than just `author_id`
- **Rationale**: Comments remain human-readable even after user deletion/rename. Immutability preserves historical accuracy ("Alice said..." not "User#123 said...")
- **Trade-off**: If a user updates their avatar, old comments won't reflect it (acceptable in collaborative tools)
### 2. **Workspace as indexed field**
- **Choice**: Every `Comment` has an explicit `workspace` field, indexed
- **Rationale**: Multi-tenant SaaS requirement; enables efficient per-workspace queries without relying on workspace ID from request context
- **Trade-off**: Denormalizes the pocket → workspace relationship (pocket documents would already contain workspace); justified because comments are frequently queried in isolation
### 3. **Flexible CommentTarget with type enum**
- **Choice**: Single `target` field with `type`, `pocket_id`, optional `widget_id` rather than separate Comment subclasses
- **Rationale**: All comment queries and operations are identical regardless of target type; polymorphism via type field is simpler than document inheritance
- **Trade-off**: No database-level enforcement of "if type=widget, widget_id must be non-null" (validation is application-level responsibility)
### 4. **Simple thread model with parent_id**
- **Choice**: `thread: str | None` points to a parent comment; no separate ThreadGroup model
- **Rationale**: Threads are shallow in practice (1-2 levels); parent-id is simpler to query and index than a separate collection
- **Trade-off**: Deep nesting (replies to replies) requires client-side recursion or multiple queries; not optimized for very deep discussions
### 5. **Mentions as list of user IDs, not parsed objects**
- **Choice**: Store `mentions: list[str]` (raw IDs) rather than full user objects or parsed mention ranges
- **Rationale**: Minimal storage; enables efficient queries ("notify these users") without maintaining mention object state
- **Trade-off**: Clients must parse `body` text independently to render mentions; no shared mention syntax validation at model level
### 6. **Single compound index strategy**
- **Choice**: One index on `(target.pocket_id, created_at)` instead of multiple indexes
- **Rationale**: The dominant query pattern is "comments on pocket X sorted by recency"; one well-chosen index beats many small ones
- **Trade-off**: Queries on `mentions` or `thread` have no supporting index and may be slower (`workspace` is covered separately by its own `Indexed(str)` field index); acceptable because these are secondary query patterns
## Architectural Context
This module is part of PocketPaw's **collaboration layer**, sitting between:
- **Domain models** (above): API schemas, service DTOs that may reshape comments for API responses
- **Persistence layer** (below): Beanie ORM, MongoDB driver, database indexes
It represents a **clean separation** of concerns:
- Model = what the data looks like (schema, validation, indexed fields)
- Service = how comments behave (threaded logic, mention resolution, permissions)
- API = how clients interact with comments (REST or GraphQL endpoints)
This separation allows the schema to evolve independently of the API contract.
---
## Related
- [base-foundational-document-model-with-automatic-timestamp-management-for-mongodb](base-foundational-document-model-with-automatic-timestamp-management-for-mongodb.md)
- [eecloudworkspace-router-re-export-for-fastapi-workspace-endpoints](eecloudworkspace-router-re-export-for-fastapi-workspace-endpoints.md)

View File

@@ -0,0 +1,186 @@
# core — Enterprise JWT authentication with cookie and bearer transport for FastAPI
> This module implements a complete authentication system for PocketPaw using fastapi-users, providing user registration, login, logout, and profile management via both HTTP cookies (for browsers) and bearer tokens (for API/Tauri clients). It exists as a separate module to centralize all authentication concerns—user lifecycle, token strategies, session management—and to be imported by the router layer, which exposes these capabilities as REST endpoints. It forms the foundation of the enterprise auth architecture, sitting above the User data model and below the public API routers.
**Categories:** authentication, authorization, enterprise security, API layer, service layer
**Concepts:** UserManager, UserRead, UserCreate, JWTStrategy, FastAPIUsers, BeanieUserDatabase, CookieTransport, BearerTransport, AuthenticationBackend, fastapi-users library
**Words:** 1404 | **Version:** 1
---
## Purpose
This module solves the problem of **secure user authentication and session management** in a multi-client architecture (web browser + desktop Tauri app + API consumers). Rather than building authentication from scratch, it wraps `fastapi-users`—a battle-tested FastAPI authentication library—and configures it for PocketPaw's specific needs:
1. **Dual transport layer**: Browsers receive JWTs in HTTP-only cookies; API clients and Tauri app send JWTs in the `Authorization: Bearer` header. Both routes validate the same token.
2. **User lifecycle management**: Registration, email verification, password reset, and profile updates are handled by the `UserManager` class.
3. **Admin bootstrap**: The `seed_admin()` function ensures a default administrator account exists on first startup, reading defaults from environment variables.
4. **Enterprise-ready**: Supports superuser designation, verification tokens, and password reset workflows.
Within the system architecture, `core` is the **authentication engine**: it's imported by `router` (which wires endpoints) and depends on `user` (the User model), forming a clean separation between authentication mechanics and HTTP concerns.
## Key Classes and Methods
### `UserManager` — Lifecycle hooks and password handling
Inherits from `ObjectIDIDMixin` and `BaseUserManager[User, PydanticObjectId]`, extending fastapi-users' user manager:
- **`reset_password_token_secret`, `verification_token_secret`**: Shared secrets for generating secure tokens sent in password-reset and email-verification emails. Both use the `SECRET` constant.
- **`async on_after_register(user, request)`**: Hook called after a user signs up. Currently logs the registration event; could be extended to send welcome emails, create default workspace memberships, etc.
- **`async on_after_login(user, request, response)`**: Hook called after login. Logs the event; can be used for audit trails, analytics, or updating last-login timestamps.
### `UserRead` and `UserCreate` — Schemas
- **`UserRead`**: Pydantic model for serializing User responses. Extends `fastapi_users_schemas.BaseUser` and adds `full_name` and `avatar` fields for profile display.
- **`UserCreate`**: Pydantic model for registration payloads. Extends `fastapi_users_schemas.BaseUserCreate` and adds `full_name` for user-provided display names.
### `get_user_db()` — Database adapter (async generator)
Yields a `BeanieUserDatabase(User, OAuthAccount)` instance. This bridges fastapi-users' generic user store interface to the Beanie ODM layer. Each request gets its own instance via FastAPI dependency injection.
### `get_user_manager(user_db)` — Manager factory (async generator)
Creates a `UserManager` instance for each request, passing the user database. FastAPI will inject `user_db` by resolving the `get_user_db()` dependency. This pattern ensures each request has isolated, clean database and manager instances.
### `get_jwt_strategy()` — JWT token factory
Returns a `JWTStrategy` configured with:
- `secret`: The signing key (from `SECRET`)
- `lifetime_seconds`: Token expiration window (7 days)
Both cookie and bearer backends use the same strategy, ensuring tokens are interchangeable between transports.
### `seed_admin()` — Bootstrap admin account
**Purpose**: Ensure at least one superuser exists for initial system setup.
**Parameters** (all optional, fall back to env vars):
- `email`: Defaults to `ADMIN_EMAIL` env var or `admin@pocketpaw.ai`
- `password`: Defaults to `ADMIN_PASSWORD` env var or `admin123`
- `full_name`: Defaults to `ADMIN_NAME` env var or `Admin`
**Behavior**:
1. Checks if a user with `email` already exists; if so, returns it and logs.
2. Creates a new user via `UserManager.create()` with:
- `is_superuser=True`: Grants admin privileges
- `is_verified=True`: Skips email verification (admin doesn't need to verify their own email)
3. Re-saves the user to persist the `full_name` (note: `UserManager.create()` doesn't set custom fields).
4. Returns the created User or the existing one; returns None on unexpected errors.
5. Handles the `UserAlreadyExists` exception and re-queries the database (defensive pattern for race conditions).
## How It Works
### Authentication Flows
**Registration** (via router's `POST /auth/register`):
1. Client sends `{email, password, full_name}`.
2. FastAPI dependency injection resolves `get_user_manager()`, which in turn depends on `get_user_db()`.
3. `UserManager.create()` hashes the password, saves the User model to MongoDB, and calls `on_after_register()`.
4. Response includes `UserRead` serialization.
**Login** (via router's `POST /auth/login`):
1. Client sends `{username (email), password}`.
2. `UserManager` validates credentials (password hash comparison).
3. `JWTStrategy` generates a signed JWT token containing the user ID and claims.
4. **Cookie transport** sets `paw_auth` cookie with the token (HTTP-only, Lax SameSite).
5. **Bearer transport** returns token in response body for API clients.
6. `on_after_login()` is called for logging.
**Authorization** (on protected routes):
1. Browser: Cookie automatically sent; fastapi-users extracts `paw_auth` and validates.
2. API: `Authorization: Bearer <token>` header; fastapi-users extracts and validates.
3. Both extract the user ID from the JWT, re-fetch the User from MongoDB, and ensure the user is active (`is_active=True`).
4. Request proceeds with the User available via `Depends(current_active_user)`.
### Token Lifetime and Expiration
`TOKEN_LIFETIME = 60 * 60 * 24 * 7` (7 days). Tokens expire after this window; clients must re-login. Refresh tokens are not implemented here (design choice: rely on login being lightweight with email/password or OAuth).
### Edge Cases
- **Token tampering**: JWT validation fails; request denied.
- **User deactivated after login**: Re-fetch on each request detects `is_active=False`; request denied.
- **Admin seeding race condition**: If two startup processes call `seed_admin()` simultaneously, the second catches `UserAlreadyExists` and re-queries. Beanie should handle database-level uniqueness constraints.
- **Missing SECRET env var**: Defaults to `"change-me-in-production-please"`, which is a loud warning but allows dev/test without setup.
## Authorization and Security
### Cookie Security
- **HTTP-only**: JavaScript cannot access `paw_auth`; mitigates XSS token theft.
- **Secure flag**: Set to `False` in code (comment says to enable in production with HTTPS). In production, this must be `True` to prevent transmission over unencrypted HTTP.
- **SameSite=Lax**: Mitigates CSRF attacks; cookie sent on safe cross-site requests (GET, navigation) but not on form POST or XHR from other origins.
### Bearer Token Security
- No built-in transport security; relies on HTTPS and request origin checks.
- Suitable for Tauri (native app, can't be phished easily) and trusted API consumers.
### JWT Secrets
- Both cookies and bearer tokens use the same `SECRET` for signing.
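- If `SECRET` is rotated, all outstanding tokens become invalid immediately (no grace period). If `SECRET` is leaked, an attacker can forge valid tokens for any user until it is rotated, so a leak must be treated as requiring immediate rotation.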
### User Verification and Password Reset
- `reset_password_token_secret` and `verification_token_secret` are used by fastapi-users to generate secure time-bound tokens sent in emails.
- Not explicitly used in this file but configured; the router layer exposes the endpoints.
## Dependencies and Integration
### Imports from:
- **`ee.cloud.models.user`**: The `User` Beanie model, `OAuthAccount` (for OAuth2 integration, though not used here), and `WorkspaceMembership` (imported but not used in this file). These are the domain objects that represent authenticated users in the database.
- **`fastapi`, `fastapi_users`, `beanie`**: Third-party libraries providing the auth framework and database layer.
### Imported by:
- **`router`** (sibling module): Imports `fastapi_users`, `UserRead`, `UserCreate`, `current_active_user`, `current_optional_user`, and `seed_admin()` to define the actual REST endpoints.
- **`__init__`** (package init): May re-export key symbols for public API.
### How It Connects
This module is the **configuration layer** for authentication. It instantiates fastapi-users' machinery (managers, strategies, backends) without exposing endpoints. The router layer consumes these instances to build REST routes. The User model flows through the entire pipeline: created in `UserCreate`, persisted to MongoDB, retrieved in queries, and serialized in `UserRead`.
## Design Decisions
### Dual Transport Layer
**Why**: Single-page apps and desktop clients have different capabilities. Cookies require same-origin requests and CSRF protection; bearer tokens are RESTful and stateless but require client-side storage.
**Trade-off**: Dual transport adds complexity but allows the same backend to serve multiple client types seamlessly.
### Dependency Injection Pattern
`get_user_db()` and `get_user_manager()` are async generators that yield instances, relying on FastAPI's `Depends()` to manage their lifecycle. This ensures:
- Fresh database connections per request (isolation).
- Easy testing (inject mock managers).
- Clean separation of concerns (database creation vs. business logic).
### Hooks Over Middleware
Hooks like `on_after_register()` and `on_after_login()` are cleaner than post-request middleware for auth-specific side effects. They're called at the right moment in the user lifecycle and have access to the full context (user, request, response).
### Explicit Admin Seeding
`seed_admin()` is a function that must be **explicitly called** (e.g., in an app startup event), not automatic. This gives operators control: they can seed in a separate CLI command, in tests, or not at all in production (relying on OAuth or other flows).
### 7-Day Token Lifetime
**Why**: Long enough to avoid frequent re-logins (good UX for Tauri apps), short enough to limit the window of compromise if a token is stolen. No refresh tokens; users re-authenticate to get a new token (simple, secure, trades off UX slightly).
### Secrets in Environment Variables
Both `SECRET` and admin credentials come from env vars, enabling:
- Different secrets in dev, staging, production.
- Secrets not stored in code (reduced blast radius if repo is leaked).
- CI/CD pipeline integration (secrets injected at deploy time).
The fallback defaults are intentionally weak (`"change-me-in-production-please"`, `"admin123"`): they let local development work without any manual setup, while being conspicuous enough to prompt security hardening before production.
---
## Related
- [untitled](untitled.md)
- [eecloudworkspace-router-re-export-for-fastapi-workspace-endpoints](eecloudworkspace-router-re-export-for-fastapi-workspace-endpoints.md)

View File

@@ -0,0 +1,90 @@
# db — Backward compatibility facade for cloud database initialization
> This module is a thin re-export layer that delegates all database functionality to the canonical implementation in `ee.cloud.shared.db`. It exists to maintain backward compatibility with code that may import from this location, preventing breakage when the shared database module was introduced or relocated. Its role is strictly as a compatibility bridge in the PocketPaw cloud infrastructure layer.
**Categories:** infrastructure — cloud database, compatibility layer, architectural pattern — facade
**Concepts:** backward compatibility shim, re-export pattern, facade pattern, init_cloud_db, close_cloud_db, get_client, linter suppression (noqa: F401), namespace redirection, cloud infrastructure layer, shared module organization
**Words:** 668 | **Version:** 1
---
## Purpose
This module exists as a **backward compatibility shim** — a common architectural pattern used when refactoring code organization without breaking existing imports. The actual database initialization and client management logic lives in `ee.cloud.shared.db`, but some parts of the codebase (or external integrations) may have been written to import from `ee.cloud.db`. Rather than updating all call sites, this module re-exports the same three functions, allowing both import paths to work.
This pattern is particularly valuable in:
- **Gradual migrations**: Teams can update imports incrementally without a flag-day refactor
- **External integrations**: Third-party code or plugins may have hardcoded import statements
- **Organizational evolution**: As shared infrastructure is recognized, centralizing it in `shared/` becomes cleaner, but old import paths still work
## Key Classes and Methods
This module contains **no classes** — it is purely a re-export facade. Three functions are delegated:
### `init_cloud_db()`
Initializes the cloud database connection. The actual implementation lives in `ee.cloud.shared.db.init_cloud_db()`. Any code importing from this module gets the same function.
### `close_cloud_db()`
Closes the cloud database connection gracefully. Delegated to `ee.cloud.shared.db.close_cloud_db()`.
### `get_client()`
Returns the active database client instance. Delegated to `ee.cloud.shared.db.get_client()`.
All three are imported with `# noqa: F401` comments, which tell linters like flake8 to suppress "unused import" warnings — the functions are not used *within* this module, but they are meant to be imported *from* this module by others.
## How It Works
This is a **zero-logic module**:
1. **Import time**: Python evaluates `from ee.cloud.shared.db import init_cloud_db, close_cloud_db, get_client`
2. **Name binding**: These three names are bound in the current module's namespace
3. **Re-export**: Consumers can now do `from ee.cloud.db import init_cloud_db` and get the same object as if they'd imported from the shared module
There is no runtime behavior, caching, or state management here. It is purely a namespace redirect.
### Why F401 Suppression Matters
Without `# noqa: F401`, a linter would flag these as "imported but unused," potentially triggering CI failures or prompting developers to delete the imports (defeating the purpose). The comment is a contract that says: "These imports exist for re-export; do not remove them."
## Dependencies and Integration
**Depends on:**
- `ee.cloud.shared.db` — the canonical database module containing the actual implementation
**Imported by:**
- Unknown within the scanned codebase, but the module exists to serve any code that does `from ee.cloud.db import ...`
**System role:**
This module sits in the "cloud infrastructure" layer of PocketPaw. The parent package `ee.cloud` represents enterprise edition cloud features. By centralizing database logic in `shared/db.py`, the architecture separates:
- **Canonical implementation** (`ee.cloud.shared.db`) — single source of truth
- **Public interfaces** (this module and potentially others) — multiple import paths for backward compatibility
## Design Decisions
### Facade/Adapter Pattern
This is a textbook example of the **Facade Pattern** — presenting a simplified or alternative interface to a subsystem. Here, the alternative interface is simply a different import path.
### Why Not Delete It?
A tempting but risky refactor would be to remove this module and force all imports to update to `ee.cloud.shared.db`. However:
- It breaks external code without warning
- It creates a larger changeset in version control
- It requires coordination across teams/projects
- The module is trivial (2 lines of code), so the cost of keeping it is negligible
### Naming Convention
The placement in `ee.cloud.db` (not `ee.cloud.db.db` or `ee.cloud.db.py`) suggests this was the original module location before refactoring. The parallel existence of a `shared/` package suggests the team recognized this as shared infrastructure.
## When to Use
**For developers writing new code:**
- Prefer importing directly from `ee.cloud.shared.db` — it's the canonical location
- This module is for legacy code or external dependencies
**For code owners migrating imports:**
- Gradually move from `ee.cloud.db` to `ee.cloud.shared.db`
- Once all imports are updated, this module can be deleted (but there's no urgency)
**For architects understanding the codebase:**
- This is a signal that `ee.cloud.shared.db` is the central database module
- Look there for the actual logic, initialization hooks, and client management
- This module demonstrates thoughtful backward compatibility practices

View File

@@ -0,0 +1,262 @@
# db — MongoDB connection and Beanie ODM lifecycle management for PocketPaw cloud infrastructure
> This module provides a centralized, application-level abstraction for managing MongoDB connections and initializing the Beanie ODM (Object-Document Mapper) in the PocketPaw cloud environment. It exists to decouple database initialization logic from application startup, provide a singleton pattern for the MongoDB client, and ensure consistent document model registration across the cloud system. The module serves as the foundational data persistence layer for all cloud-based features.
**Categories:** data persistence, infrastructure layer, application lifecycle, ODM integration
**Concepts:** AsyncMongoClient, Beanie ODM, document model registration, singleton pattern, module-scoped state, deferred import, async initialization, connection pooling, graceful shutdown, URI parsing
**Words:** 1475 | **Version:** 1
---
## Purpose
The `db` module solves a critical architectural problem: **how to reliably initialize and manage MongoDB connectivity in an async Python application while ensuring all document models are registered with the ODM**.
In distributed systems, database initialization must be:
- **Centralized**: A single source of truth for connection configuration prevents inconsistent state
- **Deferred**: Initialization should happen at application startup, not import time, allowing configuration injection
- **Async-aware**: MongoDB operations in PocketPaw are async-first, requiring non-blocking I/O
- **Model-complete**: All Beanie document models must be registered before queries execute, or ODM introspection fails
This module lives at the intersection of three concerns:
1. **Infrastructure layer**: Manages low-level MongoDB/PyMongo connectivity
2. **ODM integration layer**: Bridges MongoDB and Beanie's document model system
3. **Application lifecycle**: Coordinates setup/teardown with application startup/shutdown events
Without this module, every service that needs database access would either duplicate connection logic or import models at module load time (causing circular dependencies and early-bound configuration).
## Key Classes and Methods
### Module-Level State: `_client`
```python
_client: AsyncMongoClient | None = None
```
A module-scoped singleton variable holding the active MongoDB connection. Initialized to `None` and populated by `init_cloud_db()`. This pattern enables lazy initialization and clean shutdown without requiring a class wrapper.
**Why not a class?** The module is stateless except for one resource (the client). A class would add ceremony without benefit. The module acts as a namespace for database operations.
### `async def init_cloud_db(mongo_uri: str)`
**Purpose**: Perform complete database initialization—connect to MongoDB, extract the database name, and register all Beanie document models.
**Key behaviors**:
1. **Global mutation**: Sets the module-scoped `_client` variable. This is intentional—callers can later retrieve the client via `get_client()` without re-initializing.
2. **Connection creation**:
```python
_client = AsyncMongoClient(mongo_uri)
```
Creates an async MongoDB client. PyMongo's `AsyncMongoClient` defers actual connection until first operation, making this call cheap.
3. **Database name extraction**:
```python
db_name = mongo_uri.rsplit("/", 1)[-1].split("?")[0] or "paw-cloud"
```
Parses the URI to extract the database name. Examples:
- `mongodb://localhost:27017/paw-cloud` → `paw-cloud`
- `mongodb://user:pass@cloud.example.com/tenant-db?authSource=admin` → `tenant-db`
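- `mongodb://localhost:27017/` → `paw-cloud` (fallback; the trailing slash leaves an empty name). Note that without a trailing slash, e.g. `mongodb://localhost:27017`, the expression yields `localhost:27017` rather than the fallback.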
This allows environment-specific URIs without hardcoding the database name.
4. **Model registration**:
```python
from ee.cloud.models import ALL_DOCUMENTS
await init_beanie(database=db, document_models=ALL_DOCUMENTS)
```
Imports all document models from `ee.cloud.models.ALL_DOCUMENTS` and registers them with Beanie. This is a **deferred import**—models are loaded only when database is initialized, avoiding circular imports and ensuring configuration is set before models introspect the environment.
5. **Logging**: Records successful initialization with database name and model count, aiding operational visibility.
**Side effects**: This function must be called exactly once at application startup. Calling it twice will replace the previous client and reinitialize Beanie.
### `async def close_cloud_db()`
**Purpose**: Clean shutdown of the MongoDB connection, enabling graceful app termination.
**Key behaviors**:
1. **Idempotent**: Safely checks if `_client` exists before closing; calling twice is safe.
2. **Connection cleanup**: Closes all pooled connections in the client.
3. **State reset**: Sets `_client = None`, allowing detection of uninitialized state in `get_client()`.
**Typical use**: Registered as a shutdown handler in the FastAPI app's `@app.on_event("shutdown")` or via lifespan context manager.
### `def get_client() -> AsyncMongoClient | None`
**Purpose**: Retrieve the initialized MongoDB client for direct access (e.g., in custom queries or transactions).
**Return value**: The `AsyncMongoClient` if `init_cloud_db()` was called, or `None` if not yet initialized or already closed.
**Design note**: Returns `None` instead of raising an exception, allowing callers to handle uninitialized state gracefully. Consumers should check for `None` before use.
## How It Works
### Initialization Sequence (Typical Application Startup)
```
1. FastAPI app startup event fires
2. Application code calls: await init_cloud_db(os.environ["MONGO_URI"])
3. AsyncMongoClient created (connection pool initialized, not yet connected)
4. Database name extracted from URI
5. ALL_DOCUMENTS imported from ee.cloud.models
6. Beanie.init_beanie() called → ODM introspects all document classes,
registers indexes, validates schemas
7. _client module variable populated
8. Logger confirms initialization
9. Application handlers (services, routers) can now use get_client()
```
### Data Flow: Query Execution
```
Service code calls Beanie query:
user = await User.find_one({...})
Beanie looks up User in its registry (populated by init_cloud_db)
Beanie uses the database connection (passed to init_beanie)
Query sent to MongoDB via PyMongo async driver
Document returned and deserialized to User instance
```
### Shutdown Sequence
```
1. FastAPI app shutdown event fires
2. Application code calls: await close_cloud_db()
3. _client.close() terminates all connections
4. _client set to None
5. Any subsequent get_client() calls return None
```
### Edge Cases
**No initialization**: If code calls `get_client()` before `init_cloud_db()`, it receives `None`. Services using this should either:
- Assume initialization happened (trust application startup)
- Explicitly check and raise an error
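**URI parsing edge case**: The fallback to the `"paw-cloud"` database name applies only when the text after the last `/` (and before any `?`) is empty. Examples:
- `mongodb://localhost/` (trailing slash) → uses `paw-cloud`
- `mongodb://localhost` (no database, no trailing slash) → the expression yields `localhost`, not the fallback, so always include an explicit database name or a trailing slash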
**Multiple initializations**: Calling `init_cloud_db()` twice leaks the first client (old one not closed). This is a bug if it occurs—callers must ensure single initialization.
## Authorization and Security
**No built-in access control**: This module does not enforce authorization. It assumes:
- The calling code is trusted application startup code, not untrusted user input
- The `mongo_uri` is controlled by the application operator (environment variable or config)
- The URI includes authentication credentials if MongoDB requires it
**Security considerations**:
- **Credential handling**: URIs may contain passwords (e.g., `mongodb://user:pass@host`). Ensure URIs are not logged or exposed; the module logs only the database name, not the full URI.
- **URI validation**: The URI is passed directly to `AsyncMongoClient()`, which validates it. Invalid URIs raise exceptions at connection time.
- **Network security**: This module does not configure TLS/SSL; those settings are specified in the URI (e.g., `mongodb+srv://` for MongoDB Atlas).
## Dependencies and Integration
### Dependencies (Incoming)
**External libraries**:
- **`pymongo.AsyncMongoClient`**: Low-level async MongoDB driver. Manages connection pooling, protocol, and raw queries.
- **`beanie.init_beanie`**: ODM initialization. Registers document models, sets up indexing, connects Beanie to the database.
- **Python `logging`**: Standard library; logs initialization messages for operational visibility.
**Internal dependencies**:
- **`ee.cloud.models.ALL_DOCUMENTS`**: A collection of all Beanie document models used in the cloud system. This is a **deferred import**—loaded only at `init_cloud_db()` call time to avoid circular imports.
### Dependents (Who Uses This)
**Inbound calls** (not visible in the import graph, but expected):
- **Application startup code** (likely in `ee/cloud/app.py` or `ee/cloud/main.py`): Calls `init_cloud_db()` and `close_cloud_db()` via FastAPI lifecycle events.
- **Service layer** (e.g., `ee/cloud/services/*.py`): Calls `get_client()` for direct database access when Beanie ORM queries are insufficient (e.g., bulk operations, transactions, aggregation pipelines).
- **Testing/fixtures**: Initializes and tears down the database for test isolation.
### Why Separate from Models
The module imports `ee.cloud.models.ALL_DOCUMENTS` at runtime, not at module load time. This separation prevents circular imports:
- Models may reference services
- Services use this `db` module
- If models imported this module at load time, a cycle would form
The deferred import breaks the cycle: models are loaded only when the app explicitly initializes the database.
## Design Decisions
### Singleton Pattern via Module Variables
**Decision**: Store the client in a module-scoped `_client` variable instead of a class.
**Rationale**:
- Minimizes boilerplate for a single-resource pattern
- Aligns with Python conventions (e.g., `logging.getLogger()` is a module function, not a class method)
- Clean API: `init_cloud_db()`, `get_client()`, `close_cloud_db()` are top-level functions
**Trade-off**: Less testable (global state). Mitigated by ensuring tests call `init_cloud_db()` and `close_cloud_db()` explicitly in setup/teardown.
### Async Initialization
**Decision**: `init_cloud_db()` and `close_cloud_db()` are async functions.
**Rationale**:
- `init_beanie()` is async (it may perform I/O to introspect the database)
- Aligns with async application startup (FastAPI lifespan events are async)
- Future-proofs: if initialization adds async operations (e.g., schema validation), it's already an async context
**Implication**: Callers must use `await` in async contexts:
```python
@app.on_event("startup")
async def startup():
    await init_cloud_db()
```
### Defensive URI Parsing
**Decision**: Extract database name from URI with a fallback instead of raising an error.
**Rationale**:
- Malformed URIs are typically caught by `AsyncMongoClient()` with clear errors
- Fallback database name (`paw-cloud`) provides a sensible default
- Reduces boilerplate for callers (they don't need to validate the URI format)
**Edge case**: If the URI is intentionally minimal (e.g., `mongodb://localhost`), the module assumes `paw-cloud` as the database, which may not match the actual database name. Operators should include the database name explicitly in the URI.
### No Client Caching Layer
**Decision**: `get_client()` returns the raw `AsyncMongoClient`, not a wrapper or cache.
**Rationale**:
- `AsyncMongoClient` already manages connection pooling internally
- Callers with specialized needs (e.g., transactions) can access the raw client
- Simpler code path: no indirection
**Trade-off**: Callers are responsible for proper async/await usage; no automatic connection validation.
### Single Database Instance
**Decision**: All document models share one database (extracted from the URI).
**Rationale**:
- Simplifies initialization and shutdown
- Typical for monolithic apps with a single primary database
- Multi-database scenarios would require separate initialization functions
**Future extensibility**: If needed, a sibling function `init_cloud_db_secondary()` could initialize additional databases.

View File

@@ -0,0 +1,254 @@
# deps — FastAPI dependency injection layer for cloud router authentication and authorization
> This module provides FastAPI dependency functions that extract and validate user authentication and workspace context from JWT tokens. It exists to centralize credential handling and role-based access control across cloud routers, eliminating repeated auth logic and ensuring consistent security checks. It serves as the bridge between FastAPI's dependency injection system and the application's authentication/authorization model.
**Categories:** Authentication & Authorization, API Router Layer, FastAPI Middleware & Dependency Injection, Multi-Tenant Access Control
**Concepts:** FastAPI dependency injection, JWT authentication, Role-based access control (RBAC), Workspace isolation, current_active_user, current_user, current_user_id, current_workspace_id, optional_workspace_id, require_role
**Words:** 1715 | **Version:** 1
---
## Purpose
The `deps` module solves a critical architectural problem: **how to reliably inject authenticated user context and workspace scope into every cloud router endpoint without duplicating code**.
In a multi-tenant cloud application, nearly every API endpoint needs to:
1. Verify the request comes from an authenticated user (via JWT token)
2. Extract the user's active workspace context
3. Optionally validate the user has a minimum role in that workspace
Instead of repeating these checks in every endpoint handler, FastAPI provides dependency injection. This module wraps authentication logic into reusable dependency functions that FastAPI automatically invokes and injects.
**System Position**: This module sits at the intersection of three concerns:
- **Authentication layer** (`ee.cloud.auth.current_active_user`): Provides the raw authenticated User object from the JWT token
- **Authorization layer** (`ee.cloud.shared.permissions.check_workspace_role`): Validates role requirements
- **Router layer** (consuming modules like `__init__`, `router`): Uses these dependencies as function parameters in endpoint handlers
## Key Classes and Methods
This module contains only functions, no classes. Each function is a FastAPI dependency that can be injected into endpoint handlers.
### `current_user(user: User) → User`
**What it does**: Returns the authenticated user object.
**Why it exists**: Provides a named, documented dependency that makes endpoints' authentication requirements explicit. Endpoints that need just the user object (not workspace-scoped operations) depend on this.
**How it works**: It declares a dependency on `current_active_user` (from the auth module), which handles the actual JWT validation. This function simply passes it through, creating a semantic checkpoint.
### `current_user_id(user: User) → str`
**What it does**: Extracts and returns the authenticated user's ID as a string.
**Why it exists**: Some endpoints need only the user ID, not the full user object. This provides that without forcing callers to extract it themselves. Also normalizes the ID to string type.
**How it works**: Depends on `current_active_user`, then converts `user.id` to a string. The conversion is important because IDs might be integers or other types in the User model, but APIs prefer string representations.
### `current_workspace_id(user: User) → str`
**What it does**: Returns the user's currently active workspace ID, or raises an HTTP 400 error if none is set.
**Critical behavior**: This dependency has a **hard requirement**—it enforces that the user must have an active workspace. This is the primary validation point for workspace-scoped operations.
**How it works**:
1. Depends on `current_active_user` to get the user
2. Checks `user.active_workspace` is not None/empty
3. If missing, raises `HTTPException(400)` with a user-friendly message: "No active workspace. Create or join a workspace first."
4. If present, returns the workspace ID
**Edge case**: The error message guides users toward resolving the issue (create or join a workspace), suggesting this is a common problem in the UX.
### `optional_workspace_id(user: User) → str | None`
**What it does**: Returns the user's active workspace ID if set, or None if not.
**Key difference from `current_workspace_id`**: This is **permissive**—it allows endpoints to work even if the user has no active workspace.
**Use case**: Endpoints that don't inherently require a workspace context (e.g., "list all my workspaces," "create a new workspace") should use this. Workspace-scoped operations like "read workspace files" should use the stricter `current_workspace_id`.
**How it works**: Simply returns `user.active_workspace` directly, with no validation; the value is passed through as-is and is `None` when no workspace is set.
### `require_role(minimum: str) → async callable`
**What it does**: A dependency factory that returns a new dependency function enforcing minimum workspace role requirements.
**Why it exists**: Implements **role-based access control (RBAC)** at the dependency layer. It lets endpoints declare "only admins can do this" or "editors and above are allowed" without embedding role logic in handler code.
**How it works** (closure pattern):
1. `require_role("admin")` is called, returning an inner `_check` function
2. The inner function depends on both `current_active_user` (to get the user) and `current_workspace_id` (to know which workspace to check permissions for)
3. Inside `_check`:
- It finds the user's workspace membership record by matching `w.workspace == workspace_id`
- If no membership is found, raises `Forbidden` (403) with code "workspace.not_member"
- If found, calls `check_workspace_role(membership.role, minimum=minimum)` to validate the user's role meets the minimum
- The role check will raise `Forbidden` if the role is insufficient
- If all checks pass, returns the user
**Membership lookup**: The line `next((w for w in user.workspaces if w.workspace == workspace_id), None)` iterates through the user's workspace memberships until it finds one matching the current workspace.
**Example usage in a router**:
```python
@router.delete("/workspaces/{workspace_id}/files/{file_id}")
async def delete_file(
    file_id: str,
    user: User = Depends(require_role("admin"))
):
    # At this point, user is guaranteed to be an admin in the current workspace
    # FastAPI has already executed the role check dependency
    pass
```
## How It Works
### Data Flow
**Request arrives at an endpoint**:
1. The endpoint declares a dependency, e.g., `user: User = Depends(current_user)`
2. FastAPI's dependency injection system sees this and calls `current_user()`
3. `current_user()` declares its own dependency: `user: User = Depends(current_active_user)`
4. FastAPI calls `current_active_user()` (from the auth module), which validates the JWT token and returns a User object or raises an exception
5. The User object is passed to `current_user()`, which returns it
6. The endpoint handler receives the User object and executes
**For workspace-scoped operations**:
1. Endpoint depends on `current_workspace_id`
2. `current_workspace_id` depends on `current_active_user`
3. FastAPI caches the User object (doesn't call `current_active_user` twice)
4. `current_workspace_id` extracts and validates the workspace ID
5. Endpoint receives the workspace ID
**For role-based operations**:
1. Endpoint depends on `require_role("admin")`
2. This returns the inner `_check` function
3. FastAPI injects `current_active_user` and `current_workspace_id` into `_check`
4. `_check` validates the user is a member and has the required role
5. Endpoint receives the authenticated, authorized user
### Dependency Caching
FastAPI caches dependency results within a single request. If both `current_user_id` and `current_workspace_id` are used in the same endpoint, `current_active_user` is called only once, and the User object is reused. This is efficient.
### Error Handling
- **No authentication**: `current_active_user` (from auth module) raises an exception if the JWT is invalid or missing
- **No active workspace** (when required): `current_workspace_id` raises `HTTPException(400)`
- **Not a workspace member**: `require_role` raises `Forbidden` with code "workspace.not_member"
- **Insufficient role**: `check_workspace_role` raises `Forbidden` with a message indicating what role is required
## Authorization and Security
### Authentication
This module assumes authentication has already been done by `current_active_user` (imported from `ee.cloud.auth`). That function validates JWT tokens. This module **does not** handle token validation—it only consumes authenticated users.
### Authorization (Access Control)
This module implements two layers of authorization:
**1. Workspace membership check** (`require_role`):
- Only users who are members of a workspace can perform workspace-scoped actions
- A user may be a member of multiple workspaces; we check membership in the *active* workspace
**2. Role-based access control** (`check_workspace_role`):
- Within a workspace, users have roles (e.g., "admin", "editor", "viewer")
- Endpoints declare a minimum role requirement
- Only users with a role at or above that level can proceed
### Workspace Isolation
These dependencies enforce **strict workspace isolation**:
- `current_workspace_id` always returns the user's *active* workspace
- Endpoints cannot opt into a different workspace
- If a user switches their active workspace (in the User model), all subsequent requests operate in that workspace
This prevents accidental cross-workspace data access.
## Dependencies and Integration
### What This Module Depends On
1. **`ee.cloud.auth.current_active_user`**
- **What**: The actual authentication function that validates JWT tokens
- **Why**: This module only handles post-authentication concerns (context extraction, role checks). The heavy lifting of token validation is delegated to the auth module.
2. **`ee.cloud.models.user.User`**
- **What**: The User data model
- **Why**: All dependencies work with User objects. The User model contains `active_workspace` and `workspaces` attributes.
3. **`ee.cloud.shared.errors.Forbidden`**
- **What**: A custom exception class for authorization failures
- **Why**: Provides a consistent, application-specific way to signal 403 Forbidden errors instead of generic FastAPI HTTPException.
4. **`ee.cloud.shared.permissions.check_workspace_role`**
- **What**: A role validation function
- **Why**: Centralizes the logic for comparing a user's role against a minimum requirement. This module calls it but doesn't implement role comparison itself.
### What Depends On This Module
1. **`__init__` (the package init)**
- Likely re-exports these dependencies so other modules can import them as `from ee.cloud.shared import current_user, require_role`, etc.
2. **`router` (cloud routers)**
- Cloud API endpoints use these dependencies in their handler signatures
- Example: `async def create_file(file: FileCreate, user: User = Depends(current_user), workspace_id: str = Depends(current_workspace_id))`
### System Architecture Position
```
Request
[FastAPI Router]
[Endpoint Handler]
[deps.py - Dependency Injection]
├→ current_user
├→ current_workspace_id (validation)
└→ require_role (RBAC)
[Auth Module - JWT Validation]
[Permissions Module - Role Checking]
[Handler Executes with Validated Context]
```
## Design Decisions
### 1. **Dependency Injection Over Middleware**
Why not validate in middleware? Because:
- Dependencies are **endpoint-specific**. Different endpoints need different validation (some require workspace, others don't). Middleware would validate the same way for all routes.
- Dependencies are **composable**. `require_role` accepts a parameter, allowing fine-grained control per endpoint.
- Dependencies integrate with **FastAPI's automatic documentation** (OpenAPI). They show up in generated API docs.
### 2. **Separate Functions for Different Extraction Needs**
Why have `current_user`, `current_user_id`, `current_workspace_id`, and `optional_workspace_id` instead of a single function?
- **Precision**: Endpoints declare exactly what they need. An endpoint that only needs the workspace ID receives just that value in its signature; the user object is still resolved by `current_active_user` underneath, but FastAPI caches it so it is loaded only once per request.
- **Clarity**: Code is self-documenting. `Depends(current_workspace_id)` clearly indicates the endpoint requires an active workspace.
- **Validation**: `current_workspace_id` enforces the workspace requirement; `optional_workspace_id` doesn't. This prevents bugs where an endpoint accidentally allows requests without a workspace.
### 3. **Closure Pattern for `require_role`**
Why return a function instead of being a direct dependency?
- **Parameterization**: The role requirement varies per endpoint ("admin" vs. "editor" vs. "viewer"). A closure captures the `minimum` parameter.
- **Clean API**: Endpoints write `Depends(require_role("admin"))`, which reads naturally.
### 4. **Explicit Error Messages**
- `current_workspace_id` raises `HTTPException(400, "No active workspace. Create or join a workspace first.")` instead of a generic 400, guiding users toward a fix.
- `require_role` raises `Forbidden("workspace.not_member", ...)` with a machine-readable code, allowing frontends to handle specific error types.
These choices reflect the principle that **security errors should guide users to compliance**, not just reject requests.
### 5. **No Business Logic**
This module intentionally contains **only routing and validation logic**, not business logic:
- It doesn't modify users or workspaces
- It doesn't query databases
- It delegates role comparison to the permissions module
This keeps dependencies lightweight and testable.
---
## Related
- [untitled](untitled.md)
- [eecloudworkspace-router-re-export-for-fastapi-workspace-endpoints](eecloudworkspace-router-re-export-for-fastapi-workspace-endpoints.md)

View File

@@ -0,0 +1,94 @@
# ee.cloud.agents — Package initialization and router export for enterprise cloud agent functionality
> This is a minimal package initialization module that serves as the public API entry point for the enterprise cloud agents subsystem. It re-exports the FastAPI router from the router submodule, making agent routing functionality available to parent packages. This pattern centralizes router registration and ensures clean separation between internal router implementation and external consumption.
**Categories:** API router / integration layer, enterprise cloud agents, package initialization, FastAPI application architecture
**Concepts:** FastAPI router, package initialization, facade pattern, dependency injection (deps), workspace scoping, license entitlements, event-driven architecture, router registration, re-export pattern, enterprise agent subsystem
**Words:** 628 | **Version:** 1
---
## Purpose
This module exists as a package initialization point (`__init__.py`) for the `ee.cloud.agents` namespace. Its sole responsibility is to expose the `router` object from the `router` submodule to any code that imports from `ee.cloud.agents`.
In a FastAPI application architecture, routers are modular endpoint collections that must be registered with the main application. By re-exporting `router` at the package level, this module provides a clean, discoverable import path for parent packages (likely the main FastAPI application factory) to find and include the agents subsystem's endpoints.
## Key Classes and Methods
No classes or functions are defined in this module. The only public export is:
**`router`** (imported from `ee.cloud.agents.router`): A FastAPI `APIRouter` instance that contains all HTTP endpoint definitions for the agents subsystem. This router likely includes endpoints for agent operations across multiple sub-domains (workspace, user, license, etc., as evidenced by the import graph).
## How It Works
When the parent package (or main application) needs to register agent-related endpoints:
1. It imports from `ee.cloud.agents`: `from ee.cloud.agents import router`
2. The import triggers this `__init__.py` file
3. This module imports `router` from its `router` submodule and makes it available in the package namespace
4. The parent application can then register this router with the FastAPI app instance (typically via `app.include_router(router)`)
This is a **facade pattern** applied to package structure: the real router definition and implementation details are hidden in `router.py`, while consumers interact only with this clean entry point.
## Authorization and Security
Authorization is not handled at this initialization level. The `router` object itself will contain endpoint-level authorization checks, likely implemented through:
- FastAPI dependency injection (the `deps` import suggests custom dependencies)
- Middleware or route guards checking user permissions, workspace access, or license entitlements
- Entity-level access control in the service layer
## Dependencies and Integration
**Direct Dependencies:**
- `ee.cloud.agents.router`: Provides the FastAPI router instance to be re-exported
**Indirect Dependencies (inferred from import graph):**
The router module itself depends on multiple submodules:
- `errors`: Custom exception definitions for error responses
- `workspace`, `user`, `license`: Domain models and services for scoped agent operations
- `agent_bridge`: Bridge logic for agent communication or delegation
- `core`: Core agent abstractions
- `agent`, `comment`, `file`, `group`, `invite`, `message`, `notification`: Agent-related entity models and services
- `pocket`, `session`: Session and pocket-specific functionality
- `event_handlers`: Event-driven architecture support
**How It Fits in the System:**
This module is a leaf in the import dependency tree within the scanned set—nothing imports from it within the measured scope. However, it serves as an entry point for the parent application (likely `ee.cloud` or the main FastAPI application factory) to discover and register agent endpoints.
## Design Decisions
1. **Re-export Pattern**: Rather than defining the router here, it's imported from a dedicated `router` module. This separates concerns: router registration from endpoint definition.
2. **`noqa: F401` Comment**: The `# noqa: F401` suppresses unused import warnings. Python linters would otherwise flag `router` as imported but not used within this file. This comment signals that the import's purpose is re-exporting, not local usage.
3. **Package-Level Visibility**: By exporting `router` at the package level, any sibling or parent package can access it via `ee.cloud.agents.router` without needing to know internal structure. This creates a stable, version-friendly API for integration.
4. **Minimal Initialization**: The module performs no initialization logic, caching, or side effects—it purely re-exports. This keeps the import fast and predictable.
## When to Use This Module
- **Application Factory**: Import `router` here when bootstrapping the FastAPI application and registering all routers
- **Integration Tests**: Reference this module to discover agent endpoints without inspecting internal router structures
- **Documentation Generation**: Tools that auto-generate API docs can import `router` from this stable entry point
Do not modify this file unless adding new re-exports from newly created agent submodules, or unless the package-level API contract changes.
---
## Related
- [untitled](untitled.md)
- [workspace-data-model-for-organization-workspaces-in-multi-tenant-enterprise-depl](workspace-data-model-for-organization-workspaces-in-multi-tenant-enterprise-depl.md)
- [license-enterprise-license-validation-and-feature-gating-for-cloud-deployments](license-enterprise-license-validation-and-feature-gating-for-cloud-deployments.md)
- [deps-fastapi-dependency-injection-layer-for-cloud-router-authentication-and-auth](deps-fastapi-dependency-injection-layer-for-cloud-router-authentication-and-auth.md)
- [core-enterprise-jwt-authentication-with-cookie-and-bearer-transport-for-fastapi](core-enterprise-jwt-authentication-with-cookie-and-bearer-transport-for-fastapi.md)
- [agent-agent-configuration-and-metadata-storage-for-workspace-scoped-ai-agents](agent-agent-configuration-and-metadata-storage-for-workspace-scoped-ai-agents.md)
- [comment-threaded-comments-on-pockets-and-widgets-with-workspace-isolation](comment-threaded-comments-on-pockets-and-widgets-with-workspace-isolation.md)
- [file-cloud-storage-metadata-document-for-managing-file-references](file-cloud-storage-metadata-document-for-managing-file-references.md)
- [group-multi-user-chat-channels-with-ai-agent-participants](group-multi-user-chat-channels-with-ai-agent-participants.md)
- [invite-workspace-membership-invitation-document-model](invite-workspace-membership-invitation-document-model.md)
- [message-data-model-for-group-chat-messages-with-mentions-reactions-and-threading](message-data-model-for-group-chat-messages-with-mentions-reactions-and-threading.md)
- [notification-in-app-notification-data-model-and-persistence-for-user-workspace-e](notification-in-app-notification-data-model-and-persistence-for-user-workspace-e.md)
- [pocket-data-models-for-pocket-workspaces-with-widgets-teams-and-collaborative-ag](pocket-data-models-for-pocket-workspaces-with-widgets-teams-and-collaborative-ag.md)
- [session-cloud-tracked-chat-session-document-model-for-pocket-scoped-conversation](session-cloud-tracked-chat-session-document-model-for-pocket-scoped-conversation.md)

View File

@@ -0,0 +1,260 @@
# ee.cloud.__init__ — Cloud domain orchestration and FastAPI application bootstrap
> This module is the entry point for PocketPaw's enterprise cloud layer. It bootstraps a FastAPI application by mounting all domain routers (auth, workspace, agents, chat, pockets, sessions, knowledge base), registering a global error handler, configuring WebSocket endpoints, and initializing cross-domain event handlers and agent lifecycle management. It exists to centralize cloud infrastructure setup and enforce domain-driven architecture patterns across the system.
**Categories:** Cloud Domain — Orchestration, API Router — Bootstrap & Mounting, Infrastructure Layer — Lifecycle Management, Error Handling & Global Middleware, Event-Driven Architecture
**Concepts:** mount_cloud(app), FastAPI application bootstrap, domain-driven architecture, router mounting, exception_handler decorator, CloudError, Depends() dependency injection, current_user, current_workspace_id, async/await patterns
**Words:** 1588 | **Version:** 1
---
## Purpose
This module serves as the **orchestration and bootstrap layer** for PocketPaw's cloud domain architecture. Rather than requiring scattered application initialization code throughout the codebase, `mount_cloud(app)` is a single entry point that:
1. **Registers all domain routers** — Each domain (auth, workspace, agents, chat, pockets, sessions, knowledge base) has a thin `router.py` that declares HTTP endpoints. This function imports and mounts them all with a consistent `/api/v1` prefix.
2. **Installs a global error handler** — Catches `CloudError` exceptions from any domain and converts them to standardized JSON responses with appropriate HTTP status codes.
3. **Provides shared endpoints** — Some endpoints (user search, license info) don't belong to a single domain but serve cross-cutting concerns. They are defined here rather than duplicated.
4. **Configures infrastructure** — Registers event handlers for domain interactions, starts/stops the agent pool, and sets up WebSocket connections.
The module exists because **domain-driven design** requires separation of concerns: each domain (auth, chat, workspace) should be modular and self-contained, but the application still needs a single place to wire everything together. Without this module, the main application file would be cluttered with dozens of `include_router()` calls and scattered initialization logic.
## Key Classes and Methods
### Function: `mount_cloud(app: FastAPI) -> None`
**Purpose:** The primary entry point. Accepts a FastAPI application instance and mutates it by mounting all cloud infrastructure.
**How it works (in sequence):**
1. **Error Handler Registration** — Defines an async exception handler that catches any `CloudError` raised during request processing and returns a JSON response with the error's status code and serialized error data (via `exc.to_dict()`).
2. **Domain Router Mounting** — Imports routers from seven domains:
- `ee.cloud.auth.router` → handles authentication (login, signup, token refresh)
- `ee.cloud.workspace.router` → workspace CRUD and settings
- `ee.cloud.agents.router` → agent discovery and execution
- `ee.cloud.chat.router` → message history and chat operations
- `ee.cloud.pockets.router` → pocket (collection) management
- `ee.cloud.sessions.router` → session tracking
- `ee.cloud.kb.router` → knowledge base (documents, embeddings, search)
Each router is mounted at `/api/v1`, so routes become `/api/v1/auth/login`, `/api/v1/workspace/...`, etc.
3. **User Search Endpoint** — Defines an inline `GET /api/v1/users` endpoint that:
- Requires authentication via `current_user` dependency
- Requires workspace context via `current_workspace_id` dependency
- Takes optional `search` and `limit` query parameters
- Queries the `UserModel` collection for users in the current workspace matching the search string (case-insensitive regex on email or full_name)
- Returns a minimal user projection with `_id`, `email`, `name`, `avatar`, `status`
**Why here?** This endpoint is used by group settings and pocket sharing features across multiple domains, so it's shared rather than duplicated.
4. **WebSocket Endpoint** — Registers the WebSocket handler from `ee.cloud.chat.router.websocket_endpoint` at `/ws/cloud` (no `/api/v1` prefix). This allows frontend clients to connect at `ws://host/ws/cloud?token=...` for real-time chat.
5. **License Endpoint** — Defines `GET /api/v1/license` (no authentication required) that returns license information via `get_license_info()`. Accessible to unauthenticated clients so they can check deployment license status.
6. **Event Handler and Agent Bridge Registration** — Calls:
- `register_event_handlers()` — Sets up cross-domain event listeners (e.g., when a message is created, notify agents; when a pocket is shared, update permissions)
- `register_agent_bridge()` — Initializes the agent execution bridge that allows chat endpoints to trigger agent workflows
7. **Agent Pool Lifecycle** — Registers FastAPI startup/shutdown handlers:
- `@app.on_event("startup")` — Calls `get_agent_pool().start()` to initialize the agent pool when the app starts
- `@app.on_event("shutdown")` — Calls `get_agent_pool().stop()` to gracefully shut down agents when the app stops
## How It Works
**Application Bootstrap Flow:**
```
Main application (e.g., main.py)
app = FastAPI()
mount_cloud(app) ← This function
├─ Install CloudError handler
├─ Import and mount 7 domain routers at /api/v1
├─ Add /api/v1/users search endpoint
├─ Add /ws/cloud WebSocket endpoint
├─ Add /api/v1/license endpoint
├─ Register event handlers and agent bridge
└─ Register startup/shutdown hooks for agent pool
uvicorn.run(app)
```
**Request Handling with Error Recovery:**
When a client makes a request to any domain endpoint (e.g., `POST /api/v1/chat/messages`):
1. FastAPI routes it to the appropriate domain router
2. The router calls domain service logic (e.g., `ChatService.create_message()`)
3. If a `CloudError` is raised (e.g., `UnauthorizedError`, `NotFoundError`), FastAPI catches it via the exception handler registered in this module
4. The handler converts it to JSON with the appropriate status code
5. Client receives consistent error response
**WebSocket Connection Lifecycle:**
When a client connects to `ws://host/ws/cloud?token=...`:
1. FastAPI routes to `websocket_endpoint` from `ee.cloud.chat.router`
2. The endpoint validates the token (via dependency injection)
3. Connection is established for real-time chat
4. On shutdown, `_stop_agent_pool()` is called, which may gracefully disconnect all WebSocket clients
**User Search Flow:**
```
GET /api/v1/users?search=john&limit=10
current_user dependency → validates token, returns User object
current_workspace_id dependency → extracts workspace from token/context
Query UserModel with {workspaces.workspace: workspace_id, email/name matches search}
Return [{ _id, email, name, avatar, status }, ...]
```
## Authorization and Security
**Who can call what?**
| Endpoint | Authentication | Authorization | Notes |
|----------|---|---|---|
| `/api/v1/*` (domain routers) | Per-domain (auth router skips login route) | Per-domain (e.g., workspace membership, pocket ownership) | Each domain router applies its own checks |
| `/api/v1/users` | Required (`current_user`) | Required (`current_workspace_id`) | Can only search users in own workspace; useful for sharing/collaboration |
| `/ws/cloud` | Required (token in query param) | Required (workspace context) | Real-time chat; validates token before upgrading connection |
| `/api/v1/license` | **Not required** | None | Public endpoint; needed for license checks before login |
**Error Handling:**
The `CloudError` exception handler ensures that all domain errors are converted to standardized HTTP responses. The `CloudError` class (from `ee.cloud.shared.errors`) likely includes:
- `status_code` — HTTP status (401, 403, 404, 500, etc.)
- `to_dict()` method — Serializes error to JSON (message, error code, details)
This prevents information leakage and ensures consistent error contracts.
## Dependencies and Integration
**What this module imports (outbound dependencies):**
- **FastAPI** — Web framework for routing and dependency injection
- **ee.cloud.shared.errors.CloudError** — Base exception class for all cloud domain errors
- **ee.cloud.shared.deps** — `current_user`, `current_workspace_id` dependency functions
- **ee.cloud.shared.event_handlers.register_event_handlers** — Cross-domain event subscription setup
- **ee.cloud.shared.agent_bridge.register_agent_bridge** — Agent execution bridge
- **ee.cloud.auth.router** — Authentication domain (login, signup, token)
- **ee.cloud.workspace.router** — Workspace domain (CRUD, settings)
- **ee.cloud.agents.router** — Agent discovery and execution
- **ee.cloud.chat.router** — Chat domain (messages, WebSocket)
- **ee.cloud.pockets.router** — Pocket domain (collections, sharing)
- **ee.cloud.sessions.router** — Session domain (tracking, cleanup)
- **ee.cloud.kb.router** — Knowledge base domain (documents, search)
- **ee.cloud.license.get_license_info** — License information endpoint
- **ee.cloud.models.user.User** — User model for search endpoint
- **pocketpaw.agents.pool.get_agent_pool** — Agent pool lifecycle management
**What depends on this module:**
No other modules in the scanned set import from `ee.cloud.__init__`, but the main application (entry point) **must** call `mount_cloud(app)` after creating the FastAPI instance:
```python
# In main.py or similar
from fastapi import FastAPI
from ee.cloud import mount_cloud
app = FastAPI()
mount_cloud(app) # ← Required to set up all cloud infrastructure
```
**Integration with other systems:**
- **Event System** — The `register_event_handlers()` call subscribes to events from each domain (message created, pocket shared, etc.) and triggers cross-domain actions
- **Agent System** — The `register_agent_bridge()` call allows chat endpoints to trigger agent execution; the startup/shutdown hooks ensure the agent pool is available during app lifetime
- **Authentication** — All authenticated endpoints rely on the `current_user` and `current_workspace_id` dependencies, which come from `ee.cloud.shared.deps` and likely validate JWT tokens or similar; the public `/api/v1/license` endpoint is the exception and uses neither
- **Database** — User search uses Beanie ODM (`UserModel.find()`, `.to_list()`) to query MongoDB
## Design Decisions
**1. Centralized Router Mounting (Facade Pattern)**
Instead of requiring the main application to import and mount 7+ routers independently, `mount_cloud()` acts as a facade. Benefits:
- Single point of change when adding/removing domains
- Main application stays clean and focused on infrastructure concerns
- Easier onboarding (developer only calls one function)
**2. Domain-Driven Architecture**
Each domain (auth, chat, workspace) is a separate module with:
- `router.py` — HTTP contract (thin, validation + routing)
- `service.py` — Business logic (stateless, testable)
- `schemas.py` — Pydantic models for validation
This module orchestrates these domains without enforcing strong coupling.
**3. Global Error Handler**
Rather than each route catching and converting `CloudError`, a single exception handler does it. Benefits:
- DRY principle — no repeated error handling code
- Consistent error responses across all domains
- Easy to add logging/monitoring to one place
**4. Inline Shared Endpoints**
The user search and license endpoints are defined inline here rather than in a separate "shared" domain. Rationale:
- They're small and cross-cutting
- Don't justify a full domain module
- Belong to infrastructure setup, not business logic
**5. WebSocket at Root Path**
The WebSocket is mounted at `/ws/cloud` (not `/api/v1/ws/cloud`) because:
- WebSocket clients often prefer a different path for routing/caching
- Avoids the `/api/v1` prefix convention (which is for REST APIs)
- Frontend knows to connect to `ws://host/ws/cloud`, not `ws://host/api/v1/...`
**6. Deferred Imports**
Domain routers are imported inside `mount_cloud()` rather than at module level. Benefits:
- Faster startup (don't load all domains if mounting is skipped)
- Circular import prevention (each domain can safely import shared utilities)
- Flexibility (could conditionally mount domains based on configuration)
**7. Agent Pool Lifecycle Management**
Using FastAPI's `on_event` hooks ensures the agent pool is:
- Started after all routers are mounted (so agents can access services)
- Stopped before the app exits (graceful shutdown)
- Integrated with the app's lifecycle (no separate background services to manage)
This is simpler than managing a separate thread or process.
**8. Public License Endpoint**
The `/api/v1/license` endpoint requires no authentication because:
- Clients need to know the license before authentication (deployment licensing)
- License info is non-sensitive (no user data, no private tokens)
- Simplifies client-side flow (no need to handle license checks in auth failures)
---
## Related
- [untitled](untitled.md)
- [workspace-data-model-for-organization-workspaces-in-multi-tenant-enterprise-depl](workspace-data-model-for-organization-workspaces-in-multi-tenant-enterprise-depl.md)
- [license-enterprise-license-validation-and-feature-gating-for-cloud-deployments](license-enterprise-license-validation-and-feature-gating-for-cloud-deployments.md)
- [deps-fastapi-dependency-injection-layer-for-cloud-router-authentication-and-auth](deps-fastapi-dependency-injection-layer-for-cloud-router-authentication-and-auth.md)
- [core-enterprise-jwt-authentication-with-cookie-and-bearer-transport-for-fastapi](core-enterprise-jwt-authentication-with-cookie-and-bearer-transport-for-fastapi.md)
- [agent-agent-configuration-and-metadata-storage-for-workspace-scoped-ai-agents](agent-agent-configuration-and-metadata-storage-for-workspace-scoped-ai-agents.md)
- [comment-threaded-comments-on-pockets-and-widgets-with-workspace-isolation](comment-threaded-comments-on-pockets-and-widgets-with-workspace-isolation.md)
- [file-cloud-storage-metadata-document-for-managing-file-references](file-cloud-storage-metadata-document-for-managing-file-references.md)
- [group-multi-user-chat-channels-with-ai-agent-participants](group-multi-user-chat-channels-with-ai-agent-participants.md)
- [invite-workspace-membership-invitation-document-model](invite-workspace-membership-invitation-document-model.md)
- [message-data-model-for-group-chat-messages-with-mentions-reactions-and-threading](message-data-model-for-group-chat-messages-with-mentions-reactions-and-threading.md)
- [notification-in-app-notification-data-model-and-persistence-for-user-workspace-e](notification-in-app-notification-data-model-and-persistence-for-user-workspace-e.md)
- [pocket-data-models-for-pocket-workspaces-with-widgets-teams-and-collaborative-ag](pocket-data-models-for-pocket-workspaces-with-widgets-teams-and-collaborative-ag.md)
- [session-cloud-tracked-chat-session-document-model-for-pocket-scoped-conversation](session-cloud-tracked-chat-session-document-model-for-pocket-scoped-conversation.md)

View File

@@ -0,0 +1,169 @@
# ee/cloud/kb/__init__ — Knowledge Base Domain Package Initialization and Endpoint Exposure
> This module serves as the entry point for the Knowledge Base (KB) domain within the Enterprise Edition cloud infrastructure. It acts as a package initializer that exposes workspace-scoped KB endpoints for search, ingest, browse, lint, and stats operations. Its existence as a separate `__init__` module indicates KB is a distinct bounded domain within the workspace feature set, following domain-driven design principles.
**Categories:** Knowledge Management Domain, API Router / Endpoint Layer, Workspace-Scoped Feature, Enterprise Edition Cloud Infrastructure
**Concepts:** Knowledge Base domain (KB), Workspace scoping, Bounded domain pattern, Domain-driven design, FastAPI router pattern, Dependency injection, Event-driven consistency, Multi-layered authorization, License tier gating, Stateless handler pattern
**Words:** 1132 | **Version:** 1
---
## Purpose
The `__init__.py` module at `/ee/cloud/kb/` functions as the **package initialization boundary** for the Knowledge Base domain. In a domain-driven architecture, this module's primary responsibilities are:
1. **Domain Encapsulation**: It marks the `kb` directory as a Python package and defines the public API surface for all KB-related functionality within the enterprise cloud workspace context.
2. **Problem Space**: The Knowledge Base domain solves the problem of managing, searching, ingesting, and maintaining quality of workspace-specific knowledge repositories. Organizations need to search across documents, lint them for quality, browse hierarchies, and collect statistics—all within strict workspace boundaries.
3. **Architectural Role**: This module sits within the `/ee/cloud/` layer, indicating KB functionality is an enterprise edition feature available to cloud-deployed workspaces. It represents one functional domain among several (workspace, user, agent, message, etc.) that together compose the cloud platform.
## Module Organization
While this specific file contains no executable code (only a comment), the import graph of the surrounding `kb` package reveals the KB domain's internal structure and dependencies:
**Internal Domain Components** (imported from within kb/submodules):
- `errors` — Domain-specific exception types for KB operations
- `router` — FastAPI route handlers exposing KB endpoints (search, ingest, browse, lint, stats)
- `workspace` — Workspace-scoped KB context and bindings
- `core` — Core KB business logic and entity definitions
**Cross-Domain Dependencies** (shared across cloud platform):
- `license` — Access control based on subscription tier
- `user` — User identity and permission context
- `deps` — FastAPI dependency injection utilities
- `event_handlers` — Event publishing for KB mutations (ingest, delete, etc.)
- `agent_bridge` — Integration with agent execution context
- `agent` — Agent entity references
- `comment` — Comment annotations on KB documents
- `file` — File attachments and references
- `group` — Group access control and permissions
- `invite` — Sharing and access invitation workflows
- `message` — Cross-reference to conversation context
- `notification` — Change notifications
- `pocket` — Pocket/saved item integration
- `session` — Request session and user context
## Key Endpoints and Operations
Based on the comment, this domain exposes the following workspace-scoped KB endpoints:
**Search**: Query knowledge base documents with optional filtering and ranking.
**Ingest**: Add new documents to the knowledge base, likely triggering indexing and event notifications.
**Browse**: Navigate KB structure (hierarchies, collections, tags) for discovery and exploration.
**Lint**: Validate KB documents for quality standards (completeness, format, metadata, etc.).
**Stats**: Aggregate and report on KB statistics (document count, update frequency, access patterns, etc.).
## How It Works
### Request Flow
1. **HTTP Request** arrives at a KB endpoint (e.g., `POST /workspaces/{workspace_id}/kb/search`)
2. **FastAPI Routing** (via `router`) matches the request to a handler
3. **Dependency Injection** (via `deps`) injects:
- `session` — Current user and workspace context
- `license` — License tier validation
- `workspace` — Workspace-specific KB configuration
4. **Authorization Check** — Handler verifies user has required permissions via `group` and `user` modules
5. **Business Logic Execution** (via `core`) — Performs the actual operation (search query, document ingestion, etc.)
6. **Event Publishing** (via `event_handlers`) — Publishes KB mutations for consistency (indexing, notifications, audit)
7. **Response** — Returns results to client
### Data Flow
- **Ingest**: Documents flow from client → handler → `core` business logic → storage → `event_handlers` → indexing/notifications
- **Search**: Query parameters → `core` search logic → ranking → response
- **Lint**: Documents → validation rules in `core` → error report
- **Stats**: Aggregate operations on stored KB data → statistics response
## Authorization and Security
Knowledge Base access is governed by multiple layers:
1. **Workspace Scoping** — All KB operations are workspace-scoped; users can only access KB within authorized workspaces
2. **License Tier** — The `license` module gates KB features (e.g., advanced search may require premium tier)
3. **Group Permissions** — The `group` module defines who can ingest, browse, lint, or manage KB within a workspace
4. **User Context** — The `user` module provides identity; the `session` module provides request-level user/workspace context
5. **Audit Events** — The `event_handlers` module likely publishes events for audit logging of KB modifications
## Dependencies and Integration
### Why KB Depends on These Modules
| Dependency | Why | Usage Pattern |
|---|---|---|
| `workspace` | KB is workspace-scoped | Every KB operation validates workspace context |
| `user`, `session` | Identify who is accessing KB | Request handlers inject current user/session |
| `license` | Gate premium KB features | Tier-based endpoint availability |
| `event_handlers` | Maintain consistency | Publish events on ingest/delete for indexing and notifications |
| `group` | Enforce KB access control | Check group membership for permission to operate |
| `core` | Encapsulate KB business logic | Router delegates to core for actual operations |
| `agent_bridge`, `agent` | Integrate KB with agentic workflows | Agents may query or populate KB |
| `file`, `comment`, `message` | Cross-domain references | KB documents may attach files, receive comments, relate to messages |
| `notification`, `pocket` | UX integration | Notify users of KB changes; save KB items |
### How KB is Used
The KB domain is likely consumed by:
- **Frontend clients** via REST API exposed by `router`
- **Agents** via `agent_bridge` for context retrieval
- **Other domain modules** for embedded knowledge features (e.g., message context enrichment)
## Design Decisions
### 1. Domain-Driven Design
KB is organized as a **separate bounded domain** (`kb/`) within the cloud workspace, not scattered across other modules. This enforces cohesion and reduces coupling.
### 2. Workspace Scoping Pattern
All KB operations are workspace-scoped. This is enforced consistently through the `workspace` dependency and session context, preventing data leakage across organizations.
### 3. Event-Driven Consistency
Rather than KB handlers directly triggering indexing or notifications, they publish events via `event_handlers`. This decouples KB business logic from downstream concerns and enables non-blocking operations.
### 4. Multi-Layered Authorization
Authorization is not just binary (allowed/denied) but layered: license tier gates features, groups gate access, and user context validates ownership. This supports fine-grained access control in enterprise environments.
### 5. Stateless Handler Pattern
The `router` module uses stateless handlers (typical FastAPI style) that rely entirely on dependency injection for context. This simplifies testing and horizontal scaling.
### 6. Functional Decomposition
The five main endpoints (search, ingest, browse, lint, stats) decompose KB responsibilities into focused, composable operations rather than monolithic CRUD.
## When to Use This Module
**Use KB if you need to**:
- Allow users to store, organize, and search knowledge documents within a workspace
- Ingest external data sources into a centralized knowledge repository
- Quality-assure knowledge through linting and validation
- Analyze KB usage and document statistics
- Integrate knowledge with agents or AI workflows
- Control KB access via workspace and group permissions
**Don't use KB if**:
- You're building a general-purpose document search (use generic file search instead)
- Users don't need permission-based access control
- You're operating outside of workspace context
- Your use case doesn't require enterprise edition features
---
## Related
- [untitled](untitled.md)
- [workspace-data-model-for-organization-workspaces-in-multi-tenant-enterprise-depl](workspace-data-model-for-organization-workspaces-in-multi-tenant-enterprise-depl.md)
- [license-enterprise-license-validation-and-feature-gating-for-cloud-deployments](license-enterprise-license-validation-and-feature-gating-for-cloud-deployments.md)
- [deps-fastapi-dependency-injection-layer-for-cloud-router-authentication-and-auth](deps-fastapi-dependency-injection-layer-for-cloud-router-authentication-and-auth.md)
- [core-enterprise-jwt-authentication-with-cookie-and-bearer-transport-for-fastapi](core-enterprise-jwt-authentication-with-cookie-and-bearer-transport-for-fastapi.md)
- [agent-agent-configuration-and-metadata-storage-for-workspace-scoped-ai-agents](agent-agent-configuration-and-metadata-storage-for-workspace-scoped-ai-agents.md)
- [comment-threaded-comments-on-pockets-and-widgets-with-workspace-isolation](comment-threaded-comments-on-pockets-and-widgets-with-workspace-isolation.md)
- [file-cloud-storage-metadata-document-for-managing-file-references](file-cloud-storage-metadata-document-for-managing-file-references.md)
- [group-multi-user-chat-channels-with-ai-agent-participants](group-multi-user-chat-channels-with-ai-agent-participants.md)
- [invite-workspace-membership-invitation-document-model](invite-workspace-membership-invitation-document-model.md)
- [message-data-model-for-group-chat-messages-with-mentions-reactions-and-threading](message-data-model-for-group-chat-messages-with-mentions-reactions-and-threading.md)
- [notification-in-app-notification-data-model-and-persistence-for-user-workspace-e](notification-in-app-notification-data-model-and-persistence-for-user-workspace-e.md)
- [pocket-data-models-for-pocket-workspaces-with-widgets-teams-and-collaborative-ag](pocket-data-models-for-pocket-workspaces-with-widgets-teams-and-collaborative-ag.md)
- [session-cloud-tracked-chat-session-document-model-for-pocket-scoped-conversation](session-cloud-tracked-chat-session-document-model-for-pocket-scoped-conversation.md)

View File

@@ -0,0 +1,57 @@
# Cloud Document Models Re-export Hub for Beanie ODM
> This module serves as a central re-export point for Beanie ODM document definitions used in the EE Cloud application. It consolidates imports from 11 specialized model modules and defines a core list of documents used throughout the system.
**Categories:** Database Models, Cloud Infrastructure, Enterprise Edition (EE) Architecture
**Concepts:** Beanie ODM, Document Models, User, Agent, Workspace, Message, Comment, Notification, Session, Group
**Words:** 240 | **Version:** 2
---
## Overview
The `ee.cloud.models.__init__` module functions as a centralized hub for re-exporting all Beanie ODM document model definitions across the EE Cloud infrastructure. This pattern enables cleaner imports and maintains a single source of truth for document model availability.
## Model Categories and Imports
### User and Authentication Models
- **User**: Core user entity with associated `OAuthAccount` and `WorkspaceMembership` classes
- Imported from `ee.cloud.models.user`
### Agent Models
- **Agent**: Agent entity with configuration
- **AgentConfig**: Configuration settings for agents
- Imported from `ee.cloud.models.agent`
### Workspace Models
- **Workspace**: Workspace entity with associated settings
- **WorkspaceSettings**: Configuration for workspace-level preferences
- **WorkspaceMembership**: User membership within workspaces
- Imported from `ee.cloud.models.workspace`
### Collaboration and Communication Models
- **Message**: Message entity with `Mention`, `Attachment`, and `Reaction` sub-classes
- **Comment**: Comment entity with `CommentAuthor` and `CommentTarget`
- Imported from `ee.cloud.models.message` and `ee.cloud.models.comment`
### Organization Models
- **Group**: Group entity with associated `GroupAgent`
- Imported from `ee.cloud.models.group`
### Utility and Infrastructure Models
- **Session**: User session tracking
- **Notification**: Notification entity with `NotificationSource`
- **Invite**: Invitation entity
- **FileObj**: File object storage
- **Pocket**: Data container with `Widget` and `WidgetPosition` classes
- Imported from respective model modules
## Core Documents List
The module defines `ALL_DOCUMENTS` containing the primary document classes used throughout the system:
```
User, Agent, Pocket, Session, Comment, Notification, FileObj, Workspace, Invite, Group, Message
```
This list serves as the canonical reference for which documents are actively managed by the Beanie ODM layer.

View File

@@ -0,0 +1,134 @@
# ee.cloud.sessions — Entry point and router export for session management APIs
> This module serves as the public API entry point for the sessions package, exporting the FastAPI router that handles all session-related HTTP endpoints. It exists to provide clean separation between internal session implementation details and the application's route registration, following the standard FastAPI pattern of organizing routers in dedicated modules. The module acts as a facade for session management in the enterprise cloud layer, connecting session business logic to the HTTP API layer.
**Categories:** API router and HTTP layer, Enterprise cloud features, Package structure and organization, Session management domain
**Concepts:** FastAPI router, re-export pattern, public API facade, package initialization, route registration, import aggregation, enterprise cloud (ee.cloud) namespace, multi-tenancy and workspace scoping, license validation, user authentication
**Words:** 817 | **Version:** 1
---
## Purpose
This `__init__.py` module provides the clean public interface for the `ee.cloud.sessions` package. Its single responsibility is to export the `router` object from the `router` module, which contains all FastAPI route definitions for session management operations.
In the PocketPaw architecture, the sessions package handles user session lifecycle management—creating, managing, and terminating user sessions in the cloud environment. By isolating the router export in `__init__.py`, the package follows standard Python and FastAPI conventions:
- **Clean namespace**: Consumers of this package import from `ee.cloud.sessions` rather than `ee.cloud.sessions.router`
- **Implementation hiding**: Internal submodules like `router`, `models`, and potential `service` modules remain implementation details
- **Clear API surface**: The exported `router` object is the contract—anything else is internal
- **Flexibility**: Future refactoring can reorganize internal modules without affecting imports elsewhere
This module is part of the **enterprise cloud (ee.cloud)** layer, which adds multi-tenancy, licensing, and advanced collaboration features on top of core functionality.
## Key Classes and Methods
No classes or functions are defined in this module. The single action is:
```python
from ee.cloud.sessions.router import router # noqa: F401
```
**`router` (exported object)**
- **Type**: FastAPI `APIRouter` instance
- **Purpose**: Aggregates all HTTP route handlers for session operations (create, read, update, delete, validate sessions)
- **Usage**: Imported and registered in the main application to attach session endpoints to the HTTP API
- **Pattern**: This is the standard FastAPI router pattern—endpoint handlers are organized in `router.py` and exported via `__init__.py`
The `# noqa: F401` comment suppresses linting warnings about unused imports, since the import's purpose is re-exporting rather than using the object within this module.
## How It Works
**Import flow**:
1. Application root (likely in a main FastAPI app file) imports: `from ee.cloud.sessions import router`
2. This triggers execution of `ee/cloud/sessions/__init__.py`
3. The `__init__.py` imports `router` from the `router` submodule
4. The FastAPI app registers this router: `app.include_router(router)`
5. All routes defined in `router.py` become available as HTTP endpoints
**No runtime logic**: This module performs no operations at runtime beyond the import statement. It's purely structural—a Python packaging convention that creates a clean API boundary.
## Authorization and Security
Authorization logic is not present in this module. However, the `router` object it exports likely contains:
- **Dependency injection** of authentication/authorization checks (FastAPI Depends)
- **License validation** (via the `license` module imported in the broader package)
- **Workspace scoping** (ensuring users can only access sessions in their workspace)
- **User validation** (via the `user` module)
All security decisions are delegated to the `router` module and its handler functions.
## Dependencies and Integration
**Direct dependencies**:
- `ee.cloud.sessions.router` — Contains the FastAPI router with actual endpoint implementations
**Indirect dependencies** (through router.py, not shown here but implied by the import graph):
- `errors` — Custom exception types for session errors
- `workspace`, `license`, `user` — Domain models and validation for multi-tenant, licensed sessions
- `event_handlers` — Session lifecycle event publishing (e.g., session created, session expired)
- `agent_bridge`, `agent`, `comment`, `file`, `group`, `invite`, `message`, `notification`, `pocket` — Cross-domain features that interact with sessions
- `core` — Base utilities, likely including database models and common service patterns
- `deps` — FastAPI dependency definitions for request-level injection
**What depends on this module**:
- Application root/main entry point (imports `router` to register routes)
- Likely nothing inside the package imports from this `__init__.py`—sibling session modules import from each other directly
**Integration pattern**: This follows the standard FastAPI layered architecture:
```
HTTP Layer (FastAPI routes in router.py)
↓ (imports)
Business Logic Layer (session services, handlers)
↓ (imports)
Data Layer (models, database access via core)
```
## Design Decisions
**1. Router aggregation in separate module**
- **Decision**: Keep route definitions in `router.py`, export in `__init__.py`
- **Why**: Separates route structure (HTTP concerns) from package initialization. Allows `__init__.py` to stay focused on the public API without being cluttered by routing logic.
**2. Re-export pattern**
- **Decision**: Single `from X import Y` statement
- **Why**: Minimal, clean, and explicit. Makes it immediately clear what the public API is.
- **Trade-off**: Could have defined `__all__` for more explicit control, but unnecessary for a single export.
**3. No custom initialization logic**
- **Decision**: No code beyond the import
- **Why**: Sessions are stateless from an app-startup perspective. All state management happens at request time via the router and handlers.
**4. Location in ee.cloud namespace**
- **Decision**: Sessions are under `ee.cloud`, not `core`
- **Why**: Sessions are enterprise features—they're tied to licensing, multi-tenancy, and workspace scoping. They're not part of the open-source or basic feature set.
## Architectural Context
Within PocketPaw, the session system handles:
- **User authentication state**: Who is logged in
- **Multi-device support**: Users may have multiple active sessions
- **Expiration and refresh**: Sessions timeout and can be renewed
- **Workspace isolation**: Sessions are scoped to workspaces
- **Event emission**: Session lifecycle triggers are published to event handlers (for logging, notifications, etc.)
This module is the HTTP entry point for all of that functionality—the router it exports defines endpoints like `POST /sessions`, `GET /sessions/{id}`, `DELETE /sessions/{id}`, etc.
---
## Related
- [untitled](untitled.md)
- [workspace-data-model-for-organization-workspaces-in-multi-tenant-enterprise-depl](workspace-data-model-for-organization-workspaces-in-multi-tenant-enterprise-depl.md)
- [license-enterprise-license-validation-and-feature-gating-for-cloud-deployments](license-enterprise-license-validation-and-feature-gating-for-cloud-deployments.md)
- [deps-fastapi-dependency-injection-layer-for-cloud-router-authentication-and-auth](deps-fastapi-dependency-injection-layer-for-cloud-router-authentication-and-auth.md)
- [core-enterprise-jwt-authentication-with-cookie-and-bearer-transport-for-fastapi](core-enterprise-jwt-authentication-with-cookie-and-bearer-transport-for-fastapi.md)
- [agent-agent-configuration-and-metadata-storage-for-workspace-scoped-ai-agents](agent-agent-configuration-and-metadata-storage-for-workspace-scoped-ai-agents.md)
- [comment-threaded-comments-on-pockets-and-widgets-with-workspace-isolation](comment-threaded-comments-on-pockets-and-widgets-with-workspace-isolation.md)
- [file-cloud-storage-metadata-document-for-managing-file-references](file-cloud-storage-metadata-document-for-managing-file-references.md)
- [group-multi-user-chat-channels-with-ai-agent-participants](group-multi-user-chat-channels-with-ai-agent-participants.md)
- [invite-workspace-membership-invitation-document-model](invite-workspace-membership-invitation-document-model.md)
- [message-data-model-for-group-chat-messages-with-mentions-reactions-and-threading](message-data-model-for-group-chat-messages-with-mentions-reactions-and-threading.md)
- [notification-in-app-notification-data-model-and-persistence-for-user-workspace-e](notification-in-app-notification-data-model-and-persistence-for-user-workspace-e.md)
- [pocket-data-models-for-pocket-workspaces-with-widgets-teams-and-collaborative-ag](pocket-data-models-for-pocket-workspaces-with-widgets-teams-and-collaborative-ag.md)
- [session-cloud-tracked-chat-session-document-model-for-pocket-scoped-conversation](session-cloud-tracked-chat-session-document-model-for-pocket-scoped-conversation.md)

View File

@@ -0,0 +1,180 @@
# ee.cloud.workspace — Router re-export for FastAPI workspace endpoints
> This module serves as the public entry point for the workspace domain's FastAPI router. It re-exports the `router` object from the `router` submodule, making workspace API endpoints discoverable and mountable by the application's main FastAPI instance. As a thin re-export layer, it acts as a facade that decouples the application's router mounting logic from the internal organization of workspace routing.
**Categories:** Workspace Domain, API Router / Endpoint Layer, Module Architecture / Facade Pattern, Enterprise Features
**Concepts:** FastAPI APIRouter, router re-export, facade pattern, module encapsulation, route mounting, public API boundary, multi-tenant workspace, enterprise edition (ee), stateless routing layer, dependency injection (FastAPI deps)
**Words:** 923 | **Version:** 1
---
## Purpose
This `__init__.py` module exists for one explicit purpose: **to publicly expose the workspace domain's FastAPI router** as a single, importable symbol.
In FastAPI applications, routers are typically defined in a dedicated module and then imported and mounted on the main application instance. This `__init__.py` achieves that by re-exporting the `router` object from `ee.cloud.workspace.router`, creating a clean public API for the workspace domain.
### Why This Pattern?
The re-export pattern provides several architectural benefits:
1. **Module Encapsulation**: Allows the internal structure of workspace routing to change without affecting external consumers. If routing logic is reorganized or split into multiple files, only this re-export needs updating.
2. **Clear Public Interface**: Callers only need to import from `ee.cloud.workspace` rather than navigating to `ee.cloud.workspace.router`. This signals "this is the intended public API."
3. **Facade Pattern**: Acts as a facade for the workspace domain, hiding implementation details while exposing exactly what external code needs: the router.
### Role in System Architecture
This module is part of the **Enterprise Edition (ee)** cloud workspace subsystem, which appears to be a multi-tenant workspace management system supporting:
- User and group management (see `user`, `group` imports)
- File and comment handling (`file`, `comment`)
- Messaging and notifications (`message`, `notification`)
- Session and authentication management (`session`)
- Event handling and agent integration (`event_handlers`, `agent_bridge`, `agent`)
- Licensing and dependency injection (`license`, `deps`)
The router exposed here registers all HTTP endpoints that handle workspace domain operations, making them discoverable to the FastAPI application router.
## Key Classes and Methods
This module contains no classes or custom methods—it is purely a re-export mechanism.
### Exported Symbol
**`router`** (`fastapi.APIRouter`)
- **Source**: `ee.cloud.workspace.router.router`
- **Purpose**: The FastAPI router instance containing all workspace-domain HTTP endpoint definitions
- **Usage**: Expected to be mounted on the main FastAPI application instance via `app.include_router(router)`
## How It Works
### Import Flow
```
Application Bootstrap
↓
from ee.cloud.workspace import router
↓
This __init__.py loads
↓
Imports router from ee.cloud.workspace.router
↓
Re-exports as module-level symbol
↓
Application mounts: app.include_router(router)
↓
All workspace endpoints become available
```
### When This Module Is Used
1. **Application Startup**: The main FastAPI application imports this module during initialization to discover and register workspace routes.
2. **Route Discovery**: Any middleware or tooling that needs to enumerate available routes can inspect the router object.
3. **Testing**: Test frameworks may import the router to test endpoint handlers in isolation.
## Authorization and Security
This module itself implements no authorization logic—it is purely structural. Authorization is implemented within:
- Individual endpoint handlers in `ee.cloud.workspace.router`
- Dependency injection patterns used by FastAPI (likely leveraging the `core` module)
- Request-level middleware
- The `license` module (enterprise feature gating)
The workspace router's endpoints are expected to enforce:
- **Multi-tenancy**: Scoping operations to the authenticated user's workspaces
- **Role-Based Access Control (RBAC)**: Via `user` and `group` management
- **Feature Licensing**: Via the `license` module for enterprise features
## Dependencies and Integration
### Direct Dependency
- **`ee.cloud.workspace.router`**: Provides the `router` object to be re-exported
### Implied Dependencies (via workspace.router)
Based on the import graph, the workspace domain integrates with:
- **`errors`**: Custom exception definitions for workspace operations
- **`user`**: User management and authentication context
- **`group`**: Multi-user chat groups/channels within a workspace, with human and AI agent participants
- **`file`**: File storage and retrieval
- **`comment`**: Comment/annotation functionality
- **`message`**: Messaging within workspaces
- **`notification`**: Real-time or async notification delivery
- **`session`**: Cloud-tracked chat session document model for pocket-scoped conversations (not HTTP/auth sessions)
- **`license`**: Enterprise license verification for workspace features
- **`pocket`**: Data models for pocket workspaces, including widgets, teams, and collaborative agents
- **`event_handlers`**: Event-driven architecture for workspace lifecycle events
- **`agent_bridge`**: Integration with agent/bot systems
- **`agent`**: Agent/bot definitions and lifecycle
- **`invite`**: Workspace or group invitation functionality
- **`deps`**: Shared FastAPI dependencies (authentication, request context, etc.)
- **`core`**: Enterprise JWT authentication with cookie and bearer transport for FastAPI
### What Depends on This Module
- **Main Application Bootstrap Code**: The top-level `main.py` or application factory imports `from ee.cloud.workspace import router` to mount workspace endpoints
- **API Documentation Generators**: Tools that scan routes to generate OpenAPI specs
- **Router Aggregators**: Code that collects routers from multiple domains and mounts them
## Design Decisions
### 1. Re-Export Pattern
Rather than defining the router directly in `__init__.py`, it is imported from a submodule (`router`). This is intentional:
- **Separation of Concerns**: Router definitions are kept in a dedicated module
- **Scalability**: If routing becomes complex, it can be split into multiple files within the workspace module without changing the public API
### 2. `# noqa: F401` Comment
The `noqa: F401` annotation tells linters to ignore the "imported but unused" warning. This is necessary because:
- The import statement defines a public API (re-export)
- Linters cannot detect that the symbol is used by external code
- The annotation explicitly documents the intentional re-export
### 3. Minimal Module Content
The module is intentionally thin. This reflects a **facade pattern** where the workspace domain exposes a minimal, stable public interface while keeping implementation details encapsulated.
### 4. Enterprise Edition (ee) Packaging
Placement in the `ee` (Enterprise Edition) directory signals this is a premium feature, likely:
- Gated by license checks
- Subject to compliance or audit requirements
- Potentially excluded from open-source or community editions
## Connection to Larger System
This module is part of a **modular, multi-domain architecture** where:
- Each domain (workspace, auth, storage, etc.) publishes a router
- The main application aggregates these routers
- Domains can evolve independently
- Clear boundaries prevent circular dependencies
The workspace domain itself appears to be **feature-rich**, supporting collaborative work through users, groups, files, messages, comments, and notifications—suggesting a platform like Slack, Notion, or Jira.
---
## Related
- [untitled](untitled.md)
- [workspace-data-model-for-organization-workspaces-in-multi-tenant-enterprise-depl](workspace-data-model-for-organization-workspaces-in-multi-tenant-enterprise-depl.md)
- [license-enterprise-license-validation-and-feature-gating-for-cloud-deployments](license-enterprise-license-validation-and-feature-gating-for-cloud-deployments.md)
- [deps-fastapi-dependency-injection-layer-for-cloud-router-authentication-and-auth](deps-fastapi-dependency-injection-layer-for-cloud-router-authentication-and-auth.md)
- [core-enterprise-jwt-authentication-with-cookie-and-bearer-transport-for-fastapi](core-enterprise-jwt-authentication-with-cookie-and-bearer-transport-for-fastapi.md)
- [agent-agent-configuration-and-metadata-storage-for-workspace-scoped-ai-agents](agent-agent-configuration-and-metadata-storage-for-workspace-scoped-ai-agents.md)
- [comment-threaded-comments-on-pockets-and-widgets-with-workspace-isolation](comment-threaded-comments-on-pockets-and-widgets-with-workspace-isolation.md)
- [file-cloud-storage-metadata-document-for-managing-file-references](file-cloud-storage-metadata-document-for-managing-file-references.md)
- [group-multi-user-chat-channels-with-ai-agent-participants](group-multi-user-chat-channels-with-ai-agent-participants.md)
- [invite-workspace-membership-invitation-document-model](invite-workspace-membership-invitation-document-model.md)
- [message-data-model-for-group-chat-messages-with-mentions-reactions-and-threading](message-data-model-for-group-chat-messages-with-mentions-reactions-and-threading.md)
- [notification-in-app-notification-data-model-and-persistence-for-user-workspace-e](notification-in-app-notification-data-model-and-persistence-for-user-workspace-e.md)
- [pocket-data-models-for-pocket-workspaces-with-widgets-teams-and-collaborative-ag](pocket-data-models-for-pocket-workspaces-with-widgets-teams-and-collaborative-ag.md)
- [session-cloud-tracked-chat-session-document-model-for-pocket-scoped-conversation](session-cloud-tracked-chat-session-document-model-for-pocket-scoped-conversation.md)

View File

@@ -0,0 +1,231 @@
# events — In-process async pub/sub event bus for decoupled cross-domain side effects
> This module provides a simple in-process publish/subscribe event bus that enables domains to react to events from other domains without creating direct dependencies. It solves the problem of tight coupling in a multi-domain architecture by allowing services to emit events that other services subscribe to, enabling side effects like notifications or group membership updates to trigger from domain events without those domains knowing about each other.
**Categories:** Infrastructure/Foundation, Event-Driven Architecture, Cross-Domain Communication, Async/Concurrency Patterns
**Concepts:** EventBus, event-driven architecture, pub/sub pattern, publish/subscribe, async/await, decoupling, cross-domain side effects, handler registration, exception isolation, sequential execution
**Words:** 1587 | **Version:** 1
---
## Purpose
The `events` module exists to solve a fundamental architectural problem: **how do you trigger side effects across domains without creating tight coupling?**
In a multi-domain system (invite domain, notification domain, group domain, etc.), you often need actions in one domain to trigger reactions in another. For example, when an invite is accepted, you might need to:
- Create a notification
- Auto-add the user to a group
- Update analytics
- Send a webhook
Without an event bus, the invite domain would need to import and directly call functions from the notification, group, and analytics domains. This creates a tangled dependency graph where every domain knows about every other domain.
The `EventBus` solves this by providing a **pub/sub (publish/subscribe) contract**: domains emit events without knowing who cares about them, and other domains subscribe to those events without knowing where they come from. This is a classic decoupling pattern used in event-driven architectures.
## Key Classes and Methods
### EventBus
The core class that manages all subscriptions and emissions.
**`__init__()`**
Initializes an empty event bus with a `defaultdict` that maps event names (strings) to lists of handler functions. Using `defaultdict(list)` is a design choice that eliminates the need to check if an event key exists — accessing a missing event automatically creates an empty list.
**`subscribe(event: str, handler: Handler) -> None`**
Registers a handler function to be called whenever an event is emitted. The same handler can be registered multiple times for the same event (it will be called multiple times). The handler is appended to a list in subscription order, meaning handlers are executed in the order they were registered. This is critical for predictable side effect sequencing.
**`unsubscribe(event: str, handler: Handler) -> None`**
Removes a specific handler from an event's subscription list. Uses a try/except pattern to silently ignore attempts to unsubscribe handlers that were never registered ("no-op if not subscribed"). This is defensive programming — it prevents errors in cleanup code.
**`async def emit(event: str, data: dict[str, Any]) -> None`**
The core async method that triggers all subscribed handlers for a given event. This is where the actual side effects happen. Key characteristics:
- Calls handlers **sequentially** in subscription order (not concurrently), so handlers run one after another
- **Exception safety**: if one handler raises an exception, it's logged but remaining handlers still execute (isolation between handlers)
- Uses `logger.exception()` to capture the full stack trace for debugging
- Safely gets the handler's name using `getattr(handler, "__name__", handler)` so that callables without a `__name__` attribute (e.g., `functools.partial` objects or callable class instances) can still be logged
### Handler Type Alias
```python
Handler = Callable[[dict[str, Any]], Coroutine[Any, Any, None]]
```
This type hint is critical for understanding the contract: handlers are async functions that accept a data dictionary and return nothing. They're coroutines that must be awaited.
## How It Works
### Data Flow
1. **Subscription Phase** (happens at module/application startup, or during configuration):
- A service imports `event_bus` and calls `event_bus.subscribe("invite.accepted", my_handler)`
- The handler function is stored in `_handlers["invite.accepted"]`
2. **Emission Phase** (happens when a domain action completes):
- A domain emits an event: `await event_bus.emit("invite.accepted", {"user_id": 123, ...})`
- The event bus looks up all handlers in `_handlers["invite.accepted"]`
- For each handler, it awaits the coroutine, passing the data dictionary
3. **Side Effects Execution**:
- Each handler runs sequentially and can perform async operations (database writes, API calls, etc.)
- If any handler fails, it's logged but doesn't block other handlers
### Control Flow Example
```
Invite Domain:
await event_bus.emit("invite.accepted", {"user_id": 123, "group_id": 456})
EventBus.emit() looks up handlers for "invite.accepted"
Notification Service handler runs: creates notification
Group Service handler runs: adds user to group
Analytics Service handler runs: logs event
All handlers complete (or fail safely with logging)
Invite domain continues once emit() returns (it awaits every handler, but never sees their results or exceptions)
```
### Important Edge Cases
1. **No handlers registered**: If you emit an event with no subscribers, `self._handlers[event]` creates an empty list via `defaultdict`, and the loop simply doesn't execute. No error.
2. **Handler raises exception**: The exception is caught, logged with full traceback, and execution continues to the next handler. This prevents one broken subscriber from breaking all subscribers.
3. **Emitting from a handler**: A handler can call `event_bus.emit()` again, potentially creating a chain of events. Nested emissions are awaited in-line, so the original `emit()` call does not return until every nested emission has completed.
4. **Concurrent emissions**: If multiple coroutines call `emit()` at the same time, they run concurrently in the event loop. However, within a single `emit()` call, handlers run sequentially.
5. **Order matters**: Handlers execute in subscription order. If handler A calls something that is read by handler B, handler A must be subscribed first.
## Authorization and Security
This module **has no built-in authorization or security**. It's an in-process mechanism used by trusted internal code (the service layer). Key considerations:
- **No event validation**: The data dict is passed as-is to handlers. There's no schema validation, type checking, or ACL enforcement.
- **No authentication**: Any code running in the same process can subscribe or emit any event.
- **Information leakage risk**: Event data contains raw domain information. If a handler is compromised or misconfigured, it could access data it shouldn't.
**Security responsibility** belongs to the callers: each domain should only emit events with appropriate data, and handlers should only subscribe to events they should process. This is a **convention-based security model**.
## Dependencies and Integration
### What This Module Depends On
- **Python standard library only**: `logging`, `collections`, `collections.abc`, `typing`
- No external packages or database access
- This is intentional — the event bus is a lightweight infrastructure component
### What Depends on This Module
Based on the import graph, **four services depend on `events`**:
1. **message_service**: Likely subscribes to events like "user.created" or "group.updated" to trigger message-related side effects
2. **service**: A core service module that orchestrates domain logic and probably emits domain events
3. **agent_bridge**: Likely subscribes to events to send information to external agents or webhooks
4. **event_handlers**: A dedicated module (possibly in `handler_registry.py` or similar) that registers all event subscriptions during application startup
### Typical Integration Pattern
```
Domain Layer (e.g., invite_service):
- Performs core domain logic
- Calls: await event_bus.emit("invite.accepted", {...})
Event Handlers Layer (handler registration):
- Subscribes notification_handler to "invite.accepted"
- Subscribes group_handler to "invite.accepted"
- Subscribes analytics_handler to "invite.accepted"
Message/Notification Layer:
- Async handler that creates notifications on event
Group Layer:
- Async handler that manages group membership on event
```
This creates a **clean dependency graph** where the core domain doesn't know about side effects.
## Design Decisions
### 1. **Sequential Handler Execution (Not Concurrent)**
Handlers are awaited sequentially with `await handler(data)` inside a for loop. This means:
- **Pro**: Predictable ordering, easier debugging, no race conditions between handlers
- **Con**: If one handler is slow, all handlers after it are blocked
- **Reasoning**: For side effects, ordering and consistency matter more than latency. If you need concurrency, a handler can fan out its own independent work with `asyncio.gather()`, or the calling code can emit unrelated events concurrently.
### 2. **Graceful Exception Handling**
Exceptions in handlers are logged but don't stop other handlers. This prevents cascading failures:
- **Pro**: Resilience — one broken handler doesn't break all subscribers
- **Con**: Silent failures — exceptions are logged but not raised to the caller, so the emitter doesn't know if side effects failed
- **Reasoning**: Event handlers are often "fire and forget" side effects. The original action (e.g., accept invite) shouldn't fail because a notification failed to send.
### 3. **Module-Level Singleton**
```python
event_bus = EventBus()
```
A single global instance is created and imported throughout the codebase. This ensures:
- **Pro**: Simple API, no DI container needed, consistent subscriptions across the app
- **Con**: Global state, harder to test in isolation, tightly couples to this module
- **Reasoning**: This is an infrastructure component that's meant to be a shared utility. The entire app uses one event bus.
### 4. **Type Alias for Handlers**
The `Handler` type is explicit: `Callable[[dict[str, Any]], Coroutine[Any, Any, None]]`. This:
- **Pro**: Clear contract, IDE autocomplete works, type checkers enforce the signature
- **Con**: Uses `Any` heavily, doesn't capture semantic meaning of data dict
- **Reasoning**: Without schema libraries like Pydantic, `dict[str, Any]` is the practical choice. Event data is loosely typed by design to avoid coupling domains.
### 5. **defaultdict vs Regular dict**
Using `defaultdict(list)` instead of regular `dict`:
- **Pro**: No KeyError if you emit an event with no handlers
- **Con**: Less explicit — you can't tell if an event name is misspelled
- **Reasoning**: Convenience over explicitness. Emitting to nobody is a valid scenario (maybe some deployments don't have all handlers).
### 6. **In-Process Only (Not Distributed)**
This is a single-process pub/sub, not a message broker:
- **Pro**: No network latency, no distributed system complexity, no external dependencies
- **Con**: Only works within one process, no cross-service events, lost on process restart
- **Reasoning**: This is for **internal side effects within the cloud service**. Cross-service communication would use message brokers (RabbitMQ, Kafka, etc.), which is out of scope here.
## Common Patterns and Usage
### Registering Handlers (Typically in handler_registry or event_handlers module)
```python
from ee.cloud.shared.events import event_bus
from notification_service import create_notification
from group_service import add_user_to_group
async def on_invite_accepted(data: dict[str, Any]) -> None:
    await create_notification(data["user_id"], "Your invite was accepted!")

async def on_invite_accepted_group(data: dict[str, Any]) -> None:
    await add_user_to_group(data["user_id"], data["group_id"])
event_bus.subscribe("invite.accepted", on_invite_accepted)
event_bus.subscribe("invite.accepted", on_invite_accepted_group)
```
### Emitting Events (From domain services)
```python
from ee.cloud.shared.events import event_bus
async def accept_invite(invite_id: str):
    invite = await Invite.get(invite_id)
    invite.status = "accepted"
    await invite.save()
    # Trigger side effects
    await event_bus.emit("invite.accepted", {
        "invite_id": invite_id,
        "user_id": invite.user_id,
        "group_id": invite.group_id,
    })
```
---
## Related
- [untitled](untitled.md)

View File

@@ -0,0 +1,189 @@
# file — Cloud storage metadata document for managing file references
> This module defines the `FileObj` document model that stores metadata about files persisted in external cloud storage (S3, GCS, or local). Rather than storing actual file bytes in MongoDB, it maintains a lightweight reference with ownership, location, and access information. It's a critical bridge between the application's domain logic and cloud storage infrastructure.
**Categories:** data model, cloud storage, file management, MongoDB / Beanie
**Concepts:** FileObj, Document, Indexed, Beanie ODM, Pydantic Field, MongoDB collection, cloud storage metadata, pre-signed URL, S3, GCS
**Words:** 1297 | **Version:** 1
---
## Purpose
The `file` module solves a fundamental architectural problem: applications need to store files, but MongoDB is not an efficient or cost-effective choice for binary data. This module decouples file metadata (ownership, naming, access control) from file storage itself.
Instead of embedding or storing file bytes in the database, `FileObj` acts as a **pointer and metadata record**. When a user uploads or references a file, the application:
1. Stores the actual bytes in S3, GCS, or local disk
2. Creates a `FileObj` document that remembers *where* the file is and *who owns it*
3. Uses the `FileObj` to generate pre-signed URLs or validate access
This pattern is essential in modern cloud-native architectures because it:
- **Separates concerns**: Database handles structured data, object storage handles binary data
- **Enables scalability**: Files can be served directly from CDN-backed object stores
- **Controls costs**: MongoDB storage is expensive; S3/GCS is cheaper for unstructured data
- **Supports multi-tenancy**: The `owner` field enables workspace-scoped file access
## Key Classes and Methods
### `FileObj(Document)`
A Beanie ODM document representing file metadata stored in MongoDB's `files` collection.
**Fields:**
- **`owner: Indexed(str)`** — The user or workspace that owns this file. Indexed for fast lookup by owner. This is critical for multi-tenant access control—queries like "fetch all files owned by workspace X" depend on this index.
- **`file_name: str`** — The original filename as uploaded or referenced by the user (e.g., `"resume.pdf"`). Used for display and content-disposition headers in download responses.
- **`bucket: str`** — The storage bucket identifier. For S3, this might be `"my-app-prod-files"`; for GCS, `"project-files-bucket"`. Tells the application which cloud storage account to use.
- **`provider: str`** — One of `"gcs"`, `"s3"`, or `"local"`. A plain string restricted to these three values by a regex `pattern` constraint on the Pydantic `Field` (it is not a Python `Enum`). Determines which SDK the application uses to retrieve or generate signed URLs.
- **`path_in_bucket: str`** — The object key or path inside the bucket where the file actually lives (e.g., `"workspaces/123/documents/abc-def.pdf"`). This is the locator used in SDK calls like `s3_client.get_object(Bucket=bucket, Key=path_in_bucket)`.
- **`mime_type: str`** — The MIME type of the file (e.g., `"application/pdf"`, `"image/jpeg"`). Defaults to empty string. Used in HTTP Content-Type headers when serving downloads.
- **`size: int`** — File size in bytes. Defaults to 0. Used for quota enforcement, progress indicators, and validation that uploaded content matches expected size.
- **`public: bool`** — Whether the file is publicly accessible without authentication. Defaults to `False`. Used to determine whether to generate public URLs or require signed/temporary access tokens.
**Class-level Configuration:**
```python
class Settings:
    name = "files"
```
Maps the `FileObj` model to the `files` MongoDB collection. Without this, Beanie would use an auto-derived default collection name.
**No explicit methods** — `FileObj` is a pure data model. It inherits from Beanie's `Document` base class, which provides:
- `save()` and `create()` for persistence
- `find()` and `find_one()` for queries
- `delete()` for removal
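- An automatic `id` field (stored as MongoDB `_id`). Note that plain `Document` does not add `created_at`/`updated_at` timestamps; `FileObj` declares no such fields, so they would have to be added explicitly or via a timestamped base class.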
## How It Works
### Typical File Upload Flow
1. **User uploads a file** via API (e.g., multipart form data)
2. **Application validates** the file (size, type, quota)
3. **Application uploads bytes to cloud storage** (S3/GCS) and gets back a cloud-side path or key
4. **Application creates a `FileObj` document**:
```python
file_obj = FileObj(
owner="workspace_123",
file_name="report.xlsx",
bucket="prod-files",
provider="s3",
path_in_bucket="workspaces/123/uploads/report-uuid.xlsx",
mime_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
size=2048576,
public=False
)
await file_obj.create()
```
5. **Application returns the `FileObj.id`** (MongoDB ObjectId) to the client
### File Download/Access Flow
1. **Client requests file** by `FileObj.id`
2. **Application retrieves the `FileObj`** record
3. **Application validates ownership**: Check if `request.user.workspace == file_obj.owner`
4. **Application generates a pre-signed URL** using the `provider`, `bucket`, and `path_in_bucket` fields
5. **Application returns the URL** (or redirects to it)
6. **Client/browser downloads directly from cloud storage**, bypassing the application
### Query Patterns
Because `owner` is indexed:
```python
# Fast: indexed lookup
user_files = await FileObj.find(FileObj.owner == "user_123").to_list()
# Slower but possible: filter by provider
local_files = await FileObj.find(FileObj.provider == "local").to_list()
# Combined: workspace files that are public
public_workspace_files = await FileObj.find(
FileObj.owner == "workspace_456",
FileObj.public == True
).to_list()
```
## Authorization and Security
**Access Control is NOT enforced in this module**—it's a responsibility of the **caller**. The `FileObj` itself has no methods to validate access; it's just a data container.
**The `owner` field is the key**: Wherever files are retrieved or downloaded, the calling code must verify:
```python
file_obj = await FileObj.get(file_id)
if file_obj.owner != current_user.workspace_id:
    raise PermissionError("Cannot access this file")
```
**The `public` flag is informational**: It signals intent but does not enforce access. The API layer is responsible for checking this flag and deciding whether to grant unauthenticated access.
**Pre-signed URLs are time-limited**: When the application generates a pre-signed URL (via AWS SDK or GCS client), it specifies an expiry (commonly around 1 hour), after which the cloud provider rejects the URL. This ensures files cannot be downloaded indefinitely with a leaked link.
## Dependencies and Integration
**Direct Dependencies:**
- **Beanie** (`from beanie import Document, Indexed`) — ODM (Object-Document Mapper) for MongoDB. Provides the base `Document` class and the `Indexed` type annotation for indexing.
- **Pydantic** (`from pydantic import Field`) — Data validation and serialization. The `Field` with `pattern` validator enforces that `provider` is one of the three allowed strings.
**Indirect Dependencies:**
- **MongoDB** — The persistence layer. `FileObj` records are stored and queried here.
- **AWS S3 SDK** or **Google Cloud Storage SDK** — Used by higher-level code to upload/download bytes and generate pre-signed URLs. This module does not depend on those SDKs directly; it just records the metadata needed to use them.
**Imported By:**
- **`__init__.py`** (in the parent `ee/cloud/models/` package) — Exports `FileObj` so other modules can import it as `from ee.cloud.models import FileObj`.
**Used By (expected):**
- **File upload/download API routes** — Handle HTTP requests, validate access, call cloud SDKs, and create/retrieve `FileObj` documents
- **Workspace/organization services** — May query files by owner for listing or cleanup
- **Sharing/permission services** — May modify `public` flag or create access tokens for specific files
- **Quota/billing services** — Aggregate `size` field across workspace files to enforce limits
## Design Decisions
### 1. **Metadata-Only Model**
The module stores *only* metadata, not bytes. This is intentional. Storing binary data in MongoDB would:
- Inflate database size and backup costs
- Cause slower queries (binary fields slow down indexing)
- Complicate replication and sharding
By keeping only pointers, `FileObj` documents are lightweight and queryable.
### 2. **Multi-Provider Support**
The `provider` field (gcs | s3 | local) allows the application to support multiple storage backends. This enables:
- **Gradual migration** from local to S3, or S3 to GCS, without re-uploading
- **Hybrid deployments** where different workspaces use different storage
- **Testing** with local storage in dev, S3 in prod
### 3. **Pre-signed URL Pattern**
The design assumes the application will generate pre-signed (temporary, signed) URLs rather than proxying downloads through the application. This is efficient because:
- Cloud storage CDNs are faster and cheaper than application servers
- Reduces load on application servers
- Leverages cloud provider's security (signatures are cryptographically valid for only the specified object, method, and time)
### 4. **Indexed Owner Field**
The `owner` field is indexed because:
- Workspaces frequently list "my files" — a query on `owner`
- Access control checks happen on almost every request — index ensures sub-millisecond validation
- It's the only field with this pattern in the current model
### 5. **Beanie ODM Choice**
Using Beanie (an async-first MongoDB ODM) implies the application is:
- Built on async/await (likely FastAPI or similar)
- Comfortable with Python OOP abstractions over raw pymongo
- Willing to trade some flexibility for type safety and validation
### 6. **Minimal Defaults**
Fields like `mime_type` and `size` default to empty/zero. This allows creation of `FileObj` records even if those details are not immediately available, supporting two-phase uploads (create metadata stub, populate details later). It also prevents validation errors if callers are uncertain about a field's value.
---
## Related
- [eecloudworkspace-router-re-export-for-fastapi-workspace-endpoints](eecloudworkspace-router-re-export-for-fastapi-workspace-endpoints.md)

View File

@@ -0,0 +1,152 @@
# group — Multi-user chat channels with AI agent participants
> This module defines the data models for chat groups/channels that support multiple human users and AI agent participants, similar to Slack channels. It exists as a separate concern to cleanly separate the group entity definition from business logic, enabling other modules (group_service, routers, event handlers) to depend on a single source of truth for group structure. As a foundational data model, it sits at the core of the chat/collaboration system architecture.
**Categories:** data model — core persistent entity, chat/collaboration — domain area for group conversations, multi-user feature — supports multiple participants with different roles, MongoDB/Beanie — database technology and ORM layer
**Concepts:** Group — multi-user conversation space entity, GroupAgent — agent assignment with configurable response behavior, TimestampedDocument — base class adding created_at/updated_at, Workspace scoping — tenant isolation via workspace field, Soft delete pattern — archived flag instead of hard delete, Denormalization — message_count, last_message_at, pinned_messages cached on group, Composite indexing — (workspace, slug) index for efficient tenant-scoped queries, Respond mode — agent participation control (mention_only, auto, silent, smart), Type validation — pattern regex for public/private/dm type field, Beanie ODM — MongoDB object mapping and indexing
**Words:** 1325 | **Version:** 1
---
## Purpose
The `group` module defines the persistent data structures for multi-user conversation spaces within a workspace. It solves the architectural problem of representing "channels" or "groups" where:
- Multiple **human users** can participate together
- **AI agents** can be assigned with different participation modes (mention-only, auto-respond, silent, or smart modes)
- Groups can have different visibility levels (public, private, direct message)
- Metadata like messages, pins, and activity tracking are maintained
This module exists separately because the Group entity is referenced by many other parts of the system (group_service for business logic, routers for HTTP endpoints, event_handlers for real-time updates, agent_bridge for agent interactions). By centralizing the data model, the system maintains a single source of truth about what a group is, avoiding duplication and drift.
## Key Classes and Methods
### GroupAgent
Represents a single AI agent assignment within a group with configurable behavior:
**Fields:**
- `agent: str` — The unique identifier of the AI agent being assigned
- `role: str` — The agent's responsibility level: `"assistant"` (responds helpfully), `"listener"` (observes only), or `"moderator"` (enforces rules). Defaults to `"assistant"`.
- `respond_mode: str` — Controls when the agent participates:
- `"mention_only"` — Only responds when explicitly mentioned (default, lowest noise)
- `"auto"` — Responds to all messages automatically (highest engagement)
- `"silent"` — Never responds, purely observational
- `"smart"` — Responds intelligently based on context and relevance
**Business Logic:** This is a composition pattern allowing flexible agent configuration without modifying group structure itself. An agent can have both a role (what it does) and a respond mode (when it does it).
### Group
The core persistent entity representing a conversation space, extending `TimestampedDocument` (which adds `created_at` and `updated_at`).
**Core Fields:**
- `workspace: Indexed(str)` — Workspace ID; indexed because queries almost always filter by workspace (tenant isolation)
- `name: str` — Human-readable group name (e.g., "engineering-chat")
- `slug: str` — URL-safe identifier (derived from name, enables `/groups/{slug}` URLs)
- `description: str` — Optional group purpose/topic
- `icon: str`, `color: str` — UI presentation metadata
- `type: str` — Visibility/access control: `"public"` (all workspace members), `"private"` (invite-only), `"dm"` (direct message between 2-3 people). Validated with regex pattern.
**Participants:**
- `members: list[str]` — User IDs of human participants (defaults to empty; populated when users join)
- `agents: list[GroupAgent]` — AI agents assigned to this group with their individual configs
- `owner: str` — User ID of the group creator/owner (used for permission checks)
**Content and Activity:**
- `pinned_messages: list[str]` — Message IDs of messages pinned to top (denormalized for quick retrieval)
- `message_count: int` — Running counter of total messages (for analytics, pagination hints)
- `last_message_at: datetime | None` — Most recent message timestamp (enables "updated recently" sorting and activity detection)
**Lifecycle:**
- `archived: bool` — Soft delete: `True` means the group is inactive but preserved for history (allows unarchiving)
**Database Settings:**
- `Settings.name = "groups"` — MongoDB collection name
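- `Settings.indexes` — Composite index on `(workspace, slug)` enables fast lookups by workspace + slug. Note that a compound index only enforces slug uniqueness within a workspace if it is declared `unique`; otherwise uniqueness must be enforced by the service layer.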
## How It Works
### Data Flow
1. **Group Creation:** When a user creates a group via the router, a Group instance is instantiated with `owner=user_id`, `members=[owner]` initially, `workspace=current_workspace`, and timestamp defaults.
2. **Agent Assignment:** The group_service receives a list of GroupAgent configs and appends them to the `agents` field. Each GroupAgent specifies an agent ID and its participation rules.
3. **Message Ingestion:** When messages arrive (from event_handlers or message_service), the `message_count` is incremented and `last_message_at` is updated.
4. **Querying:** The router typically queries groups by `workspace + slug` (leveraging the composite index) to fetch a specific group, or by `workspace + archived=False` to list active groups.
5. **Agent Bridge Integration:** The agent_bridge reads the `agents` list and `respond_mode` to determine when/how to invoke each agent on new messages.
### Edge Cases and Design Decisions
**Soft Delete Pattern:** The `archived` field is a soft delete—groups are never truly removed, preserving message history and audit trails. This is critical for compliance and customer support ("when was this discussion?" can always be answered).
**Denormalized Message Metadata:** Fields like `message_count`, `last_message_at`, and `pinned_messages` are denormalized (stored on the group document rather than computed from a messages collection). This trades write complexity for read speed—displaying a group list requires zero joins. The group_service is responsible for keeping these consistent when messages are created/deleted.
**Workspace Scoping:** Every group belongs to exactly one workspace, enforced at the data model level via the `workspace` field. This is foundational multi-tenancy: queries always filter by workspace, preventing accidental cross-tenant leaks.
**Type Validation:** The `type` field uses Pydantic's `pattern` validator to ensure only valid types are accepted, failing fast at deserialization rather than allowing invalid states.
**Optional Timestamps:** `last_message_at` is `None` for newly created groups with no messages, allowing the system to distinguish "no messages yet" from "very old last message."
## Authorization and Security
This module itself has no authorization logic—it's a pure data model. However, it provides the structure that enables authorization elsewhere:
- **Ownership Check:** Routers and services use the `owner` field to verify if the requesting user can delete/edit group settings.
- **Membership Check:** The `members` list determines if a user can view/post messages in the group.
- **Type-Based Access:** The `type` field signals to upstream logic whether access is public (open to all workspace members, so no group-membership check), invite-only (check membership), or DM (check that the user is one of the conversation's participants).
The actual enforcement happens in group_service and routers, not here.
## Dependencies and Integration
### Dependencies (What This Module Imports)
- **`base` module:** Imports `TimestampedDocument`, a base class that adds `created_at` and `updated_at` fields. This is a foundational abstraction for all persistent entities in the system.
- **`beanie`:** ODM (Object-Document Mapper) providing `Indexed()` for marking fields for database indexing. Beanie handles the mapping between Python objects and MongoDB documents.
- **`pydantic`:** Type validation and serialization. `BaseModel` and `Field` enable runtime type checking, JSON schema generation, and error messages.
- **`datetime`:** Standard library for timestamp types.
### Reverse Dependencies (What Imports This Module)
- **`group_service`:** Contains business logic for creating, updating, querying, and archiving groups. Reads and modifies Group instances.
- **`router`:** HTTP API endpoints for group CRUD operations. Serializes/deserializes Group instances to/from JSON.
- **`__init__` (package init):** Re-exports Group and GroupAgent for public API (other modules import from the models package).
- **`agent_bridge`:** Reads the `agents` list and `respond_mode` to dispatch messages to appropriate agents.
- **`event_handlers`:** Listens for group events (creation, member join, message arrival) and updates Group fields or triggers side effects.
### Integration Points
```
Group (this module)
↓ extends
TimestampedDocument (base module)
Used by:
├─ group_service: CRUD operations, membership management
├─ router: HTTP API endpoints
├─ agent_bridge: Agent dispatch logic
├─ event_handlers: Event processing and state updates
└─ __init__: Public API exports
```
## Design Decisions
**Composition over Inheritance for Agents:** Rather than creating a GroupWithAgents subclass, GroupAgent is a simple Pydantic model nested in the agents list. This keeps the design flat and allows agents to be added/removed without restructuring the group document.
**Beanie ODM + Pydantic:** Using Beanie (MongoDB ODM) with Pydantic models provides automatic validation, JSON serialization, and database mapping. This reduces boilerplate but ties the system to MongoDB; switching databases would require replacing Beanie.
**Indexed Workspace Field:** The `workspace` field is indexed individually because it's a frequent filter dimension ("show me all groups in my workspace"). The composite `(workspace, slug)` index is more specific and handles slug lookups efficiently.
**Denormalization Over Normalization:** Storing `message_count` and `last_message_at` on the group avoids expensive aggregations when listing groups. The trade-off is that group_service must keep these consistent, accepting higher write latency for lower read latency.
**Soft Delete with No Purge:** Archived groups are never deleted, supporting compliance, audit trails, and unarchive scenarios. A purge operation would require explicit administrative action and would not be automatic.
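**Flexible Agent Modes:** The `respond_mode` field is a string-valued enumeration (a plain `str`, not a Python `Enum` class) for simplicity and JSON compatibility. This keeps the system extensible: new modes can be added without migrating stored documents; only the service logic that interprets the mode (and any validation on the field) needs updating.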
---
## Related
- [base-foundational-document-model-with-automatic-timestamp-management-for-mongodb](base-foundational-document-model-with-automatic-timestamp-management-for-mongodb.md)
- [Workspace Domain Service - Business Logic for Enterprise Cloud](untitled.md)
- [eecloudworkspace-router-re-export-for-fastapi-workspace-endpoints](eecloudworkspace-router-re-export-for-fastapi-workspace-endpoints.md)

554
docs/wiki/index.md Normal file
View File

@@ -0,0 +1,554 @@
# Knowledge Base Index
**38 articles** | **504 concepts** | **131 categories**
## Categories
### API Gateway Layer
- [auth/__init__ — Central re-export hub for authentication and user management](authinit-central-re-export-hub-for-authentication-and-user-management.md) — This module serves as the public API facade for the entire authentication domain
### API Router / Endpoint Layer
- [ee/cloud/kb/__init__ — Knowledge Base Domain Package Initialization and Endpoint Exposure](eecloudkbinit-knowledge-base-domain-package-initialization-and-endpoint-exposure.md) — This module serves as the entry point for the Knowledge Base (KB) domain within
- [ee.cloud.workspace — Router re-export for FastAPI workspace endpoints](eecloudworkspace-router-re-export-for-fastapi-workspace-endpoints.md) — This module serves as the public entry point for the workspace domain's FastAPI
### API Router Layer
- [deps — FastAPI dependency injection layer for cloud router authentication and authorization](deps-fastapi-dependency-injection-layer-for-cloud-router-authentication-and-auth.md) — This module provides FastAPI dependency functions that extract and validate user
### API Router — Bootstrap & Mounting
- [ee.cloud.__init__ — Cloud domain orchestration and FastAPI application bootstrap](eecloudinit-cloud-domain-orchestration-and-fastapi-application-bootstrap.md) — This module is the entry point for PocketPaw's enterprise cloud layer. It bootst
### API contract layer
- [schemas — Pydantic request/response contracts for session lifecycle operations](schemas-pydantic-requestresponse-contracts-for-session-lifecycle-operations.md) — This module defines the HTTP API contracts (request bodies and response payloads
- [schemas — Pydantic request/response data models for workspace domain operations](schemas-pydantic-requestresponse-data-models-for-workspace-domain-operations.md) — This module defines the contract between the workspace API layer and its consume
### API gateway / facade
- [chat/__init__.py — Entry point for chat domain with groups, messages, and WebSocket real-time capabilities](chatinitpy-entry-point-for-chat-domain-with-groups-messages-and-websocket-real-t.md) — This module serves as the public API gateway for the chat domain, re-exporting t
### API layer
- [core — Enterprise JWT authentication with cookie and bearer transport for FastAPI](core-enterprise-jwt-authentication-with-cookie-and-bearer-transport-for-fastapi.md) — This module implements a complete authentication system for PocketPaw using fast
- [schemas — Pydantic request/response models for agent lifecycle and discovery operations](schemas-pydantic-requestresponse-models-for-agent-lifecycle-and-discovery-operat.md) — This module defines four Pydantic BaseModel classes that serve as the contract l
- [schemas — Request/response data validation for the knowledge base REST API](schemas-requestresponse-data-validation-for-the-knowledge-base-rest-api.md) — This module defines Pydantic request/response schemas for the knowledge base dom
### API router / integration layer
- [ee.cloud.agents — Package initialization and router export for enterprise cloud agent functionality](eecloudagents-package-initialization-and-router-export-for-enterprise-cloud-agen.md) — This is a minimal package initialization module that serves as the public API en
### API router and HTTP layer
- [ee.cloud.sessions — Entry point and router export for session management APIs](eecloudsessions-entry-point-and-router-export-for-session-management-apis.md) — This module serves as the public API entry point for the sessions package, expor
### API router layer
- [pockets.__init__ — Entry point and public API aggregator for the pockets subsystem](pocketsinit-entry-point-and-public-api-aggregator-for-the-pockets-subsystem.md) — This module serves as the public interface for the enterprise cloud pockets subs
- [router — FastAPI authentication endpoints and user profile management](router-fastapi-authentication-endpoints-and-user-profile-management.md) — This module exposes HTTP endpoints for user authentication, registration, profil
### API schemas and data models
- [schemas — Pydantic models for authentication request/response validation](schemas-pydantic-models-for-authentication-requestresponse-validation.md) — This module defines three Pydantic BaseModel classes that standardize the shape
### Access Control & Security
- [Workspace Domain Service - Business Logic for Enterprise Cloud](untitled.md) — A stateless service layer that encapsulates workspace business logic including C
### Adapter/Bridge Pattern
- [backend_adapter — Adapter that makes PocketPaw's agent backends usable as knowledge base CompilerBackends](backendadapter-adapter-that-makes-pocketpaws-agent-backends-usable-as-knowledge.md) — This module provides `PocketPawCompilerBackend`, an adapter class that implement
### Agent Infrastructure
- [backend_adapter — Adapter that makes PocketPaw's agent backends usable as knowledge base CompilerBackends](backendadapter-adapter-that-makes-pocketpaws-agent-backends-usable-as-knowledge.md) — This module provides `PocketPawCompilerBackend`, an adapter class that implement
### Agent Integration Layer
- [ripple_normalizer — Normalizes AI-generated pocket specifications into a consistent, persistence-ready format](ripplenormalizer-normalizes-ai-generated-pocket-specifications-into-a-consistent.md) — This module provides a single public function, `normalize_ripple_spec()`, that t
### Async/Concurrency Patterns
- [events — In-process async pub/sub event bus for decoupled cross-domain side effects](events-in-process-async-pubsub-event-bus-for-decoupled-cross-domain-side-effects.md) — This module provides a simple in-process publish/subscribe event bus that enable
### Authentication & Authorization
- [auth/__init__ — Central re-export hub for authentication and user management](authinit-central-re-export-hub-for-authentication-and-user-management.md) — This module serves as the public API facade for the entire authentication domain
- [AuthService: Business Logic Layer for Authentication and User Profile Management](authservice-business-logic-layer-for-authentication-and-user-profile-management.md) — AuthService is a stateless FastAPI service that encapsulates authentication and
- [deps — FastAPI dependency injection layer for cloud router authentication and authorization](deps-fastapi-dependency-injection-layer-for-cloud-router-authentication-and-auth.md) — This module provides FastAPI dependency functions that extract and validate user
### Backend Service Architecture
- [Workspace Domain Service - Business Logic for Enterprise Cloud](untitled.md) — A stateless service layer that encapsulates workspace business logic including C
### Business Logic Layer
- [AuthService: Business Logic Layer for Authentication and User Profile Management](authservice-business-logic-layer-for-authentication-and-user-profile-management.md) — AuthService is a stateless FastAPI service that encapsulates authentication and
### CRUD
- [pocket — Data models for Pocket workspaces with widgets, teams, and collaborative agents](pocket-data-models-for-pocket-workspaces-with-widgets-teams-and-collaborative-ag.md) — This module defines the core document models (Pocket, Widget, WidgetPosition) th
### CRUD operations
- [router — FastAPI authentication endpoints and user profile management](router-fastapi-authentication-endpoints-and-user-profile-management.md) — This module exposes HTTP endpoints for user authentication, registration, profil
- [schemas — Pydantic request/response contracts for session lifecycle operations](schemas-pydantic-requestresponse-contracts-for-session-lifecycle-operations.md) — This module defines the HTTP API contracts (request bodies and response payloads
### CRUD schema definition
- [schemas — Pydantic request/response models for agent lifecycle and discovery operations](schemas-pydantic-requestresponse-models-for-agent-lifecycle-and-discovery-operat.md) — This module defines four Pydantic BaseModel classes that serve as the contract l
### Chat & Messaging
- [message — Data model for group chat messages with mentions, reactions, and threading support](message-data-model-for-group-chat-messages-with-mentions-reactions-and-threading.md) — This module defines the Pydantic data models that represent chat messages in gro
### Cloud Domain — Orchestration
- [ee.cloud.__init__ — Cloud domain orchestration and FastAPI application bootstrap](eecloudinit-cloud-domain-orchestration-and-fastapi-application-bootstrap.md) — This module is the entry point for PocketPaw's enterprise cloud layer. It bootst
### Cloud Infrastructure
- [Cloud Document Models Re-export Hub for Beanie ODM](eecloudmodelsinit-central-re-export-hub-for-beanie-odm-document-definitions.md) — This module serves as a central re-export point for Beanie ODM document definiti
### Collaboration Features
- [comment — Threaded comments on pockets and widgets with workspace isolation](comment-threaded-comments-on-pockets-and-widgets-with-workspace-isolation.md) — This module defines the data models for a collaborative commenting system that e
### Core Domain Model
- [comment — Threaded comments on pockets and widgets with workspace isolation](comment-threaded-comments-on-pockets-and-widgets-with-workspace-isolation.md) — This module defines the data models for a collaborative commenting system that e
### Cross-Domain Communication
- [events — In-process async pub/sub event bus for decoupled cross-domain side effects](events-in-process-async-pubsub-event-bus-for-decoupled-cross-domain-side-effects.md) — This module provides a simple in-process publish/subscribe event bus that enable
### Data Model / Persistence
- [comment — Threaded comments on pockets and widgets with workspace isolation](comment-threaded-comments-on-pockets-and-widgets-with-workspace-isolation.md) — This module defines the data models for a collaborative commenting system that e
- [notification — In-app notification data model and persistence for user workspace events](notification-in-app-notification-data-model-and-persistence-for-user-workspace-e.md) — This module defines the data models for in-app notifications that inform users a
### Data Model Layer
- [message — Data model for group chat messages with mentions, reactions, and threading support](message-data-model-for-group-chat-messages-with-mentions-reactions-and-threading.md) — This module defines the Pydantic data models that represent chat messages in gro
### Data Transformation & Normalization
- [ripple_normalizer — Normalizes AI-generated pocket specifications into a consistent, persistence-ready format](ripplenormalizer-normalizes-ai-generated-pocket-specifications-into-a-consistent.md) — This module provides a single public function, `normalize_ripple_spec()`, that t
### Database Models
- [Cloud Document Models Re-export Hub for Beanie ODM](eecloudmodelsinit-central-re-export-hub-for-beanie-odm-document-definitions.md) — This module serves as a central re-export point for Beanie ODM document definiti
### Domain Model
- [message — Data model for group chat messages with mentions, reactions, and threading support](message-data-model-for-group-chat-messages-with-mentions-reactions-and-threading.md) — This module defines the Pydantic data models that represent chat messages in gro
### Enterprise Edition (EE) Architecture
- [Cloud Document Models Re-export Hub for Beanie ODM](eecloudmodelsinit-central-re-export-hub-for-beanie-odm-document-definitions.md) — This module serves as a central re-export point for Beanie ODM document definiti
### Enterprise Edition Cloud Infrastructure
- [ee/cloud/kb/__init__ — Knowledge Base Domain Package Initialization and Endpoint Exposure](eecloudkbinit-knowledge-base-domain-package-initialization-and-endpoint-exposure.md) — This module serves as the entry point for the Knowledge Base (KB) domain within
### Enterprise Features
- [ee.cloud.workspace — Router re-export for FastAPI workspace endpoints](eecloudworkspace-router-re-export-for-fastapi-workspace-endpoints.md) — This module serves as the public entry point for the workspace domain's FastAPI
### Enterprise SaaS
- [Workspace Domain Service - Business Logic for Enterprise Cloud](untitled.md) — A stateless service layer that encapsulates workspace business logic including C
### Enterprise cloud features
- [ee.cloud.sessions — Entry point and router export for session management APIs](eecloudsessions-entry-point-and-router-export-for-session-management-apis.md) — This module serves as the public API entry point for the sessions package, expor
### Error Handling & Global Middleware
- [ee.cloud.__init__ — Cloud domain orchestration and FastAPI application bootstrap](eecloudinit-cloud-domain-orchestration-and-fastapi-application-bootstrap.md) — This module is the entry point for PocketPaw's enterprise cloud layer. It bootst
### Event-Driven Architecture
- [ee.cloud.__init__ — Cloud domain orchestration and FastAPI application bootstrap](eecloudinit-cloud-domain-orchestration-and-fastapi-application-bootstrap.md) — This module is the entry point for PocketPaw's enterprise cloud layer. It bootst
- [events — In-process async pub/sub event bus for decoupled cross-domain side effects](events-in-process-async-pubsub-event-bus-for-decoupled-cross-domain-side-effects.md) — This module provides a simple in-process publish/subscribe event bus that enable
- [notification — In-app notification data model and persistence for user workspace events](notification-in-app-notification-data-model-and-persistence-for-user-workspace-e.md) — This module defines the data models for in-app notifications that inform users a
### Facade & Re-export Pattern
- [auth/__init__ — Central re-export hub for authentication and user management](authinit-central-re-export-hub-for-authentication-and-user-management.md) — This module serves as the public API facade for the entire authentication domain
### FastAPI HTTP endpoints
- [router — FastAPI authentication endpoints and user profile management](router-fastapi-authentication-endpoints-and-user-profile-management.md) — This module exposes HTTP endpoints for user authentication, registration, profil
### FastAPI Middleware & Dependency Injection
- [deps — FastAPI dependency injection layer for cloud router authentication and authorization](deps-fastapi-dependency-injection-layer-for-cloud-router-authentication-and-auth.md) — This module provides FastAPI dependency functions that extract and validate user
### FastAPI application architecture
- [ee.cloud.agents — Package initialization and router export for enterprise cloud agent functionality](eecloudagents-package-initialization-and-router-export-for-enterprise-cloud-agen.md) — This is a minimal package initialization module that serves as the public API en
### FastAPI integration
- [license — Enterprise license validation and feature gating for cloud deployments](license-enterprise-license-validation-and-feature-gating-for-cloud-deployments.md) — This module provides cryptographic validation of signed license keys, caching of
### HTTP validation layer
- [schemas — Pydantic models for authentication request/response validation](schemas-pydantic-models-for-authentication-requestresponse-validation.md) — This module defines three Pydantic BaseModel classes that standardize the shape
### Infrastructure Layer — Lifecycle Management
- [ee.cloud.__init__ — Cloud domain orchestration and FastAPI application bootstrap](eecloudinit-cloud-domain-orchestration-and-fastapi-application-bootstrap.md) — This module is the entry point for PocketPaw's enterprise cloud layer. It bootst
### Infrastructure/Foundation
- [events — In-process async pub/sub event bus for decoupled cross-domain side effects](events-in-process-async-pubsub-event-bus-for-decoupled-cross-domain-side-effects.md) — This module provides a simple in-process publish/subscribe event bus that enable
### Knowledge Base — Integration Layer
- [backend_adapter — Adapter that makes PocketPaw's agent backends usable as knowledge base CompilerBackends](backendadapter-adapter-that-makes-pocketpaws-agent-backends-usable-as-knowledge.md) — This module provides `PocketPawCompilerBackend`, an adapter class that implement
### Knowledge Management Domain
- [ee/cloud/kb/__init__ — Knowledge Base Domain Package Initialization and Endpoint Exposure](eecloudkbinit-knowledge-base-domain-package-initialization-and-endpoint-exposure.md) — This module serves as the entry point for the Knowledge Base (KB) domain within
### LLM Backend Abstraction
- [backend_adapter — Adapter that makes PocketPaw's agent backends usable as knowledge base CompilerBackends](backendadapter-adapter-that-makes-pocketpaws-agent-backends-usable-as-knowledge.md) — This module provides `PocketPawCompilerBackend`, an adapter class that implement
### Module Architecture / Facade Pattern
- [ee.cloud.workspace — Router re-export for FastAPI workspace endpoints](eecloudworkspace-router-re-export-for-fastapi-workspace-endpoints.md) — This module serves as the public entry point for the workspace domain's FastAPI
### MongoDB / Beanie
- [file — Cloud storage metadata document for managing file references](file-cloud-storage-metadata-document-for-managing-file-references.md) — This module defines the `FileObj` document model that stores metadata about file
### MongoDB document
- [agent — Agent configuration and metadata storage for workspace-scoped AI agents](agent-agent-configuration-and-metadata-storage-for-workspace-scoped-ai-agents.md) — This module defines the data models for storing agent configurations in the OCEA
### MongoDB persistence
- [base — Foundational document model with automatic timestamp management for MongoDB persistence](base-foundational-document-model-with-automatic-timestamp-management-for-mongodb.md) — This module provides `TimestampedDocument`, a base class that extends Beanie's O
- [session — Cloud-tracked chat session document model for pocket-scoped conversations](session-cloud-tracked-chat-session-document-model-for-pocket-scoped-conversation.md) — The session module defines the Session document model that represents individual
- [workspace — Data model for organization workspaces in multi-tenant enterprise deployments](workspace-data-model-for-organization-workspaces-in-multi-tenant-enterprise-depl.md) — This module defines the core data models that represent a workspace: the contain
### MongoDB/Beanie Persistence
- [message — Data model for group chat messages with mentions, reactions, and threading support](message-data-model-for-group-chat-messages-with-mentions-reactions-and-threading.md) — This module defines the Pydantic data models that represent chat messages in gro
### MongoDB/Beanie — database technology and ORM layer
- [group — Multi-user chat channels with AI agent participants](group-multi-user-chat-channels-with-ai-agent-participants.md) — This module defines the data models for chat groups/channels that support multip
### Multi-Tenant Access Control
- [deps — FastAPI dependency injection layer for cloud router authentication and authorization](deps-fastapi-dependency-injection-layer-for-cloud-router-authentication-and-auth.md) — This module provides FastAPI dependency functions that extract and validate user
### Multi-tenant Architecture
- [comment — Threaded comments on pockets and widgets with workspace isolation](comment-threaded-comments-on-pockets-and-widgets-with-workspace-isolation.md) — This module defines the data models for a collaborative commenting system that e
### Notification / User Communication
- [notification — In-app notification data model and persistence for user workspace events](notification-in-app-notification-data-model-and-persistence-for-user-workspace-e.md) — This module defines the data models for in-app notifications that inform users a
### ODM integration
- [db — MongoDB connection and Beanie ODM lifecycle management for PocketPaw cloud infrastructure](db-mongodb-connection-and-beanie-odm-lifecycle-management-for-pocketpaw-cloud-in.md) — This module provides a centralized, application-level abstraction for managing M
### Package structure and organization
- [ee.cloud.sessions — Entry point and router export for session management APIs](eecloudsessions-entry-point-and-router-export-for-session-management-apis.md) — This module serves as the public API entry point for the sessions package, expor
### Pydantic DTOs
- [schemas — Pydantic request/response data models for workspace domain operations](schemas-pydantic-requestresponse-data-models-for-workspace-domain-operations.md) — This module defines the contract between the workspace API layer and its consume
### Security Infrastructure
- [auth/__init__ — Central re-export hub for authentication and user management](authinit-central-re-export-hub-for-authentication-and-user-management.md) — This module serves as the public API facade for the entire authentication domain
### Session management domain
- [ee.cloud.sessions — Entry point and router export for session management APIs](eecloudsessions-entry-point-and-router-export-for-session-management-apis.md) — This module serves as the public API entry point for the sessions package, expor
### Specification Management
- [ripple_normalizer — Normalizes AI-generated pocket specifications into a consistent, persistence-ready format](ripplenormalizer-normalizes-ai-generated-pocket-specifications-into-a-consistent.md) — This module provides a single public function, `normalize_ripple_spec()`, that t
### User Management
- [AuthService: Business Logic Layer for Authentication and User Profile Management](authservice-business-logic-layer-for-authentication-and-user-profile-management.md) — AuthService is a stateless FastAPI service that encapsulates authentication and
### Utility & Infrastructure
- [ripple_normalizer — Normalizes AI-generated pocket specifications into a consistent, persistence-ready format](ripplenormalizer-normalizes-ai-generated-pocket-specifications-into-a-consistent.md) — This module provides a single public function, `normalize_ripple_spec()`, that t
### Workspace / Multi-tenancy
- [notification — In-app notification data model and persistence for user workspace events](notification-in-app-notification-data-model-and-persistence-for-user-workspace-e.md) — This module defines the data models for in-app notifications that inform users a
### Workspace Domain
- [ee.cloud.workspace — Router re-export for FastAPI workspace endpoints](eecloudworkspace-router-re-export-for-fastapi-workspace-endpoints.md) — This module serves as the public entry point for the workspace domain's FastAPI
### Workspace-Scoped Feature
- [ee/cloud/kb/__init__ — Knowledge Base Domain Package Initialization and Endpoint Exposure](eecloudkbinit-knowledge-base-domain-package-initialization-and-endpoint-exposure.md) — This module serves as the entry point for the Knowledge Base (KB) domain within
### agent management
- [agent — Agent configuration and metadata storage for workspace-scoped AI agents](agent-agent-configuration-and-metadata-storage-for-workspace-scoped-ai-agents.md) — This module defines the data models for storing agent configurations in the OCEA
### agents domain
- [schemas — Pydantic request/response models for agent lifecycle and discovery operations](schemas-pydantic-requestresponse-models-for-agent-lifecycle-and-discovery-operat.md) — This module defines four Pydantic BaseModel classes that serve as the contract l
### application lifecycle
- [db — MongoDB connection and Beanie ODM lifecycle management for PocketPaw cloud infrastructure](db-mongodb-connection-and-beanie-odm-lifecycle-management-for-pocketpaw-cloud-in.md) — This module provides a centralized, application-level abstraction for managing M
### architectural pattern — facade
- [db — Backward compatibility facade for cloud database initialization](db-backward-compatibility-facade-for-cloud-database-initialization.md) — This module is a thin re-export layer that delegates all database functionality
### architectural refactoring
- [service — Chat domain re-export facade for backward compatibility](service-chat-domain-re-export-facade-for-backward-compatibility.md) — This module serves as a thin re-export layer for the chat domain, consolidating
### architecture — module organization and facade patterns
- [__init__ — Facade module exposing shared cross-cutting concerns for the PocketPaw cloud ecosystem](init-facade-module-exposing-shared-cross-cutting-concerns-for-the-pocketpaw-clou.md) — This module serves as the public interface for shared utilities, services, and infrastructure used across the PocketPaw cloud platform.
### auth domain
- [schemas — Pydantic models for authentication request/response validation](schemas-pydantic-models-for-authentication-requestresponse-validation.md) — This module defines three Pydantic BaseModel classes that standardize the shape
### authentication
- [core — Enterprise JWT authentication with cookie and bearer transport for FastAPI](core-enterprise-jwt-authentication-with-cookie-and-bearer-transport-for-fastapi.md) — This module implements a complete authentication system for PocketPaw using fast
- [router — FastAPI authentication endpoints and user profile management](router-fastapi-authentication-endpoints-and-user-profile-management.md) — This module exposes HTTP endpoints for user authentication, registration, profil
### authorization
- [core — Enterprise JWT authentication with cookie and bearer transport for FastAPI](core-enterprise-jwt-authentication-with-cookie-and-bearer-transport-for-fastapi.md) — This module implements a complete authentication system for PocketPaw using fast
### authorization & access control
- [license — Enterprise license validation and feature gating for cloud deployments](license-enterprise-license-validation-and-feature-gating-for-cloud-deployments.md) — This module provides cryptographic validation of signed license keys, caching of
### backward compatibility
- [service — Chat domain re-export facade for backward compatibility](service-chat-domain-re-export-facade-for-backward-compatibility.md) — This module serves as a thin re-export layer for the chat domain, consolidating
### chat / messaging
- [session — Cloud-tracked chat session document model for pocket-scoped conversations](session-cloud-tracked-chat-session-document-model-for-pocket-scoped-conversation.md) — The session module defines the Session document model that represents individual
### chat domain
- [chat/__init__.py — Entry point for chat domain with groups, messages, and WebSocket real-time capabilities](chatinitpy-entry-point-for-chat-domain-with-groups-messages-and-websocket-real-t.md) — This module serves as the public API gateway for the chat domain, re-exporting t
- [service — Chat domain re-export facade for backward compatibility](service-chat-domain-re-export-facade-for-backward-compatibility.md) — This module serves as a thin re-export layer for the chat domain, consolidating
### chat/collaboration — domain area for group conversations
- [group — Multi-user chat channels with AI agent participants](group-multi-user-chat-channels-with-ai-agent-participants.md) — This module defines the data models for chat groups/channels that support multip
### cloud storage
- [file — Cloud storage metadata document for managing file references](file-cloud-storage-metadata-document-for-managing-file-references.md) — This module defines the `FileObj` document model that stores metadata about file
### collaborative features
- [pocket — Data models for Pocket workspaces with widgets, teams, and collaborative agents](pocket-data-models-for-pocket-workspaces-with-widgets-teams-and-collaborative-ag.md) — This module defines the core document models (Pocket, Widget, WidgetPosition) th
### compatibility layer
- [db — Backward compatibility facade for cloud database initialization](db-backward-compatibility-facade-for-cloud-database-initialization.md) — This module is a thin re-export layer that delegates all database functionality
### configuration storage
- [agent — Agent configuration and metadata storage for workspace-scoped AI agents](agent-agent-configuration-and-metadata-storage-for-workspace-scoped-ai-agents.md) — This module defines the data models for storing agent configurations in the OCEA
### cross-cutting concerns
- [base — Foundational document model with automatic timestamp management for MongoDB persistence](base-foundational-document-model-with-automatic-timestamp-management-for-mongodb.md) — This module provides `TimestampedDocument`, a base class that extends Beanie's O
### cross-cutting concerns — auth, errors, events shared across all features
- [__init__ — Facade module exposing shared cross-cutting concerns for the PocketPaw cloud ecosystem](init-facade-module-exposing-shared-cross-cutting-concerns-for-the-pocketpaw-clou.md) — This module serves as the public interface for shared utilities, services, and infrastructure used across the PocketPaw cloud platform.
### data model
- [file — Cloud storage metadata document for managing file references](file-cloud-storage-metadata-document-for-managing-file-references.md) — This module defines the `FileObj` document model that stores metadata about file
- [schemas — Pydantic request/response models for agent lifecycle and discovery operations](schemas-pydantic-requestresponse-models-for-agent-lifecycle-and-discovery-operat.md) — This module defines four Pydantic BaseModel classes that serve as the contract l
- [workspace — Data model for organization workspaces in multi-tenant enterprise deployments](workspace-data-model-for-organization-workspaces-in-multi-tenant-enterprise-depl.md) — This module defines the core data models that represent a workspace: the contain
### data model / ORM
- [session — Cloud-tracked chat session document model for pocket-scoped conversations](session-cloud-tracked-chat-session-document-model-for-pocket-scoped-conversation.md) — The session module defines the Session document model that represents individual
### data model / schema
- [pocket — Data models for Pocket workspaces with widgets, teams, and collaborative agents](pocket-data-models-for-pocket-workspaces-with-widgets-teams-and-collaborative-ag.md) — This module defines the core document models (Pocket, Widget, WidgetPosition) th
### data model layer
- [agent — Agent configuration and metadata storage for workspace-scoped AI agents](agent-agent-configuration-and-metadata-storage-for-workspace-scoped-ai-agents.md) — This module defines the data models for storing agent configurations in the OCEA
- [base — Foundational document model with automatic timestamp management for MongoDB persistence](base-foundational-document-model-with-automatic-timestamp-management-for-mongodb.md) — This module provides `TimestampedDocument`, a base class that extends Beanie's O
### data model — core persistent entity
- [group — Multi-user chat channels with AI agent participants](group-multi-user-chat-channels-with-ai-agent-participants.md) — This module defines the data models for chat groups/channels that support multip
### data model: ODM document
- [invite — Workspace membership invitation document model](invite-workspace-membership-invitation-document-model.md) — The invite module defines the Invite document class that represents pending workspace membership invitations sent to email addresses.
### data persistence
- [db — MongoDB connection and Beanie ODM lifecycle management for PocketPaw cloud infrastructure](db-mongodb-connection-and-beanie-odm-lifecycle-management-for-pocketpaw-cloud-in.md) — This module provides a centralized, application-level abstraction for managing M
### data validation
- [schemas — Pydantic request/response contracts for session lifecycle operations](schemas-pydantic-requestresponse-contracts-for-session-lifecycle-operations.md) — This module defines the HTTP API contracts (request bodies and response payloads
- [schemas — Pydantic request/response data models for workspace domain operations](schemas-pydantic-requestresponse-data-models-for-workspace-domain-operations.md) — This module defines the contract between the workspace API layer and its consume
- [schemas — Request/response data validation for the knowledge base REST API](schemas-requestresponse-data-validation-for-the-knowledge-base-rest-api.md) — This module defines Pydantic request/response schemas for the knowledge base dom
### dependency injection — fastapi and inversion of control
- [__init__ — Facade module exposing shared cross-cutting concerns for the PocketPaw cloud ecosystem](init-facade-module-exposing-shared-cross-cutting-concerns-for-the-pocketpaw-clou.md) — This module serves as the public interface for shared utilities, services, and infrastructure used across the PocketPaw cloud platform.
### document structure
- [pocket — Data models for Pocket workspaces with widgets, teams, and collaborative agents](pocket-data-models-for-pocket-workspaces-with-widgets-teams-and-collaborative-ag.md) — This module defines the core document models (Pocket, Widget, WidgetPosition) th
### domain: workspace access control
- [invite — Workspace membership invitation document model](invite-workspace-membership-invitation-document-model.md) — The invite module defines the Invite document class that represents pending workspace membership invitations sent to email addresses.
### enterprise cloud agents
- [ee.cloud.agents — Package initialization and router export for enterprise cloud agent functionality](eecloudagents-package-initialization-and-router-export-for-enterprise-cloud-agen.md) — This is a minimal package initialization module that serves as the public API en
### enterprise cloud platform
- [pockets.__init__ — Entry point and public API aggregator for the pockets subsystem](pocketsinit-entry-point-and-public-api-aggregator-for-the-pockets-subsystem.md) — This module serves as the public interface for the enterprise cloud pockets subs
### enterprise security
- [core — Enterprise JWT authentication with cookie and bearer transport for FastAPI](core-enterprise-jwt-authentication-with-cookie-and-bearer-transport-for-fastapi.md) — This module implements a complete authentication system for PocketPaw using fast
### file management
- [file — Cloud storage metadata document for managing file references](file-cloud-storage-metadata-document-for-managing-file-references.md) — This module defines the `FileObj` document model that stores metadata about file
### foundational infrastructure
- [base — Foundational document model with automatic timestamp management for MongoDB persistence](base-foundational-document-model-with-automatic-timestamp-management-for-mongodb.md) — This module provides `TimestampedDocument`, a base class that extends Beanie's O
### infrastructure layer
- [db — MongoDB connection and Beanie ODM lifecycle management for PocketPaw cloud infrastructure](db-mongodb-connection-and-beanie-odm-lifecycle-management-for-pocketpaw-cloud-in.md) — This module provides a centralized, application-level abstraction for managing M
### infrastructure — cloud database
- [db — Backward compatibility facade for cloud database initialization](db-backward-compatibility-facade-for-cloud-database-initialization.md) — This module is a thin re-export layer that delegates all database functionality
### knowledge base domain
- [schemas — Request/response data validation for the knowledge base REST API](schemas-requestresponse-data-validation-for-the-knowledge-base-rest-api.md) — This module defines Pydantic request/response schemas for the knowledge base dom
### licensing & commercialization
- [license — Enterprise license validation and feature gating for cloud deployments](license-enterprise-license-validation-and-feature-gating-for-cloud-deployments.md) — This module provides cryptographic validation of signed license keys, caching of
### module initialization
- [chat/__init__.py — Entry point for chat domain with groups, messages, and WebSocket real-time capabilities](chatinitpy-entry-point-for-chat-domain-with-groups-messages-and-websocket-real-t.md) — This module serves as the public API gateway for the chat domain, re-exporting t
### multi-tenancy
- [workspace — Data model for organization workspaces in multi-tenant enterprise deployments](workspace-data-model-for-organization-workspaces-in-multi-tenant-enterprise-depl.md) — This module defines the core data models that represent a workspace: the contain
### multi-tenancy — workspace scoping and data isolation
- [__init__ — Facade module exposing shared cross-cutting concerns for the PocketPaw cloud ecosystem](init-facade-module-exposing-shared-cross-cutting-concerns-for-the-pocketpaw-clou.md) — This module serves as the public interface for shared utilities, services, and infrastructure used across the PocketPaw cloud platform.
### multi-tenant architecture
- [router — FastAPI authentication endpoints and user profile management](router-fastapi-authentication-endpoints-and-user-profile-management.md) — This module exposes HTTP endpoints for user authentication, registration, profil
### multi-user feature — supports multiple participants with different roles
- [group — Multi-user chat channels with AI agent participants](group-multi-user-chat-channels-with-ai-agent-participants.md) — This module defines the data models for chat groups/channels that support multip
### package initialization
- [ee.cloud.agents — Package initialization and router export for enterprise cloud agent functionality](eecloudagents-package-initialization-and-router-export-for-enterprise-cloud-agen.md) — This is a minimal package initialization module that serves as the public API en
### package initialization and namespacing
- [pockets.__init__ — Entry point and public API aggregator for the pockets subsystem](pocketsinit-entry-point-and-public-api-aggregator-for-the-pockets-subsystem.md) — This module serves as the public interface for the enterprise cloud pockets subs
### pattern: invitation lifecycle
- [invite — Workspace membership invitation document model](invite-workspace-membership-invitation-document-model.md) — The invite module defines the Invite document class that represents pending workspace membership invitations sent to email addresses.
### real-time messaging infrastructure
- [chat/__init__.py — Entry point for chat domain with groups, messages, and WebSocket real-time capabilities](chatinitpy-entry-point-for-chat-domain-with-groups-messages-and-websocket-real-t.md) — This module serves as the public API gateway for the chat domain, re-exporting t
### request/response contracts
- [schemas — Request/response data validation for the knowledge base REST API](schemas-requestresponse-data-validation-for-the-knowledge-base-rest-api.md) — This module defines Pydantic request/response schemas for the knowledge base dom
### schema definition
- [agent — Agent configuration and metadata storage for workspace-scoped AI agents](agent-agent-configuration-and-metadata-storage-for-workspace-scoped-ai-agents.md) — This module defines the data models for storing agent configurations in the OCEA
- [schemas — Pydantic request/response data models for workspace domain operations](schemas-pydantic-requestresponse-data-models-for-workspace-domain-operations.md) — This module defines the contract between the workspace API layer and its consume
### security & cryptography
- [license — Enterprise license validation and feature gating for cloud deployments](license-enterprise-license-validation-and-feature-gating-for-cloud-deployments.md) — This module provides cryptographic validation of signed license keys, caching of
### security: token-based invitations
- [invite — Workspace membership invitation document model](invite-workspace-membership-invitation-document-model.md) — The invite module defines the Invite document class that represents pending workspace membership invitations sent to email addresses.
### service layer
- [core — Enterprise JWT authentication with cookie and bearer transport for FastAPI](core-enterprise-jwt-authentication-with-cookie-and-bearer-transport-for-fastapi.md) — This module implements a complete authentication system for PocketPaw using fast
- [service — Chat domain re-export facade for backward compatibility](service-chat-domain-re-export-facade-for-backward-compatibility.md) — This module serves as a thin re-export layer for the chat domain, consolidating
### sessions domain
- [schemas — Pydantic request/response contracts for session lifecycle operations](schemas-pydantic-requestresponse-contracts-for-session-lifecycle-operations.md) — This module defines the HTTP API contracts (request bodies and response payloads
### system-wide contracts
- [schemas — Pydantic models for authentication request/response validation](schemas-pydantic-models-for-authentication-requestresponse-validation.md) — This module defines three Pydantic BaseModel classes that standardize the shape
### temporal auditing
- [base — Foundational document model with automatic timestamp management for MongoDB persistence](base-foundational-document-model-with-automatic-timestamp-management-for-mongodb.md) — This module provides `TimestampedDocument`, a base class that extends Beanie's O
### workspace and collaboration domain
- [pockets.__init__ — Entry point and public API aggregator for the pockets subsystem](pocketsinit-entry-point-and-public-api-aggregator-for-the-pockets-subsystem.md) — This module serves as the public interface for the enterprise cloud pockets subs
### workspace domain
- [schemas — Pydantic request/response data models for workspace domain operations](schemas-pydantic-requestresponse-data-models-for-workspace-domain-operations.md) — This module defines the contract between the workspace API layer and its consume
### workspace management
- [pocket — Data models for Pocket workspaces with widgets, teams, and collaborative agents](pocket-data-models-for-pocket-workspaces-with-widgets-teams-and-collaborative-ag.md) — This module defines the core document models (Pocket, Widget, WidgetPosition) th
- [session — Cloud-tracked chat session document model for pocket-scoped conversations](session-cloud-tracked-chat-session-document-model-for-pocket-scoped-conversation.md) — The session module defines the Session document model that represents individual
- [workspace — Data model for organization workspaces in multi-tenant enterprise deployments](workspace-data-model-for-organization-workspaces-in-multi-tenant-enterprise-depl.md) — This module defines the core data models that represent a workspace: the contain

View File

@@ -0,0 +1,220 @@
# __init__ — Facade module exposing shared cross-cutting concerns for the PocketPaw cloud ecosystem
> This module serves as the public interface for shared utilities, services, and infrastructure used across the PocketPaw cloud platform. It acts as a barrel export that aggregates cross-cutting concerns—authentication, workspace management, event handling, licensing, and agent orchestration—making them discoverable and accessible to dependent modules. By centralizing these imports, it establishes clear dependencies and prevents circular import chains within the cloud subsystem.
**Categories:** architecture — module organization and facade patterns, dependency injection — fastapi and inversion of control, multi-tenancy — workspace scoping and data isolation, cross-cutting concerns — auth, errors, events shared across all features
**Concepts:** barrel_export_pattern, facade_pattern, multi_tenancy, workspace_scoping, dependency_injection, event_driven_architecture, cross_cutting_concerns, fastapi_dependencies, error_handling, authentication
**Words:** 1336 | **Version:** 1
---
## Purpose
The `shared/__init__.py` module exists as a **facade and aggregation point** for infrastructure-level functionality that spans multiple business domains within PocketPaw's cloud services. Rather than having individual feature modules (workspace, user, agent, etc.) discover and import their dependencies scattered across the codebase, this module curates and re-exports all common, reusable concerns.
This solves several architectural problems:
1. **Dependency Clarity**: Dependent code clearly sees what foundational services are available by importing from `shared`
2. **Circular Import Prevention**: By centralizing re-exports in one place, circular dependency chains are broken at the seam between feature layers
3. **API Stability**: The `shared` module acts as a contract—internal reorganizations don't break downstream modules as long as re-exports remain stable
4. **Onboarding**: New developers understand the ecosystem immediately by seeing all shared primitives in one place
## Key Components and Their Roles
Based on the import graph, this module aggregates these major concerns:
### Foundational Infrastructure
- **`errors`**: Custom exception hierarchy for cloud operations (authentication failures, workspace violations, etc.)
- **`deps`**: FastAPI dependency injection layer; provides factories for injecting authenticated context, workspace scoping, and rate-limit quotas into route handlers
### API Layer
- **`router`**: FastAPI router definitions; aggregates all HTTP endpoints exposed by the cloud module
### Core Domain Models
- **`workspace`**: Workspace entity and workspace-scoped operations; represents the isolation boundary for multi-tenant data
- **`user`**: User identity, authentication tokens, and user preferences
- **`agent`**: AI agent definitions and agent lifecycle management
- **`session`**: Cloud-tracked chat session documents for pocket-scoped conversations
- **`license`**: Licensing and subscription state; controls feature access
### Feature Domains
- **`comment`**: Threaded comments on pockets and widgets, isolated per workspace
- **`file`**: Cloud storage metadata documents that track file references
- **`group`**: Multi-user chat channels with AI agent participants
- **`invite`**: Workspace invitations and join flows
- **`message`**: Group chat messages with mentions, reactions, and threading support
- **`notification`**: In-app notifications for user and workspace events
- **`pocket`**: Pocket workspaces with widgets, teams, and collaborative agents (the primary business entity in PocketPaw)
### Integration Points
- **`agent_bridge`**: Bridges between cloud-hosted user data and external AI agent platforms
- **`event_handlers`**: Event subscriptions and handlers; ties domain events to side effects (notifications, agent triggers, etc.)
- **`core`**: Enterprise JWT authentication with cookie and bearer transport for FastAPI
## How It Works
### Import Resolution Flow
When a module outside `shared/` (e.g., a route handler or a service class) needs access to cross-cutting concerns:
```python
# Instead of:
from ee.cloud.errors import ValidationError
from ee.cloud.deps import get_current_user
from ee.cloud.workspace import WorkspaceService
# ... repeat for 10+ imports
# Developers write:
from ee.cloud.shared import (
    ValidationError,
    get_current_user,
    WorkspaceService,
    # ... all in one well-known location
)
```
### Dependency Graph Structure
```
shared/__init__.py (THIS MODULE)
    ↓ (re-exports)
    ├─ errors → exception types consumed by all handlers
    ├─ deps → FastAPI dependency functions injected into route signatures
    ├─ workspace → workspace context injected by deps
    ├─ user → user context injected by deps
    ├─ license → checked by authorization decorators
    ├─ event_handlers → subscribed to domain events
    └─ ... (domain services)
    ↑ (imported by)
    ├─ api.handlers (HTTP route handlers)
    ├─ services (business logic)
    └─ tasks (background jobs)
```
### Initialization Sequence
When the cloud module loads:
1. FastAPI application initializes
2. `shared/__init__.py` imports all sub-modules (errors, deps, router, etc.)
3. Dependency injection container is configured (in `deps`)
4. Event handlers register themselves (in `event_handlers`)
5. Routes are registered with the app (via `router`)
6. Workspace and user middleware inject context into request objects
7. Application is ready to serve requests
## Authorization and Security
While this module doesn't implement authorization itself, it serves as the **collection point** for security primitives:
- **`user`**: Contains user identity and authentication token validation
- **`session`**: Manages session expiration and revocation
- **`license`**: Enforces feature access control (e.g., pro features only available to paid workspaces)
- **`deps`**: Provides injectors like `get_current_user()` that middleware uses to authenticate requests
- **`group`**: Enables RBAC (role-based access control) within workspaces
- **`workspace`**: Enforces data isolation—one workspace cannot access another's data
Security checks cascade: authentication (user) → session validation → workspace membership → feature licensing → RBAC (group/role).
## Dependencies and Integration
### What This Module Depends On
All the modules it imports (errors, router, workspace, etc.) are **internal siblings** within the cloud subsystem. They form a tightly coupled domain model—workspace operations require user context, notifications require event handlers, etc.
### What Depends on This Module
Based on the import graph structure, this module is imported by:
- **HTTP Route Handlers**: `api.handlers` modules use shared services and dependency injection
- **Background Job Processors**: Async tasks use event handlers and workspace context
- **Tests**: Test suites import shared fixtures, mocks, and service factories
### Integration Pattern
The module follows the **barrel export pattern**:
```python
# shared/__init__.py (THIS FILE)
"""Shared cross-cutting concerns for the PocketPaw cloud module."""
# Implicit re-exports via standard Python import mechanics
```
The single docstring signals intent: "this is a facade for shared infrastructure." Dependent code then imports as:
```python
from ee.cloud.shared import get_current_user, WorkspaceError
```
Internally, each imported submodule (e.g., `errors.py`, `deps.py`) is a focused, single-responsibility module.
## Design Decisions
### 1. **Minimal Module—Maximum Clarity**
The `__init__.py` contains only a docstring and implicit re-exports. This is intentional:
- No runtime logic or initialization code clutters the file
- Import statements are self-documenting (the import list IS the API contract)
- Changes to internal module organization don't require code edits here (only structural reorganization)
### 2. **Facade Over Inheritance**
Instead of a base class that all services inherit from, the shared module aggregates services. This allows:
- Services to be composed freely without coupling to a base hierarchy
- Event handlers and dependencies to be injected rather than tightly coupled
- Easier testing (mock any service by injecting a test double)
### 3. **Workspace as the Data Isolation Boundary**
Workspace appears prominently in the exports because it's the **multi-tenancy seam**. Every feature domain (comment, message, file, group, invite, notification) is workspace-scoped. By centralizing workspace as a shared concept, the module enforces consistent isolation across all domains.
### 4. **Event-Driven Side Effects**
`event_handlers` is exported alongside domain services because the architecture decouples triggering an event (e.g., "user added to group") from handling it ("send notification"). Event handlers subscribe to domain events and perform side effects, reducing direct coupling between services.
### 5. **Dependency Injection as a First-Class Concern**
`deps` is a shared export because FastAPI route handlers rely on dependency injection for:
- Current user context (populated by auth middleware)
- Workspace scoping (populated by workspace middleware)
- Database session lifecycle management
This keeps route handlers thin and testable.
## Concepts and Patterns
- **Barrel Export Pattern**: Aggregate multiple submodules under a single public interface
- **Facade Pattern**: Present a unified interface to a complex subsystem (errors, services, dependencies)
- **Multi-Tenancy via Workspace Scoping**: Each operation is implicitly scoped to a workspace; data isolation is enforced at the domain layer
- **Dependency Injection**: Services and context are injected into handlers, not instantiated globally
- **Event-Driven Architecture**: Domain events trigger handlers asynchronously or synchronously
- **Cross-Cutting Concerns**: Authentication, logging, validation, and error handling span all features; this module aggregates them
- **FastAPI Dependency Injection**: Using FastAPI's `Depends()` to inject authenticated context and workspace scope into route signatures
## When to Use This Module
1. **Starting a New Feature**: Import shared services and dependency injectors as the foundation
2. **Writing Route Handlers**: Use `deps` to inject user and workspace context
3. **Handling Domain Events**: Subscribe to events in `event_handlers` and import event types
4. **Testing**: Mock services from `shared` and inject them into the code under test
5. **Onboarding New Developers**: This module is the map of the entire cloud subsystem's infrastructure
## What NOT to Do
1. **Don't add feature-specific code here**: This module is for truly cross-cutting, infrastructure-level concerns only
2. **Don't instantiate services directly**: Use dependency injection; let the `deps` layer manage lifecycles
3. **Don't bypass workspace scoping**: Always enforce workspace boundaries; never query all workspaces in a request context
4. **Don't create new circular dependencies**: If a sibling module (e.g., `workspace.py`) needs to import from another sibling (e.g., `user.py`), ensure no bidirectional imports exist; break cycles with interfaces or events
---
## Related
- [untitled](untitled.md)
- [workspace-data-model-for-organization-workspaces-in-multi-tenant-enterprise-depl](workspace-data-model-for-organization-workspaces-in-multi-tenant-enterprise-depl.md)
- [license-enterprise-license-validation-and-feature-gating-for-cloud-deployments](license-enterprise-license-validation-and-feature-gating-for-cloud-deployments.md)
- [deps-fastapi-dependency-injection-layer-for-cloud-router-authentication-and-auth](deps-fastapi-dependency-injection-layer-for-cloud-router-authentication-and-auth.md)
- [core-enterprise-jwt-authentication-with-cookie-and-bearer-transport-for-fastapi](core-enterprise-jwt-authentication-with-cookie-and-bearer-transport-for-fastapi.md)
- [agent-agent-configuration-and-metadata-storage-for-workspace-scoped-ai-agents](agent-agent-configuration-and-metadata-storage-for-workspace-scoped-ai-agents.md)
- [comment-threaded-comments-on-pockets-and-widgets-with-workspace-isolation](comment-threaded-comments-on-pockets-and-widgets-with-workspace-isolation.md)
- [file-cloud-storage-metadata-document-for-managing-file-references](file-cloud-storage-metadata-document-for-managing-file-references.md)
- [group-multi-user-chat-channels-with-ai-agent-participants](group-multi-user-chat-channels-with-ai-agent-participants.md)
- [invite-workspace-membership-invitation-document-model](invite-workspace-membership-invitation-document-model.md)
- [message-data-model-for-group-chat-messages-with-mentions-reactions-and-threading](message-data-model-for-group-chat-messages-with-mentions-reactions-and-threading.md)
- [notification-in-app-notification-data-model-and-persistence-for-user-workspace-e](notification-in-app-notification-data-model-and-persistence-for-user-workspace-e.md)
- [pocket-data-models-for-pocket-workspaces-with-widgets-teams-and-collaborative-ag](pocket-data-models-for-pocket-workspaces-with-widgets-teams-and-collaborative-ag.md)
- [session-cloud-tracked-chat-session-document-model-for-pocket-scoped-conversation](session-cloud-tracked-chat-session-document-model-for-pocket-scoped-conversation.md)

View File

@@ -0,0 +1,256 @@
# invite — Workspace membership invitation document model
> The invite module defines the Invite document class that represents pending workspace membership invitations sent to email addresses. It exists as a dedicated data model to manage the lifecycle of invitations—from creation through expiration, acceptance, or revocation—providing a clean separation between invitation domain logic and the service layer that consumes it. This module is foundational to PocketPaw's workspace access control system, enabling asynchronous onboarding of new workspace members with time-limited, role-based tokens.
**Categories:** domain: workspace access control, data model: ODM document, pattern: invitation lifecycle, security: token-based invitations
**Concepts:** Invite, Document, Beanie ODM, Indexed, Field, Pydantic validation, UTC timezone, unique constraint, soft delete pattern, token-based authentication
**Words:** 2040 | **Version:** 1
---
## Purpose
The invite module encapsulates the data model for workspace membership invitations in PocketPaw. Its core purpose is to represent a time-limited, role-based invitation token that allows users without workspace access to join a workspace at a specified membership level.
Why this module exists:
- **Deferred Access Control**: Invitations enable workspace owners to grant access to users who may not yet be in the system. The invitation exists independently of user authentication.
- **Temporal Constraints**: Invitations have explicit expiration windows (default 7 days). This requires a dedicated model to track expiry state separate from user or workspace objects.
- **Audit Trail**: The Invite document records who invited whom, the role being granted, and optionally which group the user should auto-join. This provides accountability for access provisioning.
- **Token-Based Distribution**: Invitations use unique tokens as distribution vectors—these can be sent via email, shared links, or embedded in communications without exposing internal IDs.
In the system architecture, the invite module sits at the intersection of authentication (tokens), authorization (roles), and workspace management. It bridges the gap between workspace owners (who provision access) and prospective members (who accept access).
## Key Classes and Methods
### Invite (Document)
The `Invite` class is a Beanie ODM document representing a single workspace membership invitation.
**Fields and their purposes:**
- `workspace: Indexed[str]` — The workspace ID this invitation grants access to. Indexed for fast lookups when retrieving invitations for a specific workspace. Cannot be null.
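- `email: Indexed[str]` — The target email address for this invitation. Indexed for fast lookup of invitations by email address. Because this index is not declared unique, it does not by itself prevent duplicate invitations to the same email for the same workspace; any de-duplication must be enforced by the service layer. This is the user-facing identifier before they accept and create a user account.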
- `role: str` — The membership role to assign upon acceptance. Constrained to exactly one of: `"admin"`, `"member"`, or `"viewer"`. Defaults to `"member"`. Uses a Pydantic regex pattern to enforce the constraint at serialization/validation time.
- `invited_by: str` — User ID of the person who created this invitation. Tracks accountability and enables features like "invitations sent by me."
- `token: Indexed[str, unique=True]` — A cryptographically unique token (likely generated by the invitation service). Indexed and enforced unique to prevent accidental duplicate tokens and enable fast lookups by token. This is the secret shared with the invitee.
- `group: str | None` — Optional Group ID. If set, the user auto-joins this group when they accept the invitation. Enables workspace owners to automatically onboard users into team structures.
- `accepted: bool` — Flag indicating whether this invitation has been acted upon. Defaults to `False`. Set to `True` when the invitee accepts and joins the workspace.
- `revoked: bool` — Flag indicating whether the invitation creator has revoked it before expiry. Defaults to `False`. Allows workspace owners to cancel invitations.
- `expires_at: datetime` — Absolute UTC timestamp when this invitation becomes invalid. Uses a factory function to default to 7 days from creation. Enables time-limited access control.
**Methods:**
- `expired` (property) — Returns `True` if the invitation has passed its `expires_at` timestamp, `False` otherwise. Handles timezone-naive datetime objects by assuming UTC. This is a computed property rather than a persisted field, meaning expiry is determined at read-time, not pre-computed. This design choice trades a small computation cost for simplicity: no need for background jobs to mark invitations as expired.
**Beanie Settings:**
- `name = "invites"` — Configures the MongoDB collection name to `"invites"` (not the default plural of the class name).
### _default_expiry()
A module-level factory function that returns a datetime 7 days in the future (in UTC). Used as the default factory for the `expires_at` field. This ensures each invitation created gets a fresh 7-day window rather than sharing a single timestamp. Separated into its own function (rather than a lambda) for testability and clarity.
## How It Works
**Invitation Lifecycle:**
1. **Creation**: When a workspace owner invites someone, the invitation service (not shown in this module) creates an Invite document with:
- The target `email` and workspace
- A unique `token` (cryptographically generated)
- The role to grant (`role`)
- The inviter's user ID (`invited_by`)
- Optional `group` for auto-join
- Auto-calculated `expires_at` (7 days from now)
- `accepted=False, revoked=False` by default
2. **Distribution**: The token is embedded in an email link or shareable URL and sent to the `email` address.
3. **Acceptance**: When the invitee clicks the link or provides the token, the invitation service:
- Queries for the Invite by `token`
- Validates that `not expired`, `not accepted`, and `not revoked`
- Creates a new user account or links to existing account
- Sets `accepted=True` on the Invite
- Creates a workspace membership with the specified `role`
- Auto-joins the `group` if specified
4. **Expiration/Revocation**: Invitations can end in three ways:
- **Expiry**: If `expires_at` passes, the `expired` property returns `True`, and the invitation service rejects acceptance attempts
- **Revocation**: If the creator calls revoke, `revoked=True` is set, and acceptance fails
- **Acceptance**: If the user accepts, `accepted=True` is set
**Data Flow Example:**
```
Workspace Owner              Invite Document                    Invitee
      |                             |                             |
      |-- Creates Invite ---------> |                             |
      |   (sets workspace, email,   |                             |
      |    role, token, expires_at) |                             |
      |                             |                             |
      |                             |-- Email with token -------> |
      |                             |                             |
      |                             | <-- Accepts ----------------|
      |                             |     (provides token)        |
      |                             |                             |
      |                             [Validate:                    |
      |                              - token exists               |
      |                              - not expired                |
      |                              - not revoked                |
      |                              - not accepted]              |
      |                             |                             |
      |                             |-- set accepted=True         |
      |                             |                             |
      |                             |-- Create membership with role
      |                             |                             |
```
**Edge Cases:**
- **Timezone Handling**: The `expired` property normalizes timezone-naive datetimes to UTC before comparison. This handles documents created in environments without explicit timezone info.
- **Unique Token Constraint**: The `unique=True` constraint on `token` at the database level prevents two invitations with the same token, which could bypass acceptance controls.
- **Immutable Role**: Once an invitation is created with a role, changing the role requires creating a new invitation. This prevents privilege escalation attacks where a user could modify an in-flight invitation.
## Authorization and Security
**Access Control Implications:**
- **Token-Based**: Invitations use tokens rather than direct user IDs, preventing unauthorized acceptance by users who didn't receive the invitation.
- **Expiration**: Time limits prevent indefinite validity windows, reducing the window for token compromise or misuse.
- **Role Constraint**: The regex pattern on the `role` field enforces only valid role values at the model level, preventing invalid roles from being persisted.
- **Revocation**: The `revoked` flag allows immediate cancellation without waiting for expiry, enabling response to security concerns.
**Service-Level Controls (not in this module):**
The invitation service (the service-layer code that imports and consumes this model) must validate:
- That only workspace admins can create invitations
- That tokens are cryptographically random and unpredictable
- That acceptance checks all validation flags before granting access
- That revocation only works for unaccepted invitations
## Dependencies and Integration
**External Dependencies:**
- **Beanie** (`from beanie import Document, Indexed`) — MongoDB async ODM. The Invite class extends Document, gaining persistence, validation, and indexing capabilities. Beanie handles serialization to/from BSON.
- **Pydantic** (`from pydantic import Field`) — Data validation. Used here for:
- Field constraints (the regex pattern on `role`)
- Field metadata (default values, factories)
- Type coercion and validation on load/save
- **Python datetime** (`from datetime import UTC, datetime, timedelta`) — Standard library for timezone-aware timestamps. UTC is used throughout to avoid timezone ambiguity in a distributed system.
**Internal Integration Points:**
- **Imported by `__init__`**: The Invite class is exported in the module's `__init__.py`, making it available to other packages in the codebase. This follows a pattern of exposing public domain models through a clean API.
- **Imported by `service`**: The invitation service layer (not shown) uses Invite as both:
- A data persistence layer (querying, creating, updating documents)
- A validation schema (checking fields like `expired`, `revoked`, `accepted`)
- **Workspace Model** (implicit): Invitations reference workspaces by ID. The service layer must ensure the referenced workspace exists.
- **User Model** (implicit): The `invited_by` field references a user ID. The service layer must validate this user exists and has permission to invite.
- **Group Model** (implicit): The optional `group` field references a group ID. The service layer must validate this group exists in the target workspace.
**Reverse Dependencies:**
Code that imports Invite depends on its stability. Changes to field names, types, or validation rules impact:
- The invitation service layer (must update queries and creation logic)
- API endpoints that expose invitations (must update response schemas)
- Frontend code that displays invitations
## Design Decisions
**1. Expiry as a Computed Property, Not a Batch Job**
The `expired` property computes expiry at read-time rather than using a background job to mark invitations as expired. This trades a microsecond of CPU cost per read for:
- **No stale state**: An invitation is never marked "expired" in the database; expiry is determined by comparison.
- **No background complexity**: No need to schedule and monitor a cleanup job.
- **Simpler reasoning**: The invitation is always in sync with the current time.
The downside is that queries like "find all non-expired invitations" require fetching all invitations and filtering in application code (unless handled by the service layer with a query that filters `expires_at > now`).
**2. Unique Token at the Database Level**
The `unique=True` constraint on `token` creates a unique index in MongoDB. This means:
- Token collisions are impossible at the database layer
- Attempting to insert a duplicate token fails with a database error (which the service layer must handle)
- No two invitations can share a token, preventing acceptance ambiguity
This is more secure than a service-layer check because it's enforced by the database, preventing race conditions where two simultaneous requests create tokens with the same value.
**3. Soft Delete with Flags (accepted, revoked) Rather Than Hard Delete**
Invitations use boolean flags instead of deletion:
- **Audit Trail**: Historical records of who was invited when remain queryable
- **Idempotency**: Accepting an already-accepted invitation can be detected (check `accepted` flag)
- **Revocation History**: Revoked invitations remain in the database for auditing
The downside is that queries must filter on these flags to find "active" invitations.
**4. Role as a String with Pattern Validation Rather Than an Enum**
The `role` field is a string with regex pattern validation rather than a Python Enum or a separate Role collection. This allows:
- Flexibility: New roles can be added in the service layer without schema migrations
- Simplicity: No circular imports or separate role models
- Pydantic validation: The pattern is checked at serialization/deserialization
The downside is type safety: IDEs cannot autocomplete valid role values, and typos in the service layer won't be caught at type-check time.
**5. Group as Optional Rather Than Required**
The `group` field is nullable (`str | None = None`). This allows:
- Flexible invitation workflows: Invitations without auto-group-join
- Later enhancement: Auto-join logic can be added to the service without schema migration
The service layer must validate that if `group` is provided, it exists in the target workspace.
**6. Indexed Fields for Query Performance**
The fields `workspace`, `email`, and `token` are indexed:
- **workspace**: Fast "find all invitations for this workspace"
- **email**: Fast "find all invitations to this email"
- **token**: Fast "find invitation by token" (used during acceptance)
These indexes are critical for the happy path: when an invitee clicks a link with a token, the service does a fast indexed lookup.
## Common Patterns and Usage
**Pattern: Invitation Acceptance**
```python
# Pseudo-code: how the service layer uses Invite
invite = await Invite.find_one({"token": provided_token})
if invite and not invite.expired and not invite.revoked and not invite.accepted:
    # Create membership
    # Update document
    invite.accepted = True
    await invite.save()
else:
    # Reject: expired, revoked, already accepted, or invalid token
    ...
```
**Pattern: Finding Active Invitations**
```python
# Pseudo-code: find invitations a user can still act upon
active = await Invite.find({
"workspace": workspace_id,
"email": user_email,
"revoked": False,
"accepted": False,
# expires_at > now is handled in-app via the expired property
}).to_list()
# Filter further in-app: active = [i for i in active if not i.expired]
```
**Pattern: Revoking an Invitation**
```python
# Pseudo-code: revoke before acceptance
invite = await Invite.find_one({"token": token})
if invite and not invite.accepted:
    invite.revoked = True
    await invite.save()
else:
    # Too late: already accepted or doesn't exist
    ...
```
---
## Related
- [eecloudworkspace-router-re-export-for-fastapi-workspace-endpoints](eecloudworkspace-router-re-export-for-fastapi-workspace-endpoints.md)
- [untitled](untitled.md)

View File

@@ -0,0 +1,281 @@
# license — Enterprise license validation and feature gating for cloud deployments
> This module provides cryptographic validation of signed license keys, caching of license state, and FastAPI dependency injection hooks to gate enterprise features. It exists to enforce licensing policies at runtime while maintaining a clean separation between licensing logic and business logic, enabling PocketPaw to support both open-source and commercial deployment models.
**Categories:** licensing & commercialization, authorization & access control, FastAPI integration, security & cryptography
**Concepts:** LicensePayload, LicenseInfo, Ed25519 cryptography, HMAC-SHA256 fallback, FastAPI dependency injection, Depends(), HTTPException, require_license, require_feature, get_license_info
**Words:** 1795 | **Version:** 1
---
## Purpose
The `license` module is the runtime enforcement layer for PocketPaw's enterprise licensing system. It solves two problems:
1. **Verification**: Ensure that license keys provided at deployment time are authentic (signed by the license server) and valid (not expired, issued to a legitimate org).
2. **Authorization**: Gate access to premium features at the HTTP endpoint level using FastAPI's dependency injection system, preventing unlicensed deployments from accessing enterprise functionality.
This module exists as a separate concern because licensing is orthogonal to core business logic—a user management service shouldn't need to know about license states. By centralizing this here, the system can:
- Use Ed25519 cryptography to verify license authenticity without storing the private key in the codebase
- Support multiple deployment models: self-hosted (HMAC-SHA256 fallback), cloud (Ed25519 verification), and open-source (no license required; license-gated enterprise endpoints return 403)
- Cache the license on first load to avoid repeated disk/env lookups
- Provide a single source of truth for license state across all endpoints
## Key Classes and Methods
### `LicensePayload(BaseModel)`
The data model representing the contents of a valid license key. It holds:
- **`org`** (str): Organization identifier (e.g., "acme-inc"), used for audit logging and multi-tenancy
- **`plan`** (str): License tier—"team" (default, 5 seats), "business", or "enterprise"
- **`seats`** (int): Number of concurrent users allowed (default 5)
- **`exp`** (str): Expiration date in ISO format (e.g., "2027-01-01")
- **`features`** (list[str]): Optional feature flags (e.g., ["analytics", "sso"])
**Key properties:**
- **`expired`** (property): Returns True if current UTC time > expiration date. Handles date parsing errors gracefully by returning True (fail-safe: invalid dates are treated as expired).
- **`has_feature(feature: str)`** (method): Returns True if the feature is in the features list OR the plan is "enterprise" (enterprise always unlocks all features). This implements the business rule that enterprise licenses are feature-complete.
### `_verify_signature(payload_bytes: bytes, signature_hex: str) -> bool`
Cryptographic validation function with a fallback chain:
1. **Primary (Ed25519)**: If `POCKETPAW_LICENSE_PUBLIC_KEY` is set, verify the signature using the public key embedded in the code. This is the secure path for cloud deployments.
2. **Fallback (HMAC-SHA256)**: If no public key is configured, compute `SHA256("<secret>:<payload>")` and compare. This allows self-hosted deployments to use a simpler symmetric key model without managing keypairs.
3. **Reject**: If neither key is available, return False (fail-safe).
The function catches all exceptions (malformed hex, cryptography library errors) and returns False, preventing crashes from bad input.
### `validate_license_key(key: str) -> LicensePayload`
The main parsing and validation entry point. It:
1. Base64-decodes the license key string
2. Splits on the last "." to separate payload from signature
3. Verifies the signature cryptographically
4. JSON-deserializes the payload into a `LicensePayload` object
5. Checks expiration
6. Raises `ValueError` with a specific message if any step fails
This is the only function that parses untrusted input, so all validation is concentrated here.
### `load_license() -> LicensePayload | None`
Startup-time license loader:
1. Returns cached license if already loaded (prevents re-parsing)
2. Attempts to load `.env` file (via `dotenv`) if available
3. Reads `POCKETPAW_LICENSE_KEY` from environment
4. Calls `validate_license_key()` and caches the result
5. Returns None if key is missing or invalid, storing the error reason in `_license_error` for later reporting
6. Logs success/failure at WARNING level so operators see licensing status in startup output
This is called during app initialization (via FastAPI startup hooks or explicit imports).
### `get_license() -> LicensePayload | None`
Lazy loader and cache getter. Returns the cached license if available; otherwise calls `load_license()`. This is safe to call on every request because the cache prevents repeated parsing.
### `async require_license() -> LicensePayload`
A FastAPI dependency that gates endpoints behind a valid license:
```python
@app.get("/api/enterprise/thing")
async def get_thing(license: LicensePayload = Depends(require_license)):
    # Only reachable if license is valid and not expired
    ...
```
Raises `HTTPException(403)` with a descriptive error message if:
- License is None (not configured)
- License is expired
The error message includes the stored license error (e.g., "Invalid signature") so operators can debug configuration issues.
### `require_feature(feature: str)`
A dependency factory that returns a specialized dependency for per-feature gating:
```python
@app.get("/api/sso/config")
async def get_sso_config(license: LicensePayload = Depends(require_feature("sso"))):
    # Only reachable if license exists, isn't expired, AND includes "sso" feature
    ...
```
Composed as: calls `require_license()` (ensures a valid license exists), then checks `license.has_feature(feature)`. Raises `HTTPException(403)` with the plan name if the feature is not included.
### `LicenseInfo(BaseModel)` & `get_license_info() -> LicenseInfo`
A read-only view of license state for the settings/admin UI:
- **`valid`** (bool): True if license exists and is not expired
- **`org`, `plan`, `seats`, `exp`** (optional): Populated from the license payload
- **`error`** (optional): Human-readable error message (e.g., "License expired", "Invalid signature")
`get_license_info()` always returns a `LicenseInfo` object (never raises), making it safe to expose via a public endpoint for UI rendering.
## How It Works
### Initialization Flow
1. **App startup**: The FastAPI app imports this module (or explicitly calls `load_license()`)
2. `load_license()` reads the environment variable and validates the key
3. The `LicensePayload` is cached in `_cached_license` and the app continues normally
4. If validation fails, `_license_error` is set and subsequent license checks return None
### Request-Time License Check
1. A client hits an endpoint that declares `Depends(require_license)` or `Depends(require_feature(...))` as a dependency (either as a handler parameter default or in the route's `dependencies=[...]` list)
2. FastAPI calls the dependency function
3. The dependency calls `get_license()`, which returns the cached `LicensePayload` (fast path) or None
4. If None, an HTTPException(403) is raised; FastAPI returns a 403 response to the client
5. If valid, the endpoint handler receives the license as an argument and proceeds
### Key Data Flow
```
POCKETPAW_LICENSE_KEY (env var)
        ↓
validate_license_key()
  ├─ base64 decode
  ├─ split on "."
  ├─ _verify_signature() → cryptographic check
  └─ JSON deserialize → LicensePayload
        ↓
_cached_license
        ↓
get_license() → (used by endpoints)
        ↓
require_license() [FastAPI dependency]
        ↓
HTTPException(403) or endpoint handler
```
### Edge Cases
1. **Missing public key**: If `POCKETPAW_LICENSE_PUBLIC_KEY` is not set, the system falls back to HMAC-SHA256. This allows self-hosted installations to validate licenses without managing asymmetric keys.
2. **Unparseable dates**: If the `exp` field cannot be parsed as an ISO date, `expired` returns True (fail-safe: invalid licenses are treated as expired).
3. **Missing .env file**: The code attempts to load `.env` via `python-dotenv`, but ignores ImportError if the library isn't installed. This allows the module to work in environments where `.env` files aren't used.
4. **Expired enterprise key with no public key**: If the key has expired but `POCKETPAW_LICENSE_SECRET` is set, the HMAC signature check may pass, but the expiration check still fails, so the license is rejected.
5. **Concurrent requests**: The cache is not thread-locked, but loading the license twice is idempotent and safe (parsing the same environment variable twice yields the same result).
## Authorization and Security
### Cryptographic Security
- **Production (cloud)**: License keys are signed with Ed25519, a modern elliptic-curve signature scheme (specified in RFC 8032 and approved by NIST in FIPS 186-5). Like other elliptic-curve schemes, it is not post-quantum resistant. The public key is embedded in this file; the private key exists only on the license server. An attacker cannot forge a license without the private key.
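- **Self-hosted fallback**: Uses HMAC-SHA256 with a shared secret (`POCKETPAW_LICENSE_SECRET`). The secret must be provisioned out-of-band and kept confidential. Because the scheme is symmetric, anyone who holds the secret can also mint valid licenses, and a short or guessable secret can be brute-forced; this makes the fallback weaker than Ed25519 but acceptable for internal deployments.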
- **No license**: If neither key is configured, all signature checks fail. Deployments without licensing can run open-source features but cannot access enterprise endpoints.
### Access Control
Two layers of gating:
1. **`require_license()`**: Requires a valid, non-expired license. Permits any plan (team, business, enterprise).
2. **`require_feature(feature_name)`**: Requires a valid license that explicitly includes the feature, or is on the "enterprise" plan. Per-feature access control allows granular commercialization.
### No User-Level Licensing
This module does not implement per-seat or per-user licensing (seat counting is not performed). The `seats` field in the payload is informational; it's the operator's responsibility to enforce user limits at the organization or reverse-proxy level.
## Dependencies and Integration
### External and Standard-Library Dependencies
- **`fastapi`**: Used for `Depends`, `HTTPException`, and the `Request` type hint
- **`pydantic`**: Used for `BaseModel` to define `LicensePayload` and `LicenseInfo`
- **`cryptography` (conditional)**: Only imported if Ed25519 verification is attempted; if unavailable or key is invalid, falls back to HMAC
- **`python-dotenv` (optional)**: Attempts to load `.env` files; gracefully skipped if not installed
- **`datetime`**: For expiration date parsing and comparison
### What Imports This Module
Based on the import graph:
- **`__init__` (package init)**: Re-exports key functions and classes (`require_license`, `require_feature`, `get_license_info`) so they're available as `from pocketpaw.ee.cloud import require_license`
- **`router`**: A FastAPI router module that uses `require_license()` and `require_feature()` to protect enterprise endpoints
### How It Integrates
```python
# In router.py (example usage)
from fastapi import APIRouter, Depends

from .license import require_license, require_feature

router = APIRouter(prefix="/api/enterprise")

@router.get("/analytics", dependencies=[Depends(require_license)])
async def get_analytics():
    return {...}

@router.post("/sso/config", dependencies=[Depends(require_feature("sso"))])
async def set_sso_config(config: SSOConfig):
    return {...}
```
The `router` imports from `license` to decorate endpoints, ensuring that only licensed deployments can call them.
## Design Decisions
### 1. **Dual-Key Strategy (Ed25519 + HMAC)**
Rather than requiring all deployments to manage a public key, the code supports two modes:
- Cloud/SaaS: Customers get a signed license key; the public key is embedded
- Self-hosted: Customers get a secret; they compute an HMAC to verify
This lowers friction for self-hosted deployments while maintaining strong cryptographic guarantees for cloud.
### 2. **Caching the License**
The license is loaded once and cached. This avoids repeated environment variable reads and JSON parsing on every request. The cache is never invalidated (licenses are static at runtime), and there's no background refresh logic, which keeps the code simple but requires a restart to pick up license changes.
### 3. **Fail-Safe Defaults**
- Invalid dates → expired
- Missing public key + missing secret → all signatures fail
- Parsing errors → logged and cached as None
These prevent accidental security leaks if configuration is partial.
### 4. **Separation of Validation and Authorization**
- `validate_license_key()` is pure: it parses and validates structure/signature
- `require_license()` is async and raises HTTP exceptions: it enforces policy
This separation allows unit testing of validation logic independently of FastAPI's request context.
### 5. **Per-Feature Gating via Dependency Factory**
`require_feature(feature)` returns a closure-based dependency. This allows:
```python
@app.get("/sso", dependencies=[Depends(require_feature("sso"))])
@app.get("/analytics", dependencies=[Depends(require_feature("analytics"))])
```
Without the factory pattern, you'd need to hardcode the feature name inside each endpoint. The factory decouples feature names from endpoint definitions.
### 6. **License Info Endpoint (Non-Throwing)**
`get_license_info()` is designed to be called from public, unauthenticated endpoints (like a health check or settings page). It never raises, always returns a `LicenseInfo` object, and includes error messages for debugging. This lets operators diagnose licensing issues via a simple GET request.
### 7. **Global State (Cached License)**
The module uses module-level variables `_cached_license` and `_license_error`. This is stateful but acceptable because:
- Licenses don't change at runtime (no race conditions)
- All threads/workers share the same environment variable
- The cache is read-heavy (every request) and write-once (startup), favoring simplicity over locking
In a future refactor, this could be moved to a singleton service class if the app's state management grows more complex.
---
## Related
- [eecloudworkspace-router-re-export-for-fastapi-workspace-endpoints](eecloudworkspace-router-re-export-for-fastapi-workspace-endpoints.md)
- [untitled](untitled.md)

View File

@@ -0,0 +1,201 @@
# message — Data model for group chat messages with mentions, reactions, and threading support
> This module defines the Pydantic data models that represent chat messages in groups, including support for mentions, file attachments, emoji reactions, and message threading. It exists as a dedicated model layer to provide a single source of truth for message structure across the application, enabling consistent validation and serialization when messages are created, retrieved, or modified. The module serves as the bridge between the MongoDB persistence layer (via Beanie ODM) and higher-level services that need to work with message data.
**Categories:** Chat & Messaging, Data Model Layer, MongoDB/Beanie Persistence, Domain Model
**Concepts:** Message, Mention, Attachment, Reaction, TimestampedDocument, group_id_indexing, compound_index, soft_delete, message_threading, user_mentions
**Words:** 1718 | **Version:** 1
---
## Purpose
The `message` module defines the complete schema for group chat messages in PocketPaw. It exists to:
1. **Provide a single source of truth for message structure** — All code that reads or writes messages depends on these definitions, ensuring consistency across the codebase
2. **Enable validation at the boundary** — Pydantic models validate message data when it enters the system, catching malformed data before it reaches the database
3. **Support rich chat features** — The schema accommodates modern chat requirements: mentions (@user, @agent, @everyone), file/media attachments, emoji reactions, and threaded replies
4. **Enable MongoDB indexing for performance** — The `Message` class defines database indexes for the common query pattern of fetching messages from a group sorted by creation time
In the system architecture, this module sits in the **data model layer** — it defines the contract between the API layer (routers), the service layer (message_service), and the persistence layer (Beanie/MongoDB). Services and routers import and use these models when validating requests, transforming database documents, and returning responses to clients.
## Key Classes and Methods
### `Mention(BaseModel)`
Represents a mention (tag) of a user, agent, or group within message content.
**Fields:**
- `type: str` — The entity being mentioned: `"user"` (individual user), `"agent"` (bot/AI agent), or `"everyone"` (group mention)
- `id: str` — The unique identifier of the mentioned entity (User ID or Agent ID). Empty string for @everyone mentions
- `display_name: str` — The human-readable name shown in the UI (e.g., `"@rohit"`, `"@PocketPaw"`)
**Business logic:** When a user types `@rohit` in a message, the frontend or service layer creates a `Mention` object with `type="user"`, `id=<rohit_user_id>`, and `display_name="rohit"`. This structured format enables:
- Efficient querying of messages mentioning specific users
- Triggering notifications when a user is mentioned
- Rendering mentions with proper styling/links in the UI
### `Attachment(BaseModel)`
Represents a file, image, or other content attached to a message.
**Fields:**
- `type: str` — The kind of attachment: `"file"` (generic document), `"image"` (photo/screenshot), `"pocket"` (PocketPaw-specific content), or `"widget"` (embedded interactive component)
- `url: str` — The downloadable/viewable URL where the attachment can be accessed
- `name: str` — The display name of the attachment (e.g., filename or title)
- `meta: dict` — Flexible metadata store for attachment-specific data (e.g., image dimensions, file size, video duration)
**Business logic:** Supports flexible attachment handling. A `"file"` attachment might have `meta={"size_bytes": 1024000, "mime_type": "application/pdf"}`, while an `"image"` attachment might have `meta={"width": 1920, "height": 1080}`. The flexible `meta` field avoids schema changes when new attachment types or properties are added.
### `Reaction(BaseModel)`
Represents an emoji reaction (like a thumbs-up or heart) that users can add to a message.
**Fields:**
- `emoji: str` — The emoji character or code (e.g., `"👍"`, `"❤️"`, `":+1:"`)
- `users: list[str]` — List of User IDs who have reacted with this emoji to the message
**Business logic:** Multiple reactions can be stored in a message's `reactions` list. When User A adds a 👍 reaction that User B already added, the system appends User A's ID to the existing reaction's `users` list rather than creating a duplicate. This normalized structure enables efficient queries like "show me all messages I reacted to with 👍".
### `Message(TimestampedDocument)`
The core model representing a single chat message in a group, inheriting from `TimestampedDocument` which provides `createdAt` and `updatedAt` timestamps.
**Fields:**
**Routing & Identification:**
- `group: Indexed(str)` — The ID of the group this message belongs to. Indexed for fast queries like "fetch all messages in group X"
- `sender: str | None` — The User ID of who sent this message. `None` indicates a system message (e.g., "User X joined the group")
- `sender_type: str` — Whether the sender is a `"user"` (human) or `"agent"` (bot/AI). Allows distinguishing human conversations from system/bot messages
- `agent: str | None` — The Agent ID if `sender_type == "agent"`
**Content & Formatting:**
- `content: str` — The text body of the message
- `mentions: list[Mention]` — Users, agents, or groups mentioned in this message
- `attachments: list[Attachment]` — Files, images, or other content attached to this message
**Threading & Reactions:**
- `reply_to: str | None` — The message ID of the parent message if this is a reply (threaded conversation). `None` for top-level messages
- `reactions: list[Reaction]` — Emoji reactions users have added to this message
**Audit Trail:**
- `edited: bool` — Flag indicating whether this message has been edited after creation
- `edited_at: datetime | None` — Timestamp when the message was last edited. `None` if never edited
- `deleted: bool` — Soft delete flag. `True` means the message is logically deleted but remains in the database for audit/compliance
**Database Configuration (Settings class):**
```
name = "messages" # MongoDB collection name
indexes = [[('group', 1), ('createdAt', -1)]] # Compound index: group ascending, creation time descending
```
This index optimizes the most common query: "fetch messages from group X, sorted newest-first". The descending `createdAt` ensures fetching the latest messages without additional sorting overhead.
## How It Works
### Data Flow
1. **Inbound (API → Message creation):** A client sends a POST request with message data → the FastAPI router validates the request body as a `Message` object → Pydantic automatically validates types and constraints → the message_service receives the validated `Message` instance
2. **Persistence (Message → MongoDB):** The message_service calls Beanie ODM to save the `Message` → Beanie serializes the Pydantic model to a BSON-compatible document → MongoDB stores the document with the `createdAt`/`updatedAt` timestamps from `TimestampedDocument`
3. **Outbound (MongoDB → API response):** The service queries MongoDB and Beanie deserializes documents back to `Message` instances → the router serializes `Message` to JSON in the HTTP response → clients receive fully structured message objects
### Key Patterns
**Hierarchical composition:** `Message` contains lists of `Mention`, `Attachment`, and `Reaction` objects. Each is a small, focused model that can be used independently if needed, but gains meaning when embedded in a message.
**Optional fields for flexibility:** Fields like `sender` (null for system messages), `reply_to` (null for top-level messages), `agent` (null for human senders), and `edited_at` (null for unedited messages) allow one schema to represent multiple scenarios without requiring multiple models.
**Soft deletes:** `deleted: bool` flag allows messages to be "removed" from the UI while preserving the record for audit trails or compliance. Queries should filter `deleted == False` when fetching live messages.
**Metadata flexibility:** The `Attachment.meta` field uses a generic `dict` to avoid schema coupling. New attachment properties can be added without changing the model definition.
## Authorization and Security
This module itself does not enforce authorization — it is a pure data model. However, **authorization must be enforced at higher layers:**
- **Service layer (message_service):** Before a user can read messages from a group, the service must verify the user has permission to access that group
- **API router:** Request handlers should check that the authenticated user owns/can modify a message before allowing edits or deletes
- **Soft deletes:** The `deleted` flag is not access control; it's a UX feature. Deleted messages should still only be visible to users with audit/admin permissions
**Security considerations:**
- Message `content` is treated as user-generated text that may contain injection attacks; sanitization should occur in the service or router layer
- `Mention.id` and `sender` fields should be validated as real entity IDs before storage
- Attachment URLs should be validated for safe protocols (https, trusted domains) to prevent malicious links
## Dependencies and Integration
### Dependencies (Inbound)
- **`ee.cloud.models.base.TimestampedDocument`** — Base class providing `createdAt` and `updatedAt` fields. Used to track message creation and modification times
- **`beanie.Indexed`** — ODM utility for marking the `group` field as indexed in MongoDB
- **`pydantic`** — Provides `BaseModel` and `Field` for validation and schema definition
### Dependents (Outbound)
- **`message_service`** — The core service layer that creates, retrieves, updates, and deletes messages. Receives and returns `Message` instances
- **`router`** — FastAPI route handlers that expose message CRUD endpoints. Validates incoming requests as `Message` and serializes responses
- **`agent_bridge`** — Agent/bot integration that may create messages on behalf of agents. Uses `Message` model with `sender_type="agent"`
- **`service`** — Likely a facade or aggregator service that coordinates across multiple models
- **`__init__`** — Module exports `Message` and related classes for public use
### Example Integration Flow
```
User sends message via web client
→ FastAPI POST /groups/{groupId}/messages
→ router validates request body as Message
→ message_service.create_message(message)
→ Beanie ODM saves to MongoDB
→ Returns saved Message with generated IDs and timestamps
→ router returns JSON serialization of Message
→ WebSocket or polling updates other clients with new message
```
## Design Decisions
### 1. Compound Index on (group, createdAt)
The index is ordered `(group, 1), (createdAt, -1)` because the dominant query is "fetch all messages in a group, newest first". This avoids scanning all group messages or sorting in memory.
### 2. Mentions as Embedded List, Not Document Reference
Mentions are embedded as `list[Mention]` rather than as references to a separate `mention` collection. This keeps all message context in one document and avoids extra queries when retrieving a message. Trade-off: if mention display names change (e.g., user renames), old messages show stale names.
### 3. Soft Deletes (deleted: bool) Over Hard Deletes
Using `deleted: bool` instead of removing documents from the database provides:
- Audit trail (can see what was deleted and when)
- Thread continuity (replies to deleted messages remain readable)
- Regulatory compliance (some regulations require data retention)
Trade-off: queries must always filter `deleted == False`, and storage cost increases for deleted messages.
### 4. Flat Reactions List vs. Nested
Reactions are stored as `list[Reaction]` where each `Reaction` groups an emoji with the users who used it:
```json
{
"emoji": "👍",
"users": ["user_1", "user_2"]
}
```
Alternative (rejected): Store as a mapping from emoji to user IDs (`dict[str, list[str]]`). The chosen approach is more explicit and type-safe with Pydantic validation.
### 5. Optional sender for System Messages
Setting `sender: None` indicates a system message rather than creating a special `SystemMessage` subclass. This keeps the schema simpler and allows one query to fetch all messages in a group, both human and system.
### 6. Indexing Only group and createdAt
No index on `sender`, `reply_to`, or `edited` means queries like "find all messages sent by user X" or "find all edited messages" require full scans. This implies these queries are either rare, performed asynchronously (background jobs), or are not in the critical path. If user timelines or edit tracking become common queries, additional indexes should be added.
---
## Related
- [base-foundational-document-model-with-automatic-timestamp-management-for-mongodb](base-foundational-document-model-with-automatic-timestamp-management-for-mongodb.md)
- [untitled](untitled.md)
- [eecloudworkspace-router-re-export-for-fastapi-workspace-endpoints](eecloudworkspace-router-re-export-for-fastapi-workspace-endpoints.md)

View File

@@ -0,0 +1,221 @@
# notification — In-app notification data model and persistence for user workspace events
> This module defines the data models for in-app notifications that inform users about workspace events (mentions, comments, replies, invites, agent completions, and shared pockets). It exists as a dedicated model layer to provide a clean, reusable schema for notification storage and querying, enabling the event system to persist user-facing notifications independently of transactional events. Notifications are workspace-scoped, recipient-indexed, and support lifecycle management (read status, expiration).
**Categories:** Data Model / Persistence, Notification / User Communication, Workspace / Multi-tenancy, Event-Driven Architecture
**Concepts:** Notification, NotificationSource, TimestampedDocument, in-app notifications, workspace scoping, recipient indexing, soft delete pattern, expiration lifecycle, Beanie ODM, MongoDB indexing
**Words:** 1487 | **Version:** 1
---
## Purpose
The notification module exists to provide a persistent, queryable representation of in-app notifications delivered to users. While events (handled elsewhere in the system) represent what happened in the system, notifications represent *communications about those events to specific users*.
**Why separate?** Notifications have distinct concerns:
- **Storage requirements**: Notifications must be queryable by recipient and read status for inbox-style UIs
- **Lifecycle management**: Notifications can expire, be marked read, or be dismissed—different from immutable events
- **Workspace scoping**: Notifications are workspace-isolated resources, unlike some transactional events
- **Performance**: A user may generate thousands of events; notifications are a smaller, intentionally curated subset
**System role**: This module sits at the data model layer, consumed by event handlers that translate system events into user notifications. The `event_handlers` module imports this to create notifications when meaningful events occur; the `__init__` re-exports it for clean public API.
## Key Classes and Methods
### `NotificationSource(BaseModel)`
A lightweight Pydantic model that captures *where* a notification originated—the resource that triggered it.
**Fields:**
- `type: str` — The resource type (e.g., "pocket", "comment", "invite") that triggered the notification
- `id: str` — The identifier of that resource
- `pocket_id: str | None` — Optional reference to a parent pocket, for nested-resource contexts (a comment within a pocket)
**Purpose**: Provides a back-reference so users can navigate from a notification to its source. Unlike storing raw IDs scattered across the Notification schema, this encapsulates the source as a cohesive unit. The optional `pocket_id` handles cases where the source is already within a pocket context.
### `Notification(TimestampedDocument)`
The primary notification persistence model, extending `TimestampedDocument` (which provides `created_at` and `updated_at` timestamps via the base class).
**Core Fields:**
- `workspace: Indexed(str)` — Workspace ID; indexed for tenant isolation. Ensures notifications are workspace-scoped, preventing cross-workspace leakage.
- `recipient: Indexed(str)` — User ID receiving the notification; indexed for fast inbox queries ("get all my notifications").
- `type: str` — Notification category: "mention", "comment", "reply", "invite", "agent_complete", or "pocket_shared". Drives UI rendering logic (different icons/colors per type).
- `title: str` — Short, human-readable summary (e.g., "John mentioned you in a comment").
- `body: str` — Optional longer description or context.
- `source: NotificationSource | None` — Backreference to the triggering resource. Optional because some notifications may be system-generated without a specific source.
- `read: bool = False` — Soft read state. Notifications are not deleted, only marked read. Enables "undo" semantics and analytics.
- `expires_at: datetime | None` — Optional expiration timestamp. Notifications can auto-expire (e.g., time-limited invites). Queries can filter `expires_at > now()` to hide expired notifications.
**Database Settings:**
```python
class Settings:
name = "notifications" # MongoDB collection name
indexes = [
[('recipient', 1), ('read', 1), ('created_at', -1)]
]
```
The composite index optimizes the common query pattern: *"Get unread notifications for user X, sorted by recency."* This is the inbox query performed on every app load. The index enables efficient filtering by recipient and read status, then sorts by creation time descending (newest first).
## How It Works
### Notification Lifecycle
1. **Creation (Event Handler)**: When a system event occurs (e.g., a user mentions another user in a comment), the `event_handlers` module intercepts it and calls `Notification.insert()` with appropriate fields. The handler translates domain events into user-facing notification semantics.
2. **Storage**: Beanie ODM persists the document to MongoDB's `notifications` collection. Timestamps (`created_at`, `updated_at`) are set automatically by `TimestampedDocument`.
3. **Querying**: The inbox UI queries: `Notification.find(recipient=user_id, read=False).sort('created_at', -1)`. The composite index makes this efficient.
4. **User Interaction**:
- **Mark as read**: `Notification.update(read=True)` (typically bulk-updated)
- **Expire**: System daemon or query filter excludes notifications where `expires_at < now()`
- **Navigate to source**: UI extracts `notification.source.type` and `notification.source.id` to navigate user to the comment/pocket/invite.
5. **Retention**: Notifications are not hard-deleted; old read notifications remain in the database for audit/analytics. Admin retention policies may soft-delete (mark with a deleted flag) or archive in a separate collection.
### Data Flow Example
```
Event: User A comments in pocket P, mentioning User B
Event Handler (event_handlers module)
├─ Recognizes @mention pattern
├─ Translates to Notification document:
│ {
│ workspace: "workspace_123",
│ recipient: "user_b_id",
│ type: "mention",
│ title: "Alice mentioned you",
│ body: "in pocket 'Project Plan'",
│ source: { type: "comment", id: "comment_789", pocket_id: "pocket_456" },
│ read: false,
│ created_at: "2024-01-15T10:30:00Z"
│ }
│ ↓
└─ Calls Notification.insert()
MongoDB stores document
User B opens app
├─ UI queries: Notification.find({recipient: "user_b_id", read: false})
├─ Displays list ("Alice mentioned you in Project Plan")
└─ On click: extracts source → navigates to comment_789
```
### Edge Cases
- **Duplicate notifications**: If the same event handler fires twice, two identical notifications are created. Idempotency is the event handler's responsibility, not this model's.
- **Null source**: Some notifications (e.g., "Welcome to workspace") have no actionable source; `source` is optional.
- **Expired but unread**: A notification can expire and remain unread. The UI should hide it (via `expires_at` filter) even if `read=false`.
- **Timezone awareness**: `expires_at` is a `datetime` object. Callers must ensure it's timezone-aware (UTC recommended) for correct comparisons.
## Authorization and Security
This module does not enforce authorization directly; it's a data model, not a service. Authorization is enforced at the API/handler layer:
- **Workspace isolation**: Any query must include `workspace=current_workspace_id` to prevent cross-workspace reads. The model only stores an indexed `workspace` field on every notification; it does not apply the filter automatically, so callers are responsible for including it.
- **Recipient access**: Only the recipient (or workspace admins) should be able to read/update a notification. This is enforced in the service/API layer that uses this model, not here.
- **Read status updates**: Only the recipient can mark their own notifications as read. Again, enforced upstream.
The indexed `recipient` field enables efficient access control checks ("does user own this notification?").
## Dependencies and Integration
### Imports
- **`beanie.Indexed`**: ODM field-type wrapper for MongoDB indexing, used in annotations (e.g., `Indexed(str)`). Signals that `workspace` and `recipient` are index-participating fields for efficient queries.
- **`ee.cloud.models.base.TimestampedDocument`**: Base class providing `created_at` and `updated_at` fields. Ensures all notifications have creation/update timestamps without boilerplate.
- **`pydantic.BaseModel`, `pydantic.Field`**: Data validation and schema definition. `NotificationSource` uses BaseModel directly for nested validation.
### Exported To
- **`event_handlers`**: Imports `Notification` to instantiate and persist notifications when events occur. This is the primary consumer.
- **`__init__` (cloud.models)**: Re-exports for clean public API (`from ee.cloud.models import Notification`).
### Relationship to Other Models
- **Events** (elsewhere in codebase): Events are immutable, system-wide records. Notifications are mutable (read status), user-scoped derivatives of events.
- **User/Workspace models**: Notifications reference these by ID (`recipient`, `workspace`) but do not embed them (no foreign key relationships in MongoDB). The caller is responsible for ensuring referential integrity.
- **Comment/Pocket/Invite models**: Referenced indirectly via `NotificationSource.id`. No direct dependency here; event handlers perform the translation.
## Design Decisions
### 1. **Soft Read State vs. Deletion**
**Decision**: Notifications are marked `read: bool` rather than deleted when read.
**Rationale**:
- Preserves notification history for user reference ("did I already see this?")
- Enables notification badges ("5 unread notifications")
- Supports undo/restore workflows
- Provides analytics data (when did user read what?)
- Avoids hard deletes, which complicate recovery and auditing
### 2. **Optional Expiration**
**Decision**: `expires_at: datetime | None` is optional and must be explicitly checked in queries.
**Rationale**:
- Most notifications are perpetual; optional field avoids clutter
- Expiration logic lives in the query layer, not the model (read-only concern)
- Flexibility: invitations may expire, but mention notifications don't
- Trade-off: Callers must remember to filter expired notifications; no automatic hiding
### 3. **Composite Index on (recipient, read, created_at)**
**Decision**: Single three-field index rather than separate indexes or two-field variants.
**Rationale**:
- Optimizes the dominant query: "unread notifications for user X, sorted by time"
- (recipient, read) filters the set quickly; (created_at, -1) sorts within it
- Avoids index explosion for a small model
- Trade-off: Queries that filter only on `recipient` can still use the index prefix, but sorting them by `created_at` requires an in-memory sort because `read` sits between the two fields in the index
### 4. **Nested NotificationSource Model**
**Decision**: `source` is a separate Pydantic model, not a flat set of fields.
**Rationale**:
- Encapsulation: source information is cohesive
- Reusability: if other models need to reference a resource, they can use `NotificationSource`
- Validation: Pydantic validates source structure at insertion
- Trade-off: Slightly more verbose than flat fields; worth it for clarity
### 5. **No Explicit User/Workspace Validation**
**Decision**: `workspace` and `recipient` are strings; no validation against user/workspace documents.
**Rationale**:
- MongoDB is schemaless; validation would require additional queries
- In a distributed system, referential integrity is better handled by event handlers (which create notifications and can verify source existence)
- Avoids tight coupling between models
- Trade-off: Orphaned notifications are possible if a user is deleted; handled via cleanup jobs, not model logic
## Common Query Patterns
**Get inbox (unread notifications for a user):**
```python
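from datetime import datetime, timezone
from beanie.operators import Or
await Notification.find(
Notification.workspace == workspace_id,
Notification.recipient == user_id,
Notification.read == False,
# `|` binds tighter than `==`, so the two conditions must be combined with Or(...)
Or(Notification.expires_at == None, Notification.expires_at > datetime.now(timezone.utc)),
).sort([("created_at", -1)]).to_list()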
```
**Mark notifications as read:**
```python
await Notification.find(
Notification.workspace == workspace_id,
Notification.recipient == user_id,
Notification.read == False
).update({"$set": {"read": True}})
```
**Get all notifications (including read) for user:**
```python
await Notification.find(Notification.workspace == workspace_id, Notification.recipient == user_id).sort([("created_at", -1)]).to_list()
```
---
## Related
- [base-foundational-document-model-with-automatic-timestamp-management-for-mongodb](base-foundational-document-model-with-automatic-timestamp-management-for-mongodb.md)
- [eecloudworkspace-router-re-export-for-fastapi-workspace-endpoints](eecloudworkspace-router-re-export-for-fastapi-workspace-endpoints.md)
- [untitled](untitled.md)

View File

@@ -0,0 +1,253 @@
# pocket — Data models for Pocket workspaces with widgets, teams, and collaborative agents
> This module defines the core document models (Pocket, Widget, WidgetPosition) that represent collaborative workspaces in the OCEAN platform. Pockets are the primary workspace container that hold widgets (UI components), team members, and assigned agents, with support for sharing and ripple specifications. It exists as a separate module to establish the authoritative schema and enable other services (pocket service, event handlers, API layer) to work with a consistent, validated data structure.
**Categories:** workspace management, data model / schema, collaborative features, CRUD, document structure
**Concepts:** Pocket (workspace container), Widget (embedded UI component), WidgetPosition (grid layout), TimestampedDocument (base class with created_at/updated_at), Beanie ODM (MongoDB object mapping), Pydantic model validation, Field aliases (camelCase ↔ snake_case), Workspace scoping (multi-tenancy), Visibility enum (private/workspace/public), Share link token (anonymous access)
**Words:** 1788 | **Version:** 1
---
## Purpose
The `pocket` module defines the data layer for Pocket workspaces — the core collaborative workspace abstraction in OCEAN. A Pocket is a container that:
- Organizes widgets (customizable UI components) on a visual grid
- Associates a team of users and intelligent agents
- Enables sharing with fine-grained access control (private/workspace/public)
- Optionally defines a "ripple spec" — a workflow or automation specification
This module exists because:
1. **Schema Definition**: It's the single source of truth for how workspace data is structured, validated, and persisted to MongoDB via Beanie ODM
2. **Frontend-Backend Alignment**: Field aliases (e.g., `dataSourceType` ↔ `_dataSourceType` for JSON) ensure the Python backend and JavaScript frontend speak the same language
3. **Type Safety**: Pydantic models provide runtime validation and IDE support for code using these objects
4. **Cross-Functional Integration**: By centralizing the schema, services (pocket service), event handlers, and API routers can all depend on this single definition
## Key Classes and Methods
### WidgetPosition
**Purpose**: A lightweight coordinate model for placing widgets on a grid-based layout.
**Fields**:
- `row: int = 0` — Grid row index
- `col: int = 0` — Grid column index
**Design Note**: This is a simple, reusable subdocument. It doesn't need MongoDB persistence concerns because it's always embedded within a Widget.
---
### Widget
**Purpose**: A Pydantic subdocument representing a single UI widget embedded within a Pocket. Widgets are the building blocks of the workspace — each one can display data, execute actions, or represent an agent interface.
**Key Design Decision**: Widgets have their own `id` field (aliased as `_id` in JSON) so the frontend can address and update widgets by ID rather than by array index. This makes widget references resilient to reordering.
**Fields**:
| Field | Type | Default | Notes |
|-------|------|---------|-------|
| `id` | str | String form of a newly generated ObjectId | Aliased as `_id` for frontend; allows direct widget addressing |
| `name` | str | Required | Display name for the widget |
| `type` | str | "custom" | Widget category; could be "chart", "table", "agent-panel", etc. |
| `icon` | str | "" | Icon identifier (CSS class, emoji, or URL) |
| `color` | str | "" | Color for UI theming |
| `span` | str | "col-span-1" | Tailwind CSS grid span class (e.g., "col-span-2" for wider widgets) |
| `dataSourceType` | str | "static" | How data is populated: "static" (hardcoded), "dynamic" (fetched), "agent" (from an agent), etc. |
| `config` | dict | {} | Type-specific configuration; structure depends on `type` |
| `props` | dict | {} | Runtime properties passed to the widget renderer |
| `data` | Any | None | The actual data displayed by the widget (cached or computed) |
| `assignedAgent` | str \| None | None | ID of an agent assigned to this widget (if applicable) |
| `position` | WidgetPosition | Default(0,0) | Grid placement |
**Pydantic Configuration**:
- `populate_by_name = True`: Accepts both snake_case Python names and camelCase aliases (e.g., both `dataSourceType` and `data_source_type`)
- This is essential for bidirectional API compatibility
---
### Pocket
**Purpose**: The primary workspace document. Inherits from `TimestampedDocument` (providing `created_at` and `updated_at` timestamps) and represents a collaborative workspace with widgets, team management, and sharing controls.
**Key Design Decisions**:
1. **Workspace Scoping**: Indexed on `workspace` field for efficient tenant isolation
2. **Flexible Team/Agent References**: `team` and `agents` fields are typed as `list[Any]` to support both ID strings and populated objects (relationship flexibility)
3. **Visibility + Sharing**: Combines a visibility enum (private/workspace/public) with explicit `shared_with` list for granular access control
4. **Ripple Spec**: Optional field for complex workflow automation specs (decoupled from the Pocket schema)
**Fields**:
| Field | Type | Default | Constraints | Purpose |
|-------|------|---------|-------------|----------|
| `workspace` | Indexed(str) | Required | Indexed for queries | Tenant/workspace ID for multi-tenancy |
| `name` | str | Required | — | Human-readable workspace name |
| `description` | str | "" | — | Optional long-form description |
| `type` | str | "custom" | No enum — flexible | Category: "deep-work", "data", "custom", etc. |
| `icon` | str | "" | — | UI representation |
| `color` | str | "" | — | UI theming |
| `owner` | str | Required | — | User ID of workspace creator |
| `team` | list[Any] | [] | — | User IDs or populated User objects (lazy or eager loading) |
| `agents` | list[Any] | [] | — | Agent IDs or populated Agent objects |
| `widgets` | list[Widget] | [] | — | Embedded Widget subdocuments |
| `rippleSpec` | dict \| None | None | — | Optional workflow/automation config; structure TBD by feature |
| `visibility` | str | "private" | `^(private\|workspace\|public)$` | Scope of default access |
| `share_link_token` | str \| None | None | — | Anonymous share token (if public via link) |
| `share_link_access` | str | "view" | `^(view\|comment\|edit)$` | Permission level for shared link |
| `shared_with` | list[str] | [] | — | Explicit user IDs with granted access (overrides visibility) |
**Pydantic Configuration**:
- `populate_by_name = True`: Supports both snake_case and camelCase
**MongoDB Settings**:
- `name = "pockets"`: Collection name in MongoDB
- Inherits timestamp management from `TimestampedDocument`
---
## How It Works
### Data Flow
1. **Creation**: Frontend sends a JSON payload with camelCase fields (e.g., `{"name": "Q1 Planning", "dataSourceType": "dynamic"}`).
2. **Validation**: Pydantic parses the JSON, applies aliases to map camelCase → snake_case, validates field types and patterns (e.g., visibility must be private/workspace/public).
3. **Persistence**: Beanie ODM serializes the validated model to BSON and writes to MongoDB's `pockets` collection. Timestamps are automatically set.
4. **Retrieval**: Queries via workspace index are fast. Widgets are returned as embedded documents within the Pocket.
5. **Updates**: Widget updates can target specific widgets by ID without affecting others or the array index.
### Control Flow Example: Creating a Widget in a Pocket
```
User Action (Frontend)
API Router receives POST /pockets/{pocket_id}/widgets
Pocket Service validates widget data as Widget model
Widget is appended to pocket.widgets list
Pocket document is saved (all widgets serialized)
Event Handler (e.g., on_pocket_updated) may trigger downstream actions
```
### Edge Cases
- **Widget ID Collisions**: Extremely unlikely (ObjectId-based), but if a Widget is created without an explicit `id`, a new one is generated. Duplicates would be caught at the API layer.
- **Team/Agent Polymorphism**: Since `team` and `agents` accept `Any`, downstream services must handle both scalar IDs and populated objects. Consider using a discriminated union or strict type validation at the service layer.
- **Ripple Spec Flexibility**: The schema doesn't validate `rippleSpec` content, delegating validation to the ripple/workflow service.
- **Visibility vs. Shared Access**: A Pocket can be "private" but still have users in `shared_with`. The authorization layer (not this module) must decide which takes precedence.
## Authorization and Security
This module **does not enforce authorization**; it only defines the data model. Authorization is handled elsewhere (likely in the API router or a middleware layer). However, the schema supports these access patterns:
- **Visibility Enum**: Defines default scope (private to owner, workspace to team, public to anyone)
- **Owner Field**: The creating user; typically has full permissions
- **Shared With List**: Explicit user IDs with granted access, overriding visibility
- **Share Link Token**: Anonymous access via token (useful for public dashboards)
- **Share Link Access**: Granular permission for link sharers (view-only, comment, edit)
**Who Can Modify a Pocket**:
Typically the owner or users in `shared_with` with "edit" access. The pocket service layer validates this before mutation.
## Dependencies and Integration
### Inbound Dependencies
**What depends on this module**:
| Dependent | Usage | Reason |
|-----------|-------|--------|
| `ee.cloud.models.__init__` | Re-exports Pocket, Widget, WidgetPosition | Makes models available to the package |
| `pocket_service` | CRUD operations on Pocket documents | Queries, creates, updates, deletes Pockets and Widgets |
| `event_handlers` | Listens to Pocket lifecycle events | Triggers downstream actions (notifications, ripple execution, etc.) when Pockets/Widgets change |
| API routers | Request/response serialization | Converts HTTP JSON ↔ Pocket/Widget models |
### Outbound Dependencies
**What this module depends on**:
| Dependency | From | Purpose |
|------------|------|----------|
| `TimestampedDocument` | `ee.cloud.models.base` | Base class providing `created_at` and `updated_at` fields and MongoDB integration |
| `Beanie` | beanie | ODM (Object-Document Mapper) for MongoDB; `Indexed` for efficient queries |
| `Pydantic` | pydantic | Data validation, serialization, field aliases |
| `ObjectId` | bson | BSON MongoDB ID generation |
### Integration Pattern
The module is a **schema definition layer** that sits between the database (MongoDB) and business logic (service layer). It's consumed by:
- **Service Layer**: Uses Pocket/Widget models for typed method signatures
- **API Layer**: Deserializes requests into models, serializes responses
- **Event Handlers**: Receives model instances when documents change
---
## Design Decisions
### 1. Widget ID Independence
**Decision**: Widgets have their own `id` field instead of being addressed by array index.
**Rationale**:
- Frontend widgets are often reordered on the UI; using indices would break references
- IDs allow direct widget updates without reloading the entire Pocket
- Mirrors REST best practices (each resource has an ID)
---
### 2. Field Aliases for Frontend Compatibility
**Decision**: camelCase aliases (e.g., `dataSourceType` → Python `dataSourceType` with alias `"dataSourceType"`) coexist with snake_case Python field names.
**Rationale**:
- JavaScript frontend sends/expects camelCase (convention)
- Python backend prefers snake_case (PEP 8)
- Pydantic's `populate_by_name = True` lets both work seamlessly
- No manual marshaling needed
---
### 3. Flexible Team/Agent References
**Decision**: `team` and `agents` fields accept `list[Any]` rather than `list[str]` or `list[ObjectId]`.
**Rationale**:
- Supports both lazy loading (store IDs) and eager loading (populate full objects)
- Reduces database round-trips if team/agent data is needed immediately
- Trade-off: Less type safety; requires downstream validation
---
### 4. Optional Ripple Spec
**Decision**: `rippleSpec` is a loose `dict[str, Any] | None`, not a strict schema.
**Rationale**:
- Ripple feature is evolving; tight coupling would require schema migrations
- Allows Pocket service to store ripple data without understanding it
- Ripple service owns validation and interpretation
---
### 5. Visibility Enum + Explicit Sharing
**Decision**: Combines a default visibility level (private/workspace/public) with an explicit `shared_with` list.
**Rationale**:
- Visibility covers common cases (keep it private by default)
- Explicit list allows fine-grained control without creating many visibility levels
- Trade-off: Authorization logic must handle precedence rules (Does "public" override `shared_with`? etc.)
---
### 6. Share Link Separation
**Decision**: Share links are represented as a token + access level, not as users in `shared_with`.
**Rationale**:
- Links can be revoked without tracking who used them
- Anonymous access doesn't require user accounts
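- Different permission model: a link's access level is set independently via `share_link_access` (view, comment, or edit; view by default), separate from the access granted to team members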
---
## Related
- [base-foundational-document-model-with-automatic-timestamp-management-for-mongodb](base-foundational-document-model-with-automatic-timestamp-management-for-mongodb.md)
- [eecloudworkspace-router-re-export-for-fastapi-workspace-endpoints](eecloudworkspace-router-re-export-for-fastapi-workspace-endpoints.md)
- [untitled](untitled.md)

View File

@@ -0,0 +1,107 @@
# pockets.__init__ — Entry point and public API aggregator for the pockets subsystem
> This module serves as the public interface for the enterprise cloud pockets subsystem by re-exporting the router component. It acts as a facade pattern implementation that hides the internal module structure while exposing only the necessary routing layer to parent packages. This is a minimal __init__ file that defines the top-level API boundary for the pockets feature domain.
**Categories:** API router layer, workspace and collaboration domain, enterprise cloud platform, package initialization and namespacing
**Concepts:** router, facade pattern, public API boundary, package namespace, FastAPI Router, re-export pattern, workspace scoping, user authentication, session management, enterprise licensing
**Words:** 681 | **Version:** 1
---
## Purpose
The `pockets.__init__` module exists to establish a clear public API boundary for the pockets subsystem within the enterprise cloud platform. By re-exporting `router` from `ee.cloud.pockets.router`, it implements the **facade pattern**, allowing parent packages to import routing functionality without needing knowledge of internal module organization.
This pattern is common in Python package architecture for several reasons:
- **API Stability**: Changes to internal module organization don't break external imports
- **Explicit Public Interface**: Only `router` is publicly available; other modules (errors, user, session, etc.) are implementation details
- **Clear Responsibility**: The __init__ file makes it obvious what the package exports at a glance
- **Namespace Control**: Prevents unintended public exposure of internal utilities
The pockets subsystem appears to be a major feature domain within the enterprise cloud platform, handling collaborative workspaces, user access, permissions, and related infrastructure.
## Key Classes and Methods
This module does not define any classes or functions of its own. Instead, it re-exports:
### `router` (from `ee.cloud.pockets.router`)
A FastAPI Router instance that handles all HTTP endpoints related to the pockets feature domain. The router is imported with a `# noqa: F401` comment to suppress unused-import warnings, indicating that it is intentionally re-exported rather than used locally.
The actual router implementation would contain endpoints for:
- Workspace management
- User permissions and access control
- Messaging and collaboration
- File and group management
- Notifications and event handling
## How It Works
The import mechanism is straightforward:
```
parent package → ee.cloud.pockets.__init__ → ee.cloud.pockets.router.router → FastAPI Router instance
```
When a parent module (or API initialization code) imports from `ee.cloud.pockets`, it receives the `router` object, which can then be included in the main FastAPI application via `app.include_router(router)`.
The single-line implementation suggests:
1. The heavy lifting (route definitions, validation, business logic) lives in sibling modules
2. This __init__ file is deliberately minimal, following the principle of minimal public API surface
3. The internal modules (comment, file, group, invite, message, notification, pocket, session, workspace, etc.) are composition dependencies used by the router but not exposed publicly
## Authorization and Security
While this specific file doesn't implement authorization, the fact that `user`, `license`, and access control systems are imported at the package level suggests that:
- Routes defined in the exported `router` likely perform authentication and authorization checks
- The pockets subsystem respects enterprise licensing (`license` module)
- User context and session management are core concerns (`user`, `session` modules)
- Invite and permission systems (`invite`, `group` modules) likely restrict resource access based on user roles
## Dependencies and Integration
This module depends on:
- **`ee.cloud.pockets.router`**: The main FastAPI Router containing endpoint definitions
The pockets package internally depends on (based on import graph):
- **`errors`**: Custom exception types for the pockets domain
- **`workspace`**: Core workspace data model and operations
- **`user`**: User identity and authentication context
- **`session`**: Session management and tracking
- **`license`**: Enterprise license validation
- **`comment`, `file`, `group`, `invite`, `message`, `notification`, `pocket`**: Feature-specific modules
- **`event_handlers`**: Event-driven notification system
- **`agent_bridge`**: Integration point for autonomous agents
- **`core`**: Shared core utilities
- **`agent`**: Agent-related functionality
- **`deps`**: Dependency injection utilities (likely FastAPI dependencies)
The pockets subsystem is likely a major domain, suggesting this router is included in the main application at `/ee/cloud/__init__.py` or a parent router aggregator.
## Design Decisions
**Minimal Public API Surface**: The re-export of only `router` is intentional. All helper modules, data models, and service layers remain internal implementation details. This reduces cognitive load for consumers and prevents accidental dependencies on unstable APIs.
**Single Line Implementation**: This follows Python best practices for package __init__ files that primarily serve as namespace organizers rather than logic containers. The `# noqa: F401` directive shows awareness of linting tools and code quality standards.
**Facade Pattern**: By presenting `router` as the single public interface, the module implements the facade pattern, allowing internal refactoring without affecting consumers. For example, if the router were split into multiple routers, only this file would need to change.
**Enterprise Architecture Implication**: The existence of separate modules for licensing, user management, permissions, and events suggests this is an enterprise-grade platform with complex access control and feature gating requirements.
---
## Related
- [untitled](untitled.md)
- [workspace-data-model-for-organization-workspaces-in-multi-tenant-enterprise-depl](workspace-data-model-for-organization-workspaces-in-multi-tenant-enterprise-depl.md)
- [license-enterprise-license-validation-and-feature-gating-for-cloud-deployments](license-enterprise-license-validation-and-feature-gating-for-cloud-deployments.md)
- [deps-fastapi-dependency-injection-layer-for-cloud-router-authentication-and-auth](deps-fastapi-dependency-injection-layer-for-cloud-router-authentication-and-auth.md)
- [core-enterprise-jwt-authentication-with-cookie-and-bearer-transport-for-fastapi](core-enterprise-jwt-authentication-with-cookie-and-bearer-transport-for-fastapi.md)
- [agent-agent-configuration-and-metadata-storage-for-workspace-scoped-ai-agents](agent-agent-configuration-and-metadata-storage-for-workspace-scoped-ai-agents.md)
- [comment-threaded-comments-on-pockets-and-widgets-with-workspace-isolation](comment-threaded-comments-on-pockets-and-widgets-with-workspace-isolation.md)
- [file-cloud-storage-metadata-document-for-managing-file-references](file-cloud-storage-metadata-document-for-managing-file-references.md)
- [group-multi-user-chat-channels-with-ai-agent-participants](group-multi-user-chat-channels-with-ai-agent-participants.md)
- [invite-workspace-membership-invitation-document-model](invite-workspace-membership-invitation-document-model.md)
- [message-data-model-for-group-chat-messages-with-mentions-reactions-and-threading](message-data-model-for-group-chat-messages-with-mentions-reactions-and-threading.md)
- [notification-in-app-notification-data-model-and-persistence-for-user-workspace-e](notification-in-app-notification-data-model-and-persistence-for-user-workspace-e.md)
- [pocket-data-models-for-pocket-workspaces-with-widgets-teams-and-collaborative-ag](pocket-data-models-for-pocket-workspaces-with-widgets-teams-and-collaborative-ag.md)
- [session-cloud-tracked-chat-session-document-model-for-pocket-scoped-conversation](session-cloud-tracked-chat-session-document-model-for-pocket-scoped-conversation.md)

View File

@@ -0,0 +1,156 @@
# ripple_normalizer — Normalizes AI-generated pocket specifications into a consistent, persistence-ready format
> This module provides a single public function, `normalize_ripple_spec()`, that takes potentially incomplete or AI-generated pocket specifications and transforms them into a standardized format with guaranteed envelope fields, valid IDs, and widget metadata. It exists as a dedicated module to centralize the schema validation and enrichment logic that bridges the gap between flexible AI-generated specs and the stricter requirements of the persistence layer. It sits at the boundary between the agent layer (which generates specs) and the service/storage layer (which persists them).
**Categories:** Data Transformation & Normalization, Agent Integration Layer, Specification Management, Utility & Infrastructure
**Concepts:** normalize_ripple_spec, _short_id, rippleSpec, pocket specification, envelope fields, pure transformation function, format-aware normalization, multi-pane specs, UISpec v1.0, flat widget list
**Words:** 1412 | **Version:** 1
---
## Purpose
When AI agents or user interactions generate pocket specifications in the OCEAN system, those specs are often incomplete, variable in structure, or missing critical metadata needed for persistence and runtime operation. The `ripple_normalizer` module solves this by providing a lightweight normalizer that:
1. **Ensures structural consistency**: Every spec that passes through gets guaranteed envelope fields (`lifecycle`, `version`, `intent`, `metadata`) regardless of input format.
2. **Generates missing identifiers**: Auto-generates globally unique pocket IDs and widget IDs when not provided, using cryptographically secure random tokens.
3. **Preserves flexibility**: Handles multiple spec formats (multi-pane, UISpec v1.0, flat widget lists) without forcing a single schema.
4. **Enriches metadata**: Applies sensible defaults for color, category, and display configuration.
In the larger system architecture, this normalizer acts as a **data transformation layer** that sits between the agent/generation layer (which produces specs) and the service layer (which persists and retrieves them). It is invoked by `agent_bridge` when specs are generated and by `service` when specs are ingested, ensuring that all specs in the system conform to a predictable structure before they hit the database or are served to the UI.
## Key Classes and Methods
### `_short_id() → str`
**Purpose**: Generate a cryptographically secure random short identifier.
**Implementation**: Uses `secrets.token_hex(4)` to produce an 8-character hexadecimal string. This is a simple, internal utility used whenever a new pocket or widget ID must be generated.
**Why separate?** Keeps ID generation logic isolated and testable; allows future changes to ID format without affecting the main normalization logic.
### `normalize_ripple_spec(spec: dict[str, Any] | None) → dict[str, Any] | None`
**Purpose**: The main entry point. Normalizes a rippleSpec dictionary by ensuring envelope fields, validating structure, and enriching missing metadata.
**Key Business Logic**:
1. **Null/invalid input handling**: Returns `None` if input is `None`, falsy, or not a dictionary. This allows graceful degradation in caller code.
2. **Name extraction**: Tries `spec["title"]` first, falls back to `spec["name"]`. This dual-field approach accommodates both naming conventions in AI-generated specs.
3. **Pocket ID resolution** (in priority order):
- Use `spec["id"]` if present
- Fall back to `spec["lifecycle"]["id"]` if present
- Generate new ID using `pocket-{_short_id()}` format (e.g., `pocket-a1b2c3d4`)
4. **Metadata and color extraction**: Resolves the color from the top-level `color` field or, failing that, from the metadata dict, with a fallback to a default blue (`#0A84FF`).
5. **Envelope construction**: Builds a consistent envelope dict with:
- `lifecycle`: Existing value or new `{"type": "persistent", "id": pocket_id}`
- `title` and `name`: Both set to the resolved name
- `color`: Resolved color value
- `metadata`: Merged dict with category (defaulting to `"custom"`), color, and any existing metadata
6. **Format-aware normalization** (three structured formats plus a fallback path):
**Path A — Multi-pane specs**: If `spec["panes"]` is a dict, the spec is treated as a multi-pane layout. The envelope is merged in and `version` is set to `"1.0"` (or existing value). Everything else passes through unchanged, preserving the complex pane structure.
**Path B — UISpec v1.0**: If `spec["ui"]` is a dict with a `type` field, it's treated as a structured UISpec. Envelope is merged, `version` defaults to `"1.0"`. The `ui` structure is preserved as-is.
**Path C — Flat widget list**: If `spec["widgets"]` is a non-empty list, the spec is a simple flat dashboard. This path performs the most transformation:
- Each widget gets an auto-generated `id` if missing (format: `{pocket_id}-w{index}`, e.g., `pocket-a1b2c3d4-w0`)
- Each widget gets a `title` from its `name` field or auto-generated `"Widget N"` label
- `version` defaults to `"2.0"` (indicating flat widget schema)
- `intent` defaults to `"dashboard"`
- `display` defaults to `{"columns": 3}`
- `dashboard_layout` defaults to `{"type": "grid", "columns": 3, "gap": 10}`
**Path D — No structured content**: If none of the above conditions match, return the spec with just the envelope merged in, preserving whatever structure was provided.
## How It Works
**Data Flow**:
1. **Input**: A dictionary representing a pocket spec, typically from AI generation (`agent_bridge`) or user input (`service`).
2. **Validation**: Check for null/non-dict and bail early if invalid.
3. **Extraction**: Pull all needed fields (name, ID, color, metadata) with cascading fallbacks.
4. **Envelope build**: Assemble the guaranteed minimal set of fields every spec needs.
5. **Format detection & enrichment**: Branch based on structure (panes, ui, widgets, or plain) and apply format-specific transformations.
6. **Return**: A merged spec dict with envelope + format-specific fields.
**Edge Cases Handled**:
- **Null input**: Returns `None` immediately, no error thrown.
- **Empty widgets list**: Treated as no-structure case; returns with envelope only.
- **Widget list with non-dict entries**: Non-dict items are silently skipped; only valid dicts are processed.
- **Missing widget title**: Auto-generated as `"Widget {index + 1}"`.
- **Missing pocket ID across all sources**: A new ID is unconditionally generated.
- **Metadata merge**: Existing metadata is preserved and extended (using `**meta` spread), so custom fields survive normalization.
- **Color priority**: Direct `color` field wins, then metadata color, then hardcoded default. No error if color is invalid CSS; it's passed through as-is for client-side validation.
**Determinism & Idempotence**:
- If a spec is normalized twice and the first result includes auto-generated IDs, the second normalization preserves those IDs (since `spec.get("id")` will now find them).
- ID generation is non-deterministic (uses `secrets.token_hex`), so repeated normalizations of the *same* incomplete spec will generate different IDs—callers must not rely on ID stability until the spec is persisted.
## Authorization and Security
No explicit authorization checks exist in this module. It is a **pure transformation function** with no state, no database access, and no privilege checks. Security is the responsibility of callers:
- **agent_bridge**: Must validate that the AI agent has permission to create specs in the target workspace.
- **service**: Must validate that the user has permission to create or modify pockets before calling this normalizer.
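The use of `secrets.token_hex()` (rather than the non-cryptographic `random` module) makes generated IDs unpredictable. Note that `token_hex(4)` provides only 32 bits of randomness, so these values are identifiers rather than secrets: uniqueness should still be enforced by the persistence layer, and the IDs should not be relied on as access tokens in multi-tenant systems.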
## Dependencies and Integration
**External Dependencies**: Only the Python standard library (`secrets` module for cryptographic randomness).
**Internal Dependencies**: None—this module has zero imports from the rest of the codebase, making it a true utility library with no coupling.
**Callers**:
- **agent_bridge**: Invokes `normalize_ripple_spec()` after AI agents generate a spec, before passing it to `service` for persistence.
- **service**: Likely calls this normalizer during spec ingestion to ensure consistency before storing in the database.
**Data Flow**:
```
AI Agent (via agent_bridge)
normalize_ripple_spec()
service (persistence layer)
database / runtime system
```
The normalizer is intentionally placed *before* the service layer to ensure the service always receives a normalized spec, reducing defensive checks downstream.
## Design Decisions
### 1. **Graceful Null Handling**
Returning `None` for invalid input rather than raising an exception allows call sites to decide whether to treat it as an error or a no-op. This is common in data transformation pipelines where invalid input may be expected in some contexts.
### 2. **Format-Aware, Not Format-Enforcing**
The module detects and handles three distinct spec formats (multi-pane, UISpec v1.0, flat widgets) without converting between them. This preserves the semantic richness of complex specs while still normalizing simple ones. A stricter design would force all specs into a single canonical format, but that would lose information and complicate backward compatibility.
### 3. **Minimal Envelope**
The envelope contains only fields essential for persistence and runtime operation: `lifecycle`, `version`, `intent`, `title`, `name`, `color`, `metadata`. Non-essential fields are merged through unchanged (`{**spec, **envelope}`), allowing specs to carry arbitrary extra data without being rejected.
### 4. **Auto-ID Generation with Hierarchical Fallback**
The multi-level ID resolution (direct `id` → `lifecycle.id` → generated) means specs can be built incrementally by different systems without ID collisions, and partial specs can be normalized safely. The fallback to generation ensures IDs never go missing.
### 5. **Widget ID Naming Convention**
Flat widget IDs use the format `{pocket_id}-w{index}` (e.g., `pocket-abc123-w0`), making widget IDs directly traceable to their parent pocket. This enables efficient querying and debugging without requiring a separate parent reference.
### 6. **Version as Format Indicator**
Version `"1.0"` indicates multi-pane or UISpec format (complex, nested); version `"2.0"` indicates flat widget format (simpler, more common). This allows downstream code to branch on version without separate schema detection logic.
### 7. **Secrets Over Random**
Using `secrets.token_hex()` instead of `random` or UUID ensures the IDs are cryptographically unpredictable, important in a system where IDs might be exposed via URLs or APIs and used as access tokens in some contexts.
### 8. **Stateless Pure Function**
The main `normalize_ripple_spec()` function has no side effects, no mutable state, no external I/O. This makes it trivial to test, parallelize, cache, or execute in sandboxed environments. It's a **pure transformation**, not a service.
---
## Related
- [untitled](untitled.md)

View File

@@ -0,0 +1,258 @@
# router — FastAPI authentication endpoints and user profile management
> This module exposes HTTP endpoints for user authentication, registration, profile retrieval, profile updates, and workspace selection. It acts as the HTTP layer for the auth domain, delegating business logic to AuthService while leveraging fastapi-users for standardized OAuth2/cookie-based authentication. It exists as a separate module to cleanly separate API route definitions from domain logic and security policies.
**Categories:** authentication, API router layer, FastAPI HTTP endpoints, CRUD operations, multi-tenant architecture
**Concepts:** APIRouter, FastAPI dependency injection, Depends(current_active_user), fastapi-users library, cookie-based authentication, bearer token authentication, OAuth2, stateless authentication, user profile management, workspace scoping
**Words:** 1431 | **Version:** 1
---
## Purpose
The router module is the **HTTP API layer** for the authentication domain in PocketPaw's cloud infrastructure. It serves three critical functions:
1. **Expose authentication endpoints**: Provides login, logout, and user registration routes via fastapi-users integration
2. **User profile management**: Allows authenticated users to retrieve and update their profiles
3. **Workspace routing**: Enables users to select their active workspace, a core feature of multi-tenant applications
This module exists because PocketPaw separates concerns into layers:
- **Core** (`ee.cloud.auth.core`): Authentication configuration and security setup
- **Service** (`ee.cloud.auth.service`): Business logic for profile and workspace operations
- **Router** (this module): HTTP endpoint definitions that bind requests to service calls
This layered architecture makes the codebase testable, maintainable, and allows non-HTTP interfaces (e.g., gRPC, webhooks) to reuse the same service logic.
## Key Classes and Methods
### Router Instance
```python
router = APIRouter(tags=["Auth"])
```
A FastAPI APIRouter instance tagged as "Auth" for OpenAPI documentation. All endpoints in this module are registered here and later included in the main FastAPI application via the `__init__.py` module.
### Included Routers (from fastapi-users)
The module includes three pre-built router sets from the fastapi-users library:
1. **Cookie-based authentication** (`/auth` prefix)
- Endpoints: POST `/auth/login`, POST `/auth/logout`
- Uses HTTP cookies for session management
- Ideal for browser-based clients
2. **Bearer token authentication** (`/auth/bearer` prefix)
- Endpoints: POST `/auth/bearer/login`, POST `/auth/bearer/logout`
- Uses Authorization header with JWT/bearer tokens
- Ideal for API clients, mobile apps, third-party integrations
3. **User registration** (`/auth` prefix)
- Endpoint: POST `/auth/register`
- Creates new User records with UserCreate schema validation
- Returns UserRead schema on success
These are **framework-provided routes** that handle the heavy lifting of OAuth2/OpenID flows, password hashing, and token management.
### `get_me(user)` → GET `/auth/me`
**Purpose**: Return the authenticated user's profile information.
**Parameters**:
- `user`: Injected via `Depends(current_active_user)` — a User dependency that verifies the request includes valid authentication credentials
**Implementation**: Delegates to `AuthService.get_profile(user)`, which formats the user's core profile data (likely ID, email, name, workspace associations) for the response.
**Security**: Only accessible with valid authentication. The `current_active_user` dependency (from `ee.cloud.auth.core`) enforces this.
**Use case**: Called by frontend when loading user dashboard or sidebar to display "Logged in as [name]".
### `update_me(body, user)` → PATCH `/auth/me`
**Purpose**: Allow authenticated users to update their own profile information.
**Parameters**:
- `body`: A `ProfileUpdateRequest` schema object containing fields the user wants to update (e.g., name, avatar, preferences)
- `user`: The authenticated user making the request (dependency injection)
**Implementation**: Passes both to `AuthService.update_profile(user, body)`, which validates changes, applies updates, and persists to the database.
**Security**: Only the user's own profile can be updated (enforced by receiving their own User object from the dependency).
**Use case**: Allows users to change their name, profile picture, or other mutable user attributes.
### `set_active_workspace(body, user)` → POST `/auth/set-active-workspace`
**Purpose**: Update which workspace the user is currently working in (for multi-tenant workspaces).
**Parameters**:
- `body`: A `SetWorkspaceRequest` containing the `workspace_id` to activate
- `user`: The authenticated user
**Implementation**:
1. Calls `AuthService.set_active_workspace(user, body.workspace_id)` to update the user's active workspace
2. Returns a confirmation response with the format: `{"ok": True, "activeWorkspace": "workspace-id"}`
**Security**: The service layer likely verifies the user has access to the requested workspace (preventing privilege escalation).
**Use case**: When a user with access to multiple workspaces switches between them (e.g., "Switch to Workspace B").
## How It Works
### Request Flow
1. **HTTP Request arrives** at an endpoint (e.g., `GET /auth/me`)
2. **FastAPI processes** route matching and dependency injection
3. **`current_active_user` dependency** (from `ee.cloud.auth.core`) validates authentication:
- Checks for valid cookie or bearer token
- Extracts the User object from the session/token
- Raises 401 Unauthorized if missing or invalid
4. **Endpoint handler** receives the validated `user` and/or `body`
5. **Delegates to AuthService** methods to perform business logic (retrieve profiles, validate workspace access, etc.)
6. **Response returned** to client as JSON
### Data Flow for Profile Update
```
Client Request (PATCH /auth/me with ProfileUpdateRequest)
FastAPI validates ProfileUpdateRequest against schema
Dependency injection: current_active_user verifies auth
update_me() calls AuthService.update_profile(user, body)
AuthService applies business logic (validation, db updates)
Response returned to client
```
### Data Flow for Workspace Switch
```
Client Request (POST /auth/set-active-workspace with workspace_id)
Dependency injection: current_active_user retrieves User
set_active_workspace() calls AuthService.set_active_workspace()
AuthService validates user has access to workspace
AuthService updates user.active_workspace in database
Confirmation response sent
```
### Key Design: Dependency Injection
FastAPI's dependency injection system (`Depends()`) is used to:
- **Enforce authentication** before the endpoint handler runs
- **Reduce boilerplate** (no manual token parsing in each endpoint)
- **Improve testability** (dependencies can be mocked in unit tests)
- **Centralize security logic** (auth rules live in one place: `current_active_user`)
## Authorization and Security
### Authentication Methods
Two parallel mechanisms support different client types:
1. **Cookie-based** (browsers): Stateful sessions, CSRF-protected
2. **Bearer tokens** (API clients): Stateless JWT/OAuth2 tokens
Both use the same underlying User model and validation logic.
### Access Control
**Profile endpoints** (`GET /auth/me`, `PATCH /auth/me`):
- Require valid authentication (enforced by `current_active_user` dependency)
- Allow users to read/modify only their own profile (implicit — the dependency provides their own User object)
**Workspace endpoint** (`/auth/set-active-workspace`):
- Requires valid authentication
- Likely requires the user to be a member of the target workspace (validation happens in AuthService, not this router)
- Prevents privilege escalation: user cannot switch to a workspace they don't have access to
### Security Best Practices Evident
- **No direct database access** in endpoints: all logic in service layer
- **Input validation** via Pydantic schemas (ProfileUpdateRequest, SetWorkspaceRequest)
- **Dependency injection for auth**: cannot accidentally call endpoints without auth checks
- **Password hashing** delegated to fastapi-users (not visible here but used during registration/login)
## Dependencies and Integration
### What This Module Imports
| Import | Purpose |
|--------|----------|
| `fastapi.APIRouter, Depends` | Core FastAPI routing and dependency injection |
| `ee.cloud.auth.core` | Provides `fastapi_users`, auth backends (cookie, bearer), `current_active_user`, schema models (UserRead, UserCreate) |
| `ee.cloud.auth.schemas` | Request/response models: ProfileUpdateRequest, SetWorkspaceRequest |
| `ee.cloud.auth.service` | AuthService class with profile and workspace business logic |
| `ee.cloud.models.user` | User ORM model |
**Note on unused imports**: The module imports from many other ee.cloud domains (license, knowledge, user, ws, group, message, errors, backend_adapter, workspace) but doesn't directly use them. These likely come from the `__init__.py` which might re-export this router alongside other domain routers.
### What Imports This Module
| Importer | Usage |
|----------|-------|
| `ee.cloud.auth.__init__` | Includes `router` in the auth domain's public API |
| Root FastAPI application | Includes this router to expose `/auth/*` endpoints |
### Integration Points
1. **AuthService** (`ee.cloud.auth.service`): Handles all business logic for profile/workspace operations
2. **Authentication Core** (`ee.cloud.auth.core`): Configures FastAPI-users, manages backends
3. **User Model** (`ee.cloud.models.user`): Represents authenticated users and their workspace memberships
4. **Workspace domain**: The `set_active_workspace` endpoint connects to workspace management (user selects which workspace to work in)
## Design Decisions
### 1. **Thin Router, Thick Service Layer**
The router endpoints are intentionally minimal — they accept parameters, delegate to AuthService, and return responses. Business logic (validation, database updates) lives in the service layer. This makes it easy to:
- Add new HTTP transports (gRPC, webhooks) without duplicating logic
- Test business logic independently of HTTP details
- Change HTTP contracts without rewriting core logic
### 2. **Separate Authentication Backends**
Offering both cookie and bearer token routes acknowledges different client needs:
- Browsers use cookies (simpler, less config, CSRF-protected by convention)
- APIs/mobile apps use bearer tokens (stateless, scalable, no session storage)
Both point to the same validation logic, minimizing duplication.
### 3. **Inclusion of Pre-Built fastapi-users Routers**
Instead of reimplementing login/logout/register, the module reuses fastapi-users' battle-tested implementations. This:
- Reduces security bugs (password hashing, token validation already proven)
- Follows industry standards (OAuth2, OpenID)
- Saves development time
- Makes the custom endpoints (get_me, update_me, set_active_workspace) stand out as PocketPaw-specific logic
### 4. **Workspace as a First-Class Auth Concern**
The `set_active_workspace` endpoint at the auth layer signals that workspace selection is core to the system's identity model, not an afterthought. Users don't just authenticate — they authenticate *into a workspace*.
### 5. **Dependency Injection Over Middleware**
Using `Depends(current_active_user)` rather than middleware for auth checks:
- **Explicit**: each endpoint declares its auth requirement
- **Flexible**: some endpoints could theoretically be public (though none are here)
- **Testable**: dependencies can be easily mocked
## Related Concepts
To fully understand this module, you should also study:
- **FastAPI dependency injection**: How `Depends()` works
- **fastapi-users library**: The OAuth2 framework underpinning auth
- **JWT and bearer tokens**: Stateless authentication for APIs
- **Workspace scoping**: How multi-tenant separation works in PocketPaw
- **AuthService** (`ee.cloud.auth.service`): The business logic layer
- **Authentication Core** (`ee.cloud.auth.core`): Backend and dependency configuration
---
## Related
- [schemas-pydantic-requestresponse-data-models-for-workspace-domain-operations](schemas-pydantic-requestresponse-data-models-for-workspace-domain-operations.md)
- [untitled](untitled.md)
- [license-enterprise-license-validation-and-feature-gating-for-cloud-deployments](license-enterprise-license-validation-and-feature-gating-for-cloud-deployments.md)
- [deps-fastapi-dependency-injection-layer-for-cloud-router-authentication-and-auth](deps-fastapi-dependency-injection-layer-for-cloud-router-authentication-and-auth.md)
- [core-enterprise-jwt-authentication-with-cookie-and-bearer-transport-for-fastapi](core-enterprise-jwt-authentication-with-cookie-and-bearer-transport-for-fastapi.md)
- [group-multi-user-chat-channels-with-ai-agent-participants](group-multi-user-chat-channels-with-ai-agent-participants.md)
- [message-data-model-for-group-chat-messages-with-mentions-reactions-and-threading](message-data-model-for-group-chat-messages-with-mentions-reactions-and-threading.md)
- [backendadapter-adapter-that-makes-pocketpaws-agent-backends-usable-as-knowledge](backendadapter-adapter-that-makes-pocketpaws-agent-backends-usable-as-knowledge.md)
- [workspace-data-model-for-organization-workspaces-in-multi-tenant-enterprise-depl](workspace-data-model-for-organization-workspaces-in-multi-tenant-enterprise-depl.md)
- [eecloudworkspace-router-re-export-for-fastapi-workspace-endpoints](eecloudworkspace-router-re-export-for-fastapi-workspace-endpoints.md)

View File

@@ -0,0 +1,190 @@
# schemas — Pydantic models for authentication request/response validation
> This module defines three Pydantic BaseModel classes that standardize the shape of authentication-related HTTP requests and responses across the PocketPaw auth domain. It exists as a separate schemas module to centralize data validation contracts, enabling clean separation between HTTP layer concerns (routers) and business logic (services), and ensuring consistency across multiple consumers that import from this file.
**Categories:** auth domain, API schemas and data models, HTTP validation layer, system-wide contracts
**Concepts:** ProfileUpdateRequest, SetWorkspaceRequest, UserResponse, Pydantic BaseModel, from_attributes (ORM integration), HTTP request/response validation, partial updates (PATCH semantics), multi-workspace architecture, type safety, schema-driven API design
**Words:** 1457 | **Version:** 1
---
## Purpose
The `schemas` module is the **contract layer** for the authentication domain. It serves as the single source of truth for what request bodies and response bodies should look like when clients interact with auth endpoints.
Why separate it from service or router logic? Because:
1. **Validation Separation**: Pydantic handles all input validation automatically. When a router receives a request, Pydantic validates it against one of these schemas before the route handler even runs.
2. **Reusability**: Multiple parts of the system need to reference the same shape—routers validate against them, services may reference them for type hints, and external clients can inspect them for API documentation.
3. **Contract Clarity**: These schemas act as the documented interface between the HTTP layer and internal services. They define what the system will accept and what it will return.
4. **Evolutionary Flexibility**: If you need to change response structure, you change it here once, and all consumers (routers, services, message handlers, websockets, agent bridge) automatically adapt.
## Key Classes and Methods
### `ProfileUpdateRequest`
**Purpose**: Validates partial user profile updates. Allows clients to update any combination of display name, avatar, and status.
**Fields**:
- `full_name: str | None = None` — User's display name. Optional; `None` means "don't change this."
- `avatar: str | None = None` — Avatar URL or image data. Optional.
- `status: str | None = None` — User status message (e.g., "In a meeting"). Optional.
**Business Logic**: This is a **partial update** schema—all fields are nullable by design. The service layer (likely `AuthService`) receives this, checks which fields are non-`None`, and only updates those attributes. This prevents accidental overwrites of unchanged fields.
**Usage Pattern**: When a client sends `PATCH /auth/me`, the request body is validated against this schema before reaching the handler.
### `SetWorkspaceRequest`
**Purpose**: Validates workspace activation requests. When a user has access to multiple workspaces, they must explicitly select one as their active workspace.
**Fields**:
- `workspace_id: str` — Required identifier of the workspace to activate. Non-optional; the request is invalid without it.
**Business Logic**: This is a **required-field** schema. Setting a workspace is a deliberate action, not optional. The service layer will:
1. Verify the user has access to this workspace (authorization check)
2. Update the user's `active_workspace` field
3. Possibly trigger downstream effects (reload configuration, reset cached permissions, etc.)
**Usage Pattern**: `POST /auth/set-active-workspace`. Used by the frontend when the user clicks "Switch Workspace."
### `UserResponse`
**Purpose**: Serializes authenticated user data back to clients. This is the response schema for login, profile fetch, or token refresh endpoints.
**Fields**:
- `id: str` — Unique user identifier (likely UUID or MongoDB ObjectId string)
- `email: str` — User's email address
- `name: str` — Display name
- `image: str` — Avatar URL or data URI
- `email_verified: bool` — Whether email has been verified
- `active_workspace: str | None = None` — Currently selected workspace ID, or `None` if not set
- `workspaces: list[dict]` — Array of workspace objects the user can access. Each dict likely contains `{"id": "...", "name": "...", ...}` structure.
**Pydantic Config**: `model_config = {"from_attributes": True}` enables Pydantic to accept ORM objects (e.g., SQLAlchemy models or Beanie documents) and extract attributes automatically. This means a service can do:
```python
user_doc = User.get(user_id) # Returns ORM/Beanie object
return UserResponse.model_validate(user_doc) # Pydantic extracts attributes automatically
```
Without this config, you'd need to manually map: `UserResponse(id=user_doc.id, email=user_doc.email, ...)` on every response.
**Business Logic**: This schema defines the "current user" contract. Whenever any handler needs to return user info, it uses this schema. The presence of `workspaces` (plural) indicates the system supports **multi-workspace architecture**—a single user can belong to multiple workspaces and switch between them.
## How It Works
### Request Validation Flow
1. **Client sends HTTP request** with JSON body
2. **FastAPI route handler** declares a schema class as the type of its body parameter (e.g., `@router.patch("/auth/me")` on `async def update_me(profile_update: ProfileUpdateRequest, ...)`)
3. **Pydantic parses and validates** the incoming JSON against the schema
4. **If valid**: Request handler receives a typed Python object (e.g., `profile_update: ProfileUpdateRequest`)
5. **If invalid**: FastAPI returns `422 Unprocessable Entity` with detailed validation errors; handler never runs
### Response Serialization Flow
1. **Service layer returns domain object** (e.g., a Beanie `User` document or ORM model)
2. **Router calls** `UserResponse.model_validate(user_doc)`
3. **Pydantic extracts fields** (using `from_attributes=True`) and builds a `UserResponse` instance
4. **FastAPI serializes** the `UserResponse` to JSON and sends it to client
5. **Client receives** guaranteed-valid shape
### Edge Cases
- **Partial updates**: `ProfileUpdateRequest` allows all-`None` fields. A client could send `{}` (empty JSON object). The service must handle this (likely doing nothing) rather than failing.
- **Workspace access control**: `SetWorkspaceRequest` only contains an ID. The **service layer** must verify the user actually has access to that workspace. This schema doesn't enforce that.
- **Missing workspaces**: If a user has no workspaces, `workspaces: list[dict]` will be an empty list `[]`. Frontend must handle this gracefully.
- **Null active_workspace**: A newly registered user might not have set an active workspace yet, so this field could be `None`.
## Authorization and Security
These schemas do **not** contain authorization logic—they only validate structure and types. Authorization happens at the **service or router middleware level**:
- **ProfileUpdateRequest**: Only the authenticated user (or admins) can update their own profile. Router middleware checks `request.user.id == profile_owner_id`.
- **SetWorkspaceRequest**: Router/service must verify the user is a member of the target workspace. This prevents users from "switching" to workspaces they don't belong to.
- **UserResponse**: Never expose sensitive fields (e.g., password hashes, API keys). This schema only includes safe-to-expose fields.
## Dependencies and Integration
### What imports this module?
From the import graph, **6 files depend on these schemas**:
1. **router** (`ee/cloud/auth/router.py`) — Uses all three schemas as request/response models for HTTP endpoints
2. **service** (`ee/cloud/auth/service.py`) — May use as type hints for return values
3. **group_service** (`ee/cloud/group_service.py` or similar) — Likely returns `UserResponse` when group operations affect users
4. **message_service** (`ee/cloud/message_service.py`) — May return user info in message payloads; uses `UserResponse`
5. **ws** (WebSocket handler) — Sends `UserResponse` in WebSocket messages to connected clients
6. **agent_bridge** (Agent/AI integration) — Returns user info when agent needs context about who initiated a request
This wide distribution indicates that **user response format is a system-wide contract**—it's not just an auth concern, but part of the core data model visible throughout the application.
### What does this module depend on?
Minimal dependencies:
- **pydantic** (third-party package) — Provides `BaseModel`
- **Python 3.10+** (type hints use `X | None` syntax instead of `Union[X, None]`)
No domain dependencies, no circular imports. This is by design—schemas modules should be dependency-light so they can be imported everywhere without creating dependency cycles.
## Design Decisions
### 1. Pydantic BaseModel (not dataclasses or TypedDict)
Why not `@dataclass` or `TypedDict`?
- **Validation**: `BaseModel` validates on instantiation. `@dataclass` does not.
- **Serialization**: `BaseModel.model_dump()` and `model_dump_json()` are built-in. Dataclasses need manual serialization.
- **ORM integration**: `from_attributes=True` bridges ORM objects easily. Dataclasses don't have this.
- **JSON schema generation**: FastAPI auto-generates OpenAPI docs from Pydantic schemas. Dataclasses don't integrate as cleanly.
### 2. Partial vs. Required Fields
- `ProfileUpdateRequest`: All fields optional (`None` defaults) — **partial update pattern**
- `SetWorkspaceRequest`: Required `workspace_id` — **explicit command pattern**
- `UserResponse`: All fields required except `active_workspace` (defaults to `None`) — **complete data contract**
This design mirrors REST semantics: `PATCH` (partial), `POST` (explicit action), `GET` (full state).
### 3. `active_workspace: str | None`
Why nullable? Because:
- A newly registered user might not have selected a workspace yet
- A user's only workspace might have been deleted or they lost access
- Lazy initialization—don't force workspace selection during signup
Frontend must handle `None` gracefully (prompt user to select workspace, or auto-assign one).
### 4. `workspaces: list[dict]` (not `list[WorkspaceResponse]`)
Why use `list[dict]` instead of a separate `WorkspaceResponse` schema?
Likely reasons:
1. **Simplicity**: Workspace details aren't standardized yet, or vary by context
2. **Flexibility**: Each workspace object might have different fields (metadata, permissions, role, etc.) without needing another schema
3. **Deferred definition**: Workspace schema might live in a separate `workspace/schemas.py` module, and auth module avoids the cross-domain dependency
This is a trade-off: flexibility vs. type safety. As the system matures, this might become `list[WorkspaceResponse]` to add structure.
### 5. `from_attributes = True` Config
This is a **Pydantic v2 convention** (previously `orm_mode = True` in v1). It assumes:
- Domain objects are ORM models or Beanie documents
- They have attributes matching schema field names
- No custom mapping logic needed in services
This keeps services thin: no manual `UserResponse(id=..., email=...)` boilerplate.
## Related Concepts
- **Request/Response Validation**: Core HTTP pattern. Schemas = contracts.
- **ORM Integration**: `from_attributes` bridges database models to HTTP responses.
- **Multi-workspace Architecture**: The presence of `active_workspace` and `workspaces` list indicates the system supports user-to-many-workspaces relationships.
- **Partial Updates**: `ProfileUpdateRequest` with nullable fields is PATCH semantics.
- **Type Safety with Pydantic**: Static type hints + runtime validation.
---
## Related
- [untitled](untitled.md)

View File

@@ -0,0 +1,192 @@
# schemas — Pydantic request/response contracts for session lifecycle operations
> This module defines the HTTP API contracts (request bodies and response payloads) for the sessions domain using Pydantic BaseModel. It exists to enforce type safety and validation at the API boundary, ensuring that clients can only submit well-formed session creation/update requests and receive consistently-shaped session responses. As a schema module, it serves as the contract layer between the FastAPI router and the business logic, used by 6 downstream consumers (router, service, group_service, message_service, ws, agent_bridge).
**Categories:** sessions domain, API contract layer, data validation, CRUD operations
**Concepts:** CreateSessionRequest, UpdateSessionRequest, SessionResponse, Pydantic BaseModel, API contract layer, request/response schemas, type validation, soft delete pattern, denormalization, dual ID strategy
**Words:** 1531 | **Version:** 1
---
## Purpose
This module defines the **data contracts** for HTTP requests and responses in the sessions domain. It solves the problem of:
1. **Type Safety at the Boundary**: FastAPI uses these Pydantic models to validate incoming JSON and automatically reject malformed requests before they reach business logic
2. **Documentation**: The field definitions serve as OpenAPI/Swagger documentation; clients know exactly what fields are required/optional
3. **Consistency**: All callers (HTTP handlers, async services, WebSocket handlers, agent bridges) operate against the same schema definitions, reducing duplication and drift
4. **Decoupling**: Router handlers don't directly depend on the persistence model (MongoDB document); they depend on these schemas, allowing the internal model to evolve without breaking the API
In the system architecture, schemas sit at the **API contract layer**—above the service layer but below HTTP delivery. They transform between the wire format (JSON) and Python objects that services consume.
## Key Classes and Methods
### CreateSessionRequest
Represents the payload required to create a new session.
**Fields:**
- `title: str` — User-facing name for the session. Defaults to `"New Chat"` if omitted, allowing clients to create a session without specifying a title.
- `pocket_id: str | None` — Optional link to a "pocket" (likely a container/project/space concept) at creation time. Clients can omit this to create an unlinked session.
- `group_id: str | None` — Optional association with a group. The system allows null; business logic determines if this is meaningful.
- `agent_id: str | None` — Optional association with an AI agent. Enables agent-specific sessions (e.g., a session for a particular chatbot).
**Business Logic Notes:**
The presence of `pocket_id`, `group_id`, and `agent_id` suggests sessions can exist in multiple organizational contexts. A single session might belong to a workspace, but optionally nest within a pocket, belong to a group, and/or be associated with an agent. The schema doesn't enforce mutual exclusivity, allowing flexible linking strategies.
### UpdateSessionRequest
Represents the payload for partial session updates.
**Fields:**
- `title: str | None` — Update the session title. Null means "don't change."
- `pocket_id: str | None` — Relink or unlink the session from a pocket. Null is semantically ambiguous (does it mean "remove link" or "don't change"?); likely requires careful service-layer interpretation.
**Business Logic Notes:**
Notably, this schema does **not** allow updating `group_id` or `agent_id` after creation. This suggests those associations are considered immutable or require different endpoints. The `pocket_id` is updatable, implying session–pocket relationships are meant to be flexible.
### SessionResponse
The shape of a session in GET responses and after mutations.
**Fields:**
- `id: str` — Primary key (likely MongoDB ObjectId as string)
- `session_id: str` — Unique session identifier. Distinct from `id`; likely a friendly snowflake ID or UUID, used for external APIs and user-facing URLs.
- `workspace: str` — Every session is scoped to a workspace (multi-tenancy isolation)
- `owner: str` — User ID who created/owns the session
- `title: str` — The session's name
- `pocket: str | None` — Denormalized reference to the linked pocket (or null)
- `group: str | None` — Denormalized reference to the linked group (or null)
- `agent: str | None` — Denormalized reference to the linked agent (or null)
- `message_count: int` — Cached count of messages in this session (denormalized for performance)
- `last_activity: datetime` — Timestamp of the most recent message or event
- `created_at: datetime` — Creation timestamp
- `deleted_at: datetime | None` — Soft-delete timestamp. Null means active; non-null means logically deleted but retained for auditing
**Design Notes:**
The response includes both `id` and `session_id`, suggesting internal IDs differ from external IDs. Denormalized fields (`message_count`, `pocket`, `group`, `agent`) indicate the response is pre-computed or aggregated by the service layer, not a direct database dump. The `deleted_at` field reveals a soft-delete strategy (logical deletion with retention).
## How It Works
### Data Flow
1. **Client sends HTTP request** (e.g., POST `/sessions` with JSON body)
2. **FastAPI receives JSON** → **Pydantic validates** against `CreateSessionRequest`
- If validation fails (missing required field, wrong type), FastAPI returns 422 Unprocessable Entity with detailed errors
- If valid, FastAPI hydrates the `CreateSessionRequest` object
3. **Router handler receives the validated object** → calls `SessionService.create(request)`
4. **Service layer** transforms the schema into a database model, persists it, and returns a populated `SessionResponse`
5. **Router serializes the response** as JSON and returns it to the client
### Request Validation Examples
**Valid CreateSessionRequest:**
```json
{"title": "Project Planning", "pocket_id": "poc_123", "agent_id": "agent_456"}
```
Will be accepted; `group_id` is inferred as `null`.
**Valid CreateSessionRequest (minimal, relying on defaults):**
```json
{"pocket_id": "poc_123"}
```
Will be accepted; `title` defaults to `"New Chat"`, and `group_id`, `agent_id` default to `null`.
**Invalid UpdateSessionRequest:**
```json
{"title": 123}
```
Will be rejected by Pydantic (title must be `str` or `None`, not `int`).
### Edge Cases
1. **Null pocket_id in UpdateSessionRequest**: The schema allows it, but the service layer must decide: does it mean "unlink the pocket" or "don't update the pocket field"? This is a common ambiguity in PATCH operations; the service likely has a convention (e.g., explicit `null` = unlink, field omitted = no change).
2. **Soft Deletes**: The response includes `deleted_at`. Either clients must filter out soft-deleted sessions themselves, or the service layer must pre-filter GET responses to exclude them.
3. **Denormalization**: Fields like `message_count` and `last_activity` are snapshots at the time of the response. Concurrent messages may make these values stale immediately; this is a trade-off for read performance.
## Authorization and Security
**Not explicitly defined in this module.** However:
- **Workspace Scoping**: Every session has a `workspace` field. The router/service layer should validate that the authenticated user has access to that workspace before allowing read/write.
- **Ownership**: The `owner` field suggests only the owner (or admins) can update a session.
- **Field Exposure**: The response includes `owner` and `workspace`, so clients can check the access-control context client-side or record it for auditing.
Actual authorization logic lives in the router or a middleware layer (not shown here), but this schema enables those guards by exposing the necessary context.
## Dependencies and Integration
### Consumers (Import Graph)
This module is imported by:
1. **router** — HTTP handlers that accept `CreateSessionRequest` and `UpdateSessionRequest` as body parameters, return `SessionResponse`
2. **service** — The SessionService accepts requests and returns responses; may transform request fields into database operations
3. **group_service** — Likely retrieves sessions linked to a group; uses schemas for type hints and response consistency
4. **message_service** — Operates on sessions; may update `last_activity` or `message_count` fields in the response
5. **ws** — WebSocket handlers that deserialize session data and send `SessionResponse` over the wire
6. **agent_bridge** — External agent integration that reads/writes sessions; needs consistent contracts
### No Internal Dependencies
This module does not import from other modules in the scanned set, keeping it isolated and free from circular dependencies. It only depends on:
- **pydantic** (external): The BaseModel, Field utilities for validation and serialization
- **datetime** (stdlib): For `datetime` type hints
### Integration Pattern
The schema acts as a **contract layer**:
```
HTTP Client
↓ (JSON)
FastAPI Router
↓ (CreateSessionRequest object)
SessionService
↓ (transforms to DB model, executes logic)
MongoDB
↓ (fetches/persists)
SessionService
↓ (transforms DB model to SessionResponse)
FastAPI Router
↓ (JSON serialization via Pydantic)
HTTP Client
```
Each layer depends on the schema contracts, but not on each other's internal representations.
## Design Decisions
### 1. **Dual ID Strategy** (`id` vs. `session_id`)
- `id`: Likely the MongoDB ObjectId, kept internal for direct database queries
- `session_id`: A friendly, external ID (possibly shorter, more readable)
- **Rationale**: Decouples the public API from database internals; allows ID rotation or migration without breaking clients
### 2. **Soft Deletes via `deleted_at` Field**
- Sessions are never fully deleted; only marked with a `deleted_at` timestamp
- **Rationale**: Preserves audit trails, allows recovery, and enables "trash" features. Services must explicitly filter by `deleted_at IS NULL` in queries.
### 3. **Denormalized Fields in Response** (`message_count`, `pocket`, `group`, `agent`, `last_activity`)
- These are not raw database fields but computed/cached values
- **Rationale**: Improves client UX (no need for extra round-trips to fetch metadata) and read performance (precomputed aggregations)
- **Trade-off**: Write-path complexity; services must update these fields when related data changes
### 4. **Optional Associations** (`pocket_id`, `group_id`, `agent_id` all nullable)
- Sessions can exist without any of these links
- **Rationale**: Flexibility; different use cases may require different organizational structures (standalone sessions, pocket-scoped, group-scoped, or agent-specific)
### 5. **Immutable Group and Agent Associations**
- `UpdateSessionRequest` does not allow changing `group_id` or `agent_id`
- **Rationale**: Likely these are architectural dependencies that should not be reassigned post-creation; changing them might violate business logic or require cascade operations
- **Pocket is mutable**: Suggests pockets are more like tags or lightweight containers; sessions can move between them
### 6. **Pydantic's `from_attributes=True` (implicit)**
While not shown, `SessionResponse` likely sets `from_attributes=True` in its Pydantic model config (this is a per-model setting, not something FastAPI configures) to allow database objects (MongoDB documents) to be validated directly into `SessionResponse`. The service layer likely relies on this to convert database objects to the schema.
## Architectural Context
**Schemas** are part of the **API layer**, sitting between:
- **Presentation** (HTTP, WebSocket, external APIs) — receives/returns these models
- **Business Logic** (Service layer) — consumes and produces these models
- **Persistence** (MongoDB models) — different structure, transformed to/from schemas
This module enforces the **contract-first** design pattern: the API contract is explicit and comes before implementation, reducing surprises and enabling early validation.
---
## Related
- [untitled](untitled.md)

View File

@@ -0,0 +1,206 @@
# schemas — Pydantic request/response data models for workspace domain operations
> This module defines the contract between the workspace API layer and its consumers by providing Pydantic data models for validating incoming requests and serializing outgoing responses. It exists to centralize workspace-related data validation and type safety in one place, ensuring consistency across the router, service layer, and external integrations (agent_bridge, ws) that need to understand workspace operations. It serves as the domain-level API boundary for all workspace CRUD, invite management, and member role operations.
**Categories:** workspace domain, API contract layer, data validation, schema definition, Pydantic DTOs
**Concepts:** CreateWorkspaceRequest, UpdateWorkspaceRequest, CreateInviteRequest, UpdateMemberRoleRequest, WorkspaceResponse, MemberResponse, InviteResponse, validate_slug, field_validator, BaseModel
**Words:** 1866 | **Version:** 1
---
## Purpose
The `schemas` module is a **data contract definition layer** that sits between the HTTP API and the business logic. Its primary purposes are:
1. **Input Validation**: Validates incoming HTTP requests before they reach service logic, catching malformed data early (e.g., slug format, role values)
2. **Type Safety**: Provides structured typing through Pydantic BaseModel, enabling IDE autocomplete, static analysis, and runtime validation
3. **API Documentation**: Serves as the source of truth for what the workspace API accepts and returns, automatically documenting endpoints
4. **Cross-Layer Contract**: Creates a shared language between the router (HTTP layer), service layer, and external systems (agent_bridge for AI operations, ws for real-time events)
This is a **stateless, declarative module** — it contains no business logic, only schema definitions. It's imported by multiple downstream consumers (router, service, group_service, message_service, ws, agent_bridge) because they all need to understand the same data structures.
## Key Classes and Methods
### Request Classes (Input Validation)
#### `CreateWorkspaceRequest`
**Purpose**: Validates the creation of a new workspace.
**Fields**:
- `name` (str, 1-100 chars): The human-readable workspace name
- `slug` (str, 1-50 chars): The URL-safe identifier for the workspace (e.g., "my-team-workspace")
**Validation Logic**:
- `validate_slug()` method enforces that slugs match the pattern `^[a-z0-9][a-z0-9-]*[a-z0-9]$|^[a-z0-9]$`
- Must start and end with alphanumeric characters
- Can contain hyphens in the middle
- Must be lowercase only
- This prevents invalid URLs and domain-like identifiers
**Business Reason**: Slugs are used in URLs (`/workspace/{slug}`), so they must be URL-safe and readable. Restricting to lowercase and hyphens ensures consistency across the system.
#### `UpdateWorkspaceRequest`
**Purpose**: Validates partial updates to an existing workspace.
**Fields**:
- `name` (str | None): Optional new workspace name
- `settings` (dict | None): Optional workspace-level configuration (flexible schema for future extensibility)
**Business Reason**: All fields are optional (`None`), allowing clients to update only what they need. This is standard REST PATCH semantics.
#### `CreateInviteRequest`
**Purpose**: Validates the creation of a workspace member invitation.
**Fields**:
- `email` (str): The email address of the person being invited
- `role` (str, default="member"): The role granted to the invitee, restricted to `"admin"` or `"member"`
- `group_id` (str | None): Optional group assignment upon joining (if workspace uses group-based organization)
**Business Reason**: The role field uses a strict enum pattern (`^(admin|member)$`) to prevent invalid role assignments. The inviter shouldn't be able to create invites with invalid roles. Note that "owner" is NOT allowed here — ownership is likely assigned through different logic.
#### `UpdateMemberRoleRequest`
**Purpose**: Validates role changes for existing workspace members.
**Fields**:
- `role` (str): The new role, restricted to `"owner"`, `"admin"`, or `"member"`
**Business Reason**: Unlike `CreateInviteRequest`, this allows promotion to "owner". The pattern `^(owner|admin|member)$` ensures only valid roles are accepted. This prevents typos or injection attacks that might otherwise bypass authorization checks.
### Response Classes (Output Serialization)
#### `WorkspaceResponse`
**Purpose**: The canonical representation of a workspace returned by the API.
**Fields**:
- `id`, `name`, `slug`: Core workspace identity
- `owner` (str): The ID or email of the workspace owner
- `plan` (str): The billing plan tier (e.g., "free", "pro", "enterprise") — used by downstream services to determine feature availability
- `seats` (int): The number of member seats available on the plan
- `created_at` (datetime): Workspace creation timestamp
- `member_count` (int): Current number of active members (default 0 if not populated)
**Usage**: Returned by workspace creation, fetch, and list endpoints. The router and service layer populate this with data from the database, and it's sent to clients and potentially to agent_bridge for AI agents to understand workspace capacity and configuration.
#### `MemberResponse`
**Purpose**: Represents a workspace member in API responses.
**Fields**:
- `id`, `email`, `name`, `avatar`: Member identity and profile
- `role` (str): The member's current role (owner/admin/member)
- `joined_at` (datetime): When the member joined the workspace
**Usage**: Returned when listing workspace members or fetching member details. The avatar field allows the UI to display member pictures. The `joined_at` field provides audit information.
#### `InviteResponse`
**Purpose**: Represents a pending or accepted workspace invitation.
**Fields**:
- `id`, `email`, `role`: Invitation core data
- `invited_by` (str): The ID/email of who sent the invitation (for audit trail)
- `token` (str): The unique acceptance token (used in accept-invite endpoints, typically sent via email)
- `accepted`, `revoked`, `expired` (bool): Invitation status flags
- `expires_at` (datetime): When the invitation becomes invalid
**Business Reason**: Separating invitation state into three boolean fields (`accepted`, `revoked`, `expired`) makes the state machine explicit. An invitation can be revoked before expiration, or naturally expire. The token is a security credential that prevents anyone with just the email from accepting an invite.
## How It Works
### Data Flow
1. **Inbound Request**: An HTTP client sends a POST to `/workspace/create` with a JSON body
2. **Pydantic Validation**: FastAPI (used by the router) automatically instantiates `CreateWorkspaceRequest` from the JSON. If validation fails (e.g., slug has uppercase letters), Pydantic raises a validation error and FastAPI returns a 422 Unprocessable Entity response
3. **Service Layer Call**: If validation passes, the router calls the service layer with the validated request object
4. **Database Operation**: The service layer creates the workspace in the database
5. **Response Serialization**: The service returns data that's mapped into `WorkspaceResponse`, which FastAPI serializes to JSON
6. **Client Receipt**: The client receives the workspace details
### Cross-System Usage
- **router**: Uses request classes to validate incoming HTTP bodies, response classes to serialize database objects
- **service**: Accepts request objects, uses them to validate/transform data before database operations, returns raw data that service consumers (router) serialize using response classes
- **group_service**, **message_service**: May depend on response schemas when operating within workspace scope (e.g., verifying workspace exists before creating groups/messages)
- **ws** (WebSocket handler): Uses response classes to serialize real-time workspace events sent to connected clients
- **agent_bridge**: Uses response classes to understand workspace structure and permissions when executing AI agent operations (e.g., an AI agent needs to know the `plan` to determine available features)
### Edge Cases and Validation
- **Slug Validation**: The regex `^[a-z0-9][a-z0-9-]*[a-z0-9]$|^[a-z0-9]$` allows single-character slugs (second alternative) or multi-character slugs with hyphens in the middle. This prevents invalid slugs like `-invalid`, `invalid-`, or `INVALID`.
- **Optional Fields**: `UpdateWorkspaceRequest` and `CreateInviteRequest.group_id` are optional, allowing partial updates and conditional group assignment.
- **Role Enums**: The strict pattern on role fields prevents invalid values. If a future role type is added (e.g., "editor"), all these patterns must be updated simultaneously — this is intentional to force explicit migration.
## Authorization and Security
This module defines the **shape** of data but not the **authorization logic**. However, the schemas support authorization checks downstream:
- **Role Pattern Restrictions**: By restricting roles to known values (`admin|member|owner`), the schemas prevent role injection attacks. A malicious client cannot craft a request with `role="superuser"` — Pydantic will reject it.
- **Slug Format**: The slug validation prevents directory traversal or injection attacks that might exploit URL patterns (e.g., `/workspace/../../admin`).
- **Token in InviteResponse**: The `token` field is a security credential. Only the legitimate invitee who receives the email should have this token. The service layer (not this module) is responsible for validating the token matches the email before accepting an invite.
- **No Password Fields**: Notably, these schemas don't include passwords. Password management is likely handled in a separate auth module, which is good security practice (separation of concerns).
**Authorization is enforced outside this module**: The router layer uses these schemas to validate format, then relies on authorization middleware/decorators to check whether the requesting user is allowed to perform the operation (e.g., only workspace owners can update workspace settings).
## Dependencies and Integration
### Imports (Outbound Dependencies)
- **pydantic** (BaseModel, Field, field_validator): Core data validation framework. No database ORM (Beanie, SQLAlchemy) appears in this module, keeping it framework-agnostic.
- **datetime**: Used in `WorkspaceResponse`, `MemberResponse`, `InviteResponse` for timestamps.
- **re**: Used for slug pattern validation.
### Consumers (Inbound Dependencies)
- **router** (`/cloud/workspace/router.py`): Uses request schemas to validate API payloads, response schemas to serialize responses.
- **service** (`/cloud/workspace/service.py`): Accepts request objects, returns data that response classes wrap.
- **group_service**, **message_service**: May validate operations within workspace scope using response schemas.
- **ws** (WebSocket): Serializes real-time workspace events using response classes.
- **agent_bridge**: Deserializes `WorkspaceResponse` to understand workspace configuration for AI operations.
### Design Pattern: Request/Response Separation
This module uses the **DTO (Data Transfer Object) pattern**, split into two categories:
- **Request DTOs**: Validate and shape client input
- **Response DTOs**: Serialize and shape service output
This separation allows the service layer to accept flexible input and return rich output without coupling the HTTP contract to the database model.
## Design Decisions
### 1. **Pydantic BaseModel over Dataclasses**
Pydantic was chosen (not standard dataclasses) because it provides runtime validation, serialization, and automatic OpenAPI documentation generation. Dataclasses would require manual validation logic.
### 2. **Regex Validation for Slug**
The `validate_slug()` method uses a custom regex pattern rather than a library-provided slug validator. This suggests:
- **Explicit Control**: The team wanted precise control over what constitutes a valid slug in their domain (e.g., hyphens allowed, single-char allowed).
- **Documentation**: The pattern is readable and self-documenting.
- **No External Dependencies**: Avoids a library import for a simple pattern.
### 3. **Optional Fields in Update Requests**
`UpdateWorkspaceRequest` uses `| None` syntax (Python 3.10+ union types) for all fields. This allows clients to omit fields they don't want to change, implementing proper REST PATCH semantics.
### 4. **Separate CreateInviteRequest and UpdateMemberRoleRequest**
These could have been a single schema, but they're separate because:
- **Different Constraints**: CreateInviteRequest restricts roles to `admin|member` (logical: you can't invite someone as an owner). UpdateMemberRoleRequest allows `owner|admin|member` (logical: you can promote a member to owner).
- **Different Fields**: CreateInviteRequest has `group_id`; UpdateMemberRoleRequest doesn't.
- **Intent Clarity**: Separate classes make the intent explicit in the code and API documentation.
### 5. **Flexible Settings Field**
`UpdateWorkspaceRequest.settings` is typed as `dict | None`, not a strict schema. This suggests:
- **Forward Compatibility**: Settings can evolve without schema changes.
- **Trade-off**: Loses validation of settings structure at the schema layer. Validation is pushed to the service layer or database layer.
### 6. **Field Defaults and Patterns**
- `CreateInviteRequest.role` defaults to `"member"` — most invitations are probably member-level, so the client doesn't need to specify it.
- Role fields use `pattern` rather than an enum. Pydantic enums would be stricter but less flexible if roles change. Patterns are checked when the request is validated (at parse time), while the underlying value remains a plain string.
### 7. **Explicit Boolean Flags in InviteResponse**
Instead of a single `status` enum field (e.g., `status: "pending" | "accepted" | "expired"`), the schema uses three booleans: `accepted`, `revoked`, `expired`. This allows the database/service to represent states more flexibly (e.g., an expired invite can also be marked as revoked). The downside is that clients need to interpret multiple flags, but this is likely intentional to support complex state machines.
## Architectural Notes
- **Stateless and Declarative**: This module has no state, no async operations, no side effects. It's purely a declarative contract.
- **Framework Agnostic (Almost)**: The only framework dependency is Pydantic. The schemas don't import from FastAPI, database, or service modules, making them portable.
- **Single Responsibility**: Each class is focused on a single operation (Create, Update, Response), following the Single Responsibility Principle.
- **Validation as a Defensive Layer**: By validating at the schema layer, the downstream service and database layers can assume data is well-formed, reducing defensive programming and bugs.
---
## Related
- [untitled](untitled.md)

View File

@@ -0,0 +1,158 @@
# schemas — Pydantic request/response models for agent lifecycle and discovery operations
> This module defines four Pydantic BaseModel classes that serve as the contract layer between HTTP clients and the agent management system. It exists to provide strict input validation, type safety, and clear API documentation for agent creation, updates, discovery queries, and response serialization. By centralizing schema definitions, it ensures consistency across the router, service layer, group operations, messaging, WebSocket handlers, and agent bridge components.
**Categories:** agents domain, API layer, data model, CRUD schema definition
**Concepts:** CreateAgentRequest, UpdateAgentRequest, DiscoverRequest, AgentResponse, Pydantic BaseModel, Request/Response Schema Pattern, PATCH semantics, Visibility enum (private/workspace/public), OCEAN personality model, soul_archetype, soul_values, soul_ocean
**Words:** 1503 | **Version:** 1
---
## Purpose
The `schemas` module is the **API contract definition layer** for the agents domain in the PocketPaw system. Its primary purposes are:
1. **Input Validation**: Enforce business rules at the API boundary (e.g., agent names must be 1-100 characters, visibility must be one of three enum values, pagination page must be ≥1).
2. **Type Safety**: Provide Pydantic models that enable mypy/IDE type checking and runtime type coercion.
3. **API Documentation**: Serve as the schema source for OpenAPI/Swagger generation, making the agent API self-documenting.
4. **Cross-layer Contract**: Act as the common language between HTTP handlers (router), business logic (service), real-time handlers (ws), and integrations (agent_bridge, group_service, message_service).
This module exists separately from the service and database layers because schemas represent **client-facing contracts**, not internal domain models. A request schema might differ from a stored entity schema (e.g., UpdateAgentRequest has all-optional fields for PATCH semantics, while the stored Agent entity has required fields).
## Key Classes and Methods
### CreateAgentRequest
**Purpose**: Validates and structures data required to create a new agent.
**Fields**:
- `name` (str, 1-100 chars): Human-readable agent name, required.
- `slug` (str, 1-50 chars): URL-safe identifier, required.
- `avatar` (str): Optional profile image URL or base64 data; defaults to empty string.
- `visibility` (str, enum): Privacy level restricting who can discover the agent. Must be one of: `"private"` (owner only), `"workspace"` (workspace members), `"public"` (all users). Defaults to `"private"`.
- **Agent Config Fields**: `backend`, `model`, `persona` define which LLM backend and model to use. `backend` defaults to `"claude_agent_sdk"`; others default to empty strings, indicating the service should apply workspace or system defaults.
- **Optional Overrides**: `temperature` (float), `max_tokens` (int), `tools` (list[str]), `trust_level` (int), `system_prompt` (str) allow callers to customize inference behavior. All default to None, meaning "use service defaults."
- **Soul Customization Fields**: `soul_enabled`, `soul_archetype`, `soul_values`, `soul_ocean` (dict of personality traits) support the OCEAN personality model. This suggests agents have psychological/personality dimensions beyond just language model configuration.
**Business Logic**: This request represents the minimal required data to instantiate an agent. The presence of soul fields hints that agents are not just prompts + model configs, but have personality representation.
### UpdateAgentRequest
**Purpose**: Validates partial updates to an existing agent (PATCH semantics).
**Key Difference from CreateAgentRequest**: All fields are optional (`| None`). This allows clients to update only the fields they care about.
**Fields**:
- Mirrors CreateAgentRequest's fields but with None defaults.
- Additional `config` (dict) field allows arbitrary backend-specific configuration to be passed through without schema validation, providing extensibility for unforeseen agent config keys.
**Business Logic**: The None defaults mean the router/service must distinguish between "field not provided" (remains None, no update) and "field provided as None/empty" (explicit deletion/clearing). The `config` dict is a **catch-all escape hatch** for agent-specific settings that don't fit the top-level schema.
### DiscoverRequest
**Purpose**: Structures parameters for agent discovery/search queries.
**Fields**:
- `query` (str): Search term; defaults to empty string (may mean "return all" or "match nothing" depending on service implementation).
- `visibility` (str | None): Optional filter to limit results to agents with a specific visibility level. None = no filter.
- `page` (int, ≥1): 1-based page number; defaults to 1 (first page).
- `page_size` (int, 1-100): Results per page; defaults to 20. Capped at 100 to prevent abuse/large memory allocations.
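**Business Logic**: This is a **search/list query model**, not a mutation. The validation constraints (page ≥ 1, page_size ≤ 100) reject invalid pagination values and block oversized-result DoS requests at the API boundary. They do not sanitize the `query` string itself; that remains the service layer's responsibility.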
### AgentResponse
**Purpose**: Serialization schema for agent entities returned to clients.
**Fields**:
- `id` (str): Unique agent identifier (likely MongoDB ObjectId as string).
- `workspace` (str): Workspace ID; enables multi-tenancy and access control checks.
- `name`, `slug`, `avatar`, `visibility`: Same meaning as in CreateAgentRequest; represent the agent's public-facing properties.
- `config` (dict): The resolved agent configuration (backend, model, persona, temperature, etc.) after service-side defaults have been applied. Returned as a generic dict rather than a structured Pydantic model, suggesting the service handles flattening/nesting.
- `owner` (str): User ID of the agent creator.
- `created_at`, `updated_at` (datetime): Metadata for sorting, caching, and concurrency control. Pydantic serializes these datetime values to ISO 8601 strings in JSON responses.
**Business Logic**: This is the **output contract**. It includes computed/derived fields (owner, timestamps, resolved config) that requests don't contain, because these are set by the service layer, not the client.
## How It Works
### Request Flow
1. **Client sends HTTP request** with JSON body (e.g., POST /agents with CreateAgentRequest data).
2. **FastAPI/Pydantic deserialization**: The router receives the raw JSON and Pydantic validates it against the schema. If validation fails, a 422 error is returned immediately with field-level error details.
3. **Service layer processes** the validated request object, applying business logic (defaults, access control, LLM calls, database writes).
4. **Response serialization**: The service returns domain objects (e.g., Agent entity from database), which are converted to AgentResponse via Pydantic serialization. The `created_at` and `updated_at` datetimes are automatically ISO8601-encoded.
### Edge Cases & Constraints
- **Empty query in DiscoverRequest**: Behavior depends on service implementation; likely returns all agents the user can see, or returns none. No explicit default behavior in the schema.
- **Optional fields in UpdateAgentRequest**: The service must check for None vs. empty string vs. missing key to avoid accidental deletions (e.g., clearing system_prompt when field was simply omitted).
- **soul_ocean as dict[str, float]**: This is a **flexible key-value structure** allowing arbitrary trait names and scores. The schema doesn't validate trait names or value ranges, enabling extensibility but risking garbage data.
- **visibility pattern validation**: The regex `^(private|workspace|public)$` is enforced at parse time, preventing invalid visibility values from reaching business logic.
## Authorization and Security
This module **enforces no authorization logic itself**; it only validates structure and type. However, it enables authorization downstream:
- **Visibility field**: Guides router/service to enforce access control. For example, the service may choose to restrict or audit requests with `visibility="public"` from non-admin users; no such rule is defined in this schema.
- **Workspace scoping**: The AgentResponse includes `workspace` field, allowing API consumers to verify the agent belongs to their workspace before operations.
- **URL-safe slug**: Slugs are constrained to 1-50 characters, but this schema does not explicitly validate their character set. Any protection against traversal-style slugs (e.g., containing `/` or `..`) must therefore come from the service layer or from an added format validator.
Note: No explicit role/permission field in the schemas suggests authorization is handled elsewhere (likely in router via dependency injection, or in service layer).
## Dependencies and Integration
### What This Module Imports
- **pydantic**: BaseModel for validation and serialization.
- **datetime**: For created_at/updated_at timestamps.
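- **from __future__ import annotations**: Defers evaluation of annotations (PEP 563), enabling forward references. The project requires Python 3.11+, so this is not needed for `X | None` syntax and is a stylistic convention rather than a compatibility shim.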
### What Depends on This Module (Import Graph)
1. **router**: Deserialization and response serialization in HTTP endpoints (e.g., POST /agents, PATCH /agents/{id}, GET /agents/discover).
2. **service**: Type hints for agent business logic; service methods likely accept CreateAgentRequest/UpdateAgentRequest and return AgentResponse or list[AgentResponse].
3. **group_service**: May accept DiscoverRequest or create DiscoverRequest-like queries to fetch agents for a group.
4. **message_service**: Likely uses AgentResponse to serialize agents referenced in messages or message metadata.
5. **ws** (WebSocket handler): Uses schemas for real-time agent events (creation, update, discovery broadcasts).
6. **agent_bridge**: Integration layer with external agent systems; likely transforms AgentResponse to/from external formats.
### Data Flow Example
```
Client (JSON)
→ FastAPI Router (deserialize via CreateAgentRequest)
→ Service.create_agent(request: CreateAgentRequest)
→ Database insert (MongoDB, Beanie ODM inferred)
→ Returns Agent entity
→ Router serializes Agent as AgentResponse
→ Client (JSON response)
```
## Design Decisions
### 1. **Separation of Request and Response Schemas**
- CreateAgentRequest and UpdateAgentRequest allow clients to provide input; AgentResponse includes server-computed fields (owner, timestamps, resolved config).
- This prevents clients from forging ownership or timestamps and makes the response contract richer than the request contract.
### 2. **All-Optional UpdateAgentRequest**
- Enables PATCH semantics (partial updates) rather than forcing full-object replacement.
- Downside: Service layer must carefully distinguish None (no update) from empty string (clear field); likely requires explicit null handling logic.
### 3. **Generic dict for config and soul_ocean**
- These fields allow arbitrary key-value data without rigid schema definition.
- **Pro**: Extensible; agents can have bespoke settings without schema migrations.
- **Con**: Runtime type errors; no IDE autocomplete; harder to validate business constraints (e.g., soul_values should not exceed 5 items).
### 4. **Visibility as String Enum Pattern**
- Uses Pydantic `pattern` validation rather than Python Enum, keeping the contract lightweight and JSON-compatible.
- Downside: No type safety on the Python side; developers must string-match or wrap in an Enum themselves.
### 5. **soul_* Fields in Core Schemas**
- The presence of soul customization (archetype, values, ocean) in the core CreateAgentRequest/UpdateAgentRequest suggests agents have **personality-first design**, not just LLM config.
- This hints at a broader system philosophy where agents are treated as autonomous entities with psychological traits, not mere prompt templates.
### 6. **Backend Default to "claude_agent_sdk"**
- Hard-coded default suggests the system primarily targets Claude; other backends are secondary/opt-in.
- Allows backward compatibility: old clients that don't specify backend will still work.
### 7. **Pagination Constraints (page ≥ 1, page_size ≤ 100)**
- Prevents edge case bugs (page 0, negative pages) and DOS attacks (requesting 10M results at once).
- Standard practice in REST APIs.
---
## Related
- [untitled](untitled.md)

View File

@@ -0,0 +1,161 @@
# schemas — Request/response data validation for the knowledge base REST API
> This module defines Pydantic request/response schemas for the knowledge base domain, providing type-safe contract definitions for REST API endpoints. It exists as a separate module to centralize data validation logic and serve as a single source of truth for API input/output structures across router, service, and messaging layers. These schemas enforce business constraints (query length, result limits, scope overrides) at the API boundary.
**Categories:** knowledge base domain, API layer, data validation, request/response contracts
**Concepts:** SearchRequest, IngestTextRequest, IngestUrlRequest, LintRequest, Pydantic BaseModel, Field constraints (min_length, ge, le), API contract, Request validation, Workspace scoping, Optional scope override
**Words:** 1453 | **Version:** 1
---
## Purpose
The `schemas` module is the **API contract layer** for the knowledge base domain. It defines the shape, validation rules, and constraints for all data flowing into and out of knowledge base operations.
**Why it exists:**
- **Single source of truth**: All consumers (REST router, internal services, WebSocket handlers, message processors) reference the same schema definitions, eliminating duplication and ensuring consistency
- **Early validation**: Pydantic validates incoming requests at the API boundary before they reach business logic, catching malformed data immediately
- **Type safety**: Python and IDE tooling can infer types from these schemas, reducing runtime errors
- **Constraint enforcement**: Encodes business rules (minimum query length, result limits) as declarative field constraints rather than scattered validation code
- **API documentation**: Serves as the specification for API consumers (can auto-generate OpenAPI/Swagger docs)
**Role in architecture:**
This module sits at the **HTTP API boundary layer**, immediately below the router. When a request arrives at a FastAPI endpoint, FastAPI uses these schemas to parse and validate the JSON payload. If validation fails, FastAPI returns a 422 Unprocessable Entity before the endpoint handler executes. If it succeeds, the endpoint receives a populated, validated model instance.
## Key Classes and Methods
### SearchRequest
Represents a knowledge base search query.
**Fields:**
- `query: str` — The search text. Must be 1+ characters (enforced by `min_length=1`). This is the primary input; empty queries are rejected at the schema level.
- `scope: str | None` — Optional workspace scope override. If provided, restricts search to that scope; if `None`, the default workspace scope is used. Allows cross-workspace queries when explicitly requested.
- `limit: int` — Result count ceiling. Defaults to 10, constrained to `ge=1, le=100` (must be between 1 and 100 inclusive). This prevents accidental or malicious unbounded result sets that could exhaust memory or timeout.
**Purpose:** Validates search operation input. Used by the search router endpoint to type-check and bound the search request before calling the search service.
### IngestTextRequest
Represents a request to add text content to the knowledge base.
**Fields:**
- `text: str` — The text content to ingest. Must be 1+ characters. Rejects empty payloads.
- `source: str` — Metadata indicating where the text came from. Defaults to `"manual"` (user-entered). Could also be `"api"`, `"upload"`, etc. Enables audit trails and content categorization without requiring it in every request.
- `scope: str | None` — Optional scope override, same as SearchRequest. Allows ingestion into a specific workspace.
**Purpose:** Validates direct text ingestion (e.g., user pastes content into a form, or programmatically pushes text via API). Distinguishes from URL-based ingestion.
### IngestUrlRequest
Represents a request to ingest content from a URL.
**Fields:**
- `url: str` — The URL to fetch and ingest. Must be 1+ characters. No further validation (e.g., no regex URL validation) at the schema level; the service layer is responsible for fetching the URL and validating that it actually resolves.
- `scope: str | None` — Optional scope override.
**Purpose:** Validates URL-based ingestion requests. Simpler than `IngestTextRequest` because the service must fetch and parse the URL content itself; the schema only validates that the input URL string is non-empty.
### LintRequest
Represents a request to lint/validate the knowledge base.
**Fields:**
- `scope: str | None` — Optional scope override. Allows linting a specific workspace or all knowledge base content.
**Purpose:** Triggers knowledge base linting operations (e.g., checking for malformed entries, broken links, consistency violations). Minimal schema because linting is scope-driven and takes no additional parameters in this design.
## How It Works
**Data flow:**
1. **HTTP Request arrives** → FastAPI router receives raw JSON body
2. **Pydantic parsing** → FastAPI automatically instantiates the appropriate schema class (e.g., `SearchRequest`) from the JSON
3. **Validation** → Pydantic runs all Field constraints (min_length, ge, le, etc.). If validation fails, FastAPI returns 422 with validation error details
4. **Type inference** → If validation passes, the router handler receives a fully-typed model instance (e.g., `request: SearchRequest`) with IDE autocompletion
5. **Downstream consumption** → The request model is passed to service layers (SearchService, IngestService, etc.), which can assume the data is already valid
**Key constraints in action:**
- `SearchRequest.query` with `min_length=1`: Prevents searches for empty strings. The service never sees `query=""`.
- `SearchRequest.limit` with `ge=1, le=100`: Prevents requesting 0 results (nonsensical) or 10,000 results (DoS risk). The service always receives `1 <= limit <= 100`.
- `IngestTextRequest.text` with `min_length=1`: Prevents ingesting empty content.
- `scope: str | None`: All request types allow an optional scope override. If the client doesn't provide it, the application's default workspace scope is used (logic elsewhere); if provided, it overrides the default. The field itself is optional in the schema, but the service/router layer must always resolve a concrete scope before executing the operation.
**Edge cases:**
- **Whitespace-only input**: A string of spaces `" "` passes `min_length=1` validation. Trimming/sanitization is deferred to service logic.
- **Special characters in query**: No regex constraints in the schema; the search engine handles special characters.
- **Large URL strings**: The schema doesn't limit URL length; the HTTP server or reverse proxy may reject overly large payloads before reaching the schema validator.
- **None vs missing**: A client may send `"scope": null` (explicitly None) or omit the `scope` field entirely (the default None is used). Both result in `scope=None` at the schema level, so downstream code cannot tell the two cases apart from the field value alone.
## Authorization and Security
This module **does not implement authorization**. It only validates data structure and format. Authorization ("Can this user access this scope?") is enforced elsewhere—likely in the router layer (via FastAPI dependency injection) or service layer.
**Security considerations:**
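- **Bounded result size** (`limit` with `le=100`) provides basic DoS mitigation by capping how many results a single request can demand. Note that `min_length=1` only rejects empty input; the schemas set no `max_length`, so the size of `query`, `text`, and `url` is not bounded here and must be limited by the HTTP server, reverse proxy, or service layer.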
- **Scope field** allows optional scope override, but no authorization check happens here. The router or service must verify the requesting user has permission to access that scope.
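- **Type safety** reduces injection risk by parsing structured input (JSON) into typed fields rather than relying on string interpolation. It does not sanitize field contents, so downstream code must still treat `query`, `text`, and `url` as untrusted input.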
## Dependencies and Integration
**Dependencies (what this module needs):**
- `pydantic.BaseModel, Field` — For schema definition and validation. Pydantic is a mature, widely-used library for this pattern.
- Python 3.10+ type hints (`str | None` syntax) — Requires modern Python.
**Dependents (what uses this module):**
From the import graph, the following modules import from `schemas`:
- **router** — Uses schemas to type-hint endpoint parameters. FastAPI automatically validates incoming JSON against the schemas.
- **service** — May import schemas for type hints on internal function signatures (e.g., `def search(request: SearchRequest) -> SearchResponse`).
- **group_service, message_service** — May use schemas for cross-domain operations (e.g., message_service sends knowledge base queries on behalf of users).
- **ws** (WebSocket handler) — Receives JSON over WebSocket and validates against schemas before passing to service logic.
- **agent_bridge** — An external or autonomous agent interface that constructs and sends knowledge base requests, using schemas to understand the contract.
**Data flow map:**
```
HTTP/WebSocket Client
↓ (raw JSON)
router / ws handler
↓ (instantiate schema via Pydantic)
SearchRequest | IngestTextRequest | IngestUrlRequest | LintRequest
↓ (pass validated model)
service / group_service / message_service / agent_bridge
↓ (execute business logic)
Knowledge base operations
```
## Design Decisions
**1. Schema-per-operation pattern**
Rather than a single generic `Request` class, each operation gets its own schema (SearchRequest, IngestTextRequest, etc.). This allows operation-specific constraints:
- Search requires a `query`; ingestion does not.
- Ingestion has a `source` field; search does not.
- Lint has minimal fields.
Trade-off: More classes to maintain, but clearer contracts and better error messages ("LintRequest expects scope, not query").
**2. Optional scope override**
All schemas allow `scope: str | None`. Rather than requiring the client to know the default scope, the client can override it if needed. The application's default is used if not provided.
Trade-off: Slightly more code in services to handle the override logic, but more flexible API for multi-workspace scenarios.
**3. Constrained integers with Field(ge=..., le=...)**
The `limit` field uses Pydantic's `ge` (greater than or equal) and `le` (less than or equal) validators instead of custom validation logic. This is declarative and automatically included in generated API docs.
Trade-off: Constraints are hardcoded (1–100); if you want to vary the limit globally, you'd need to change this file and restart the server.
**4. Minimal validation in schemas**
The schemas validate structure (types, lengths) but not semantics (e.g., "is this URL valid?", "does this scope exist?"). Semantic validation is deferred to service logic. This keeps schemas lightweight and focused on the HTTP API contract.
Trade-off: Service code must still validate; you don't get automatic error responses from schema validation for invalid URLs. But this is appropriate because fetching and validating a URL is a business-logic concern, not a schema concern.
**5. Pydantic BaseModel**
Using Pydantic (rather than dataclasses or hand-rolled validation) provides automatic serialization, JSON schema generation, IDE support, and a massive ecosystem. FastAPI has first-class Pydantic integration.
Trade-off: Adds a dependency; but Pydantic is already ubiquitous in modern Python web frameworks.
---
## Related
- [untitled](untitled.md)

View File

@@ -0,0 +1,222 @@
# service — Chat domain re-export facade for backward compatibility
> This module serves as a thin re-export layer for the chat domain, consolidating public APIs from two specialized service modules (GroupService and MessageService) into a single import point. It exists to maintain backward compatibility after a refactoring that split monolithic chat logic into focused, single-responsibility modules. As the primary entry point for chat operations, it bridges higher-level routers and agent systems with the underlying service implementations.
**Categories:** chat domain, service layer, architectural refactoring, backward compatibility
**Concepts:** service facade, backward compatibility layer, re-export pattern, GroupService, MessageService, _group_response, _message_response, stateless service, single responsibility principle, bounded contexts
**Words:** 1267 | **Version:** 1
---
## Purpose
This module exists as a **facade and backward compatibility layer** following a significant refactoring of the chat domain. The original monolithic `service.py` contained both group management and message handling logic, which created maintenance challenges, unclear responsibilities, and the infamous N+1 query problem in group operations.
The refactoring extracted this logic into two specialized modules:
- **`group_service.py`**: Handles group CRUD operations, membership management, and group responses (with N+1 query fixes)
- **`message_service.py`**: Handles message creation, agent message creation, and message responses
This module re-exports the public APIs from both specialized modules, allowing existing code that imports from `chat.service` to continue working without change. This is a classic **facade pattern** applied to architectural evolution.
### Role in System Architecture
The chat service layer sits between:
- **Upstream consumers**: `router.py` (FastAPI endpoints), `agent_bridge.py` (agent integration points)
- **Downstream dependencies**: Domain schemas, user/group/workspace management, message persistence, event publishing, permission checks, session management
It abstracts away implementation details while providing a clean, stable API surface for chat operations.
## Key Classes and Methods
### GroupService
**What it does**: Manages the lifecycle of chat groups (channels/conversations), including creation, updates, deletion, and membership operations.
**Exported for**: Routers and agent systems that need to perform group operations
**Business logic** (inferred from context):
- Likely provides CRUD operations for groups with workspace scoping
- Handles the N+1 query problem that plagued the original implementation (suggests optimized batch loading or selective field fetching)
- Includes permission checks via the `permissions` module
- Manages group memberships with user/workspace context
**Key methods** (imported but not detailed in source; see `group_service.py`):
- Methods for creating, reading, updating, deleting groups
- Methods for managing group memberships
- Helper: `_group_response` — formats group objects for API responses
### MessageService
**What it does**: Manages message creation, retrieval, and agent-generated messages within groups.
**Exported for**: Routers that need to post messages, agents that need to create agent-generated responses
**Business logic** (inferred from context):
- Handles message persistence with proper workspace/group scoping
- Includes new `create_agent_message` capability (noted in refactoring comment) for agent-generated content
- Integrates with event publishing (ripple_normalizer, events modules) to notify other parts of the system
- Manages message metadata and timestamps
**Key methods** (imported but not detailed in source; see `message_service.py`):
- Methods for creating messages
- Methods for creating agent-generated messages (new capability post-refactoring)
- Methods for retrieving messages with pagination/filtering
- Helper: `_message_response` — formats message objects for API responses
## How It Works
### Import and Re-export Pattern
```python
from ee.cloud.chat.group_service import GroupService, _group_response
from ee.cloud.chat.message_service import MessageService, _message_response
```
The module imports concrete implementations from specialized modules and immediately re-exports them. This pattern:
1. **Centralizes the public API**: Code importing `from ee.cloud.chat.service import GroupService` gets the same object as code importing `from ee.cloud.chat.group_service import GroupService`
2. **Maintains backward compatibility**: Old import paths continue to work during the transition period
3. **Enables gradual migration**: New code can import directly from specialized modules; old code continues through this facade
4. **Documents intent**: The `# noqa: F401` comments explicitly mark these as intentional re-exports, not unused imports
### Control Flow When Used
**Typical workflow for a group operation** (inferred from import dependencies):
1. Router receives HTTP request
2. Router calls `GroupService.create_group()` or similar
3. GroupService validates permissions via `permissions` module
4. GroupService queries/updates database (via schemas/models)
5. GroupService publishes domain events (via `events`, `ripple_normalizer`)
6. Router calls `_group_response()` helper to format the result
7. Router returns response to client
**Typical workflow for a message operation**:
1. Router receives message creation request
2. Router calls `MessageService.create_message()` or `create_agent_message()`
3. MessageService validates permissions and group membership
4. MessageService persists message to database
5. MessageService publishes events to notify subscribers
6. Router calls `_message_response()` to format output
7. Response is sent to client and subscribed agents/sessions
### Important Design Notes
- **N+1 Query Fix**: The original GroupService had performance issues. The refactored version likely uses:
  - Batch loading of related entities
  - Selective field projection (only fetch needed fields)
  - Explicit eager loading strategies
  - Possibly database-level aggregations
- **New Agent Message Capability**: The addition of `create_agent_message` suggests the system now supports AI agent-generated responses, requiring different metadata or publishing logic than user messages
## Authorization and Security
While not visible in this module (implementation is in the specialized service files), the import of the `permissions` module indicates:
- **Permission checks** are performed on group operations (creation, updates, deletion, membership changes)
- **Workspace scoping** ensures groups are isolated by workspace
- **User context** is required and validated for all operations
The import of `session` suggests:
- Current user/workspace context is maintained in request-scoped sessions
- Service methods likely receive session/user context as parameters
## Dependencies and Integration
### Incoming Dependencies (What Uses This Module)
- **`router.py`**: FastAPI endpoint handlers that need to perform group and message operations
- **`agent_bridge.py`**: Agent integration layer that needs to create agent-generated messages and access group state
### Outgoing Dependencies (What This Module Uses)
**Domain Models & Schemas**:
- `schemas`: Data models for groups, messages (Pydantic or Beanie models)
- `agent`, `user`, `message`: Domain objects and types
- `workspace`: Workspace scoping and isolation
**Business Logic & Helpers**:
- `group_service`: Group CRUD and membership logic (specialized module)
- `message_service`: Message CRUD and agent message creation (specialized module)
- `errors`: Custom exceptions for validation, authorization, not-found scenarios
- `permissions`: Permission checking for access control
- `session`: Request-scoped user/workspace context
**Integrations & Events**:
- `ripple_normalizer`: Normalizes domain events for consistent publishing
- `events`: Domain event definitions and publishing
- `invite`: Group invitation workflows
- `pocket`: Pocket (notebook/snippet) integration within messages
- `message`: Low-level message handling
**User & Group Management**:
- `user`: User context and lookups
- `group_service`: (explicit import) Group operations
## Design Decisions
### 1. **Facade Pattern for Backward Compatibility**
**Decision**: Keep `service.py` as a re-export layer instead of deleting it
**Why**:
- Eliminates breaking changes for existing code
- Allows gradual migration to new import paths
- Makes refactoring non-disruptive to consumers
- Clear migration path for downstream code
**Trade-off**: Adds one level of indirection; the extra import is negligible in terms of performance but adds a slight conceptual layer
### 2. **Single Responsibility Split**
**Decision**: Separate GroupService and MessageService into dedicated modules
**Why**:
- Groups and messages have different lifecycle, permissions, and query patterns
- Reduces file size and complexity
- Makes the N+1 query problem in groups easier to isolate and fix
- Enables the new `create_agent_message` capability without mixing concerns
### 3. **Helper Functions as Re-exports**
**Decision**: Include `_group_response` and `_message_response` in re-exports
**Why**:
- These are used by routers to format responses consistently
- Including them in the facade ensures routers can import everything from one place
- Supports unified response formatting across the API
**Note**: The leading underscore (`_`) suggests these are private/internal helpers, but they're important enough to re-export, indicating routers need them
### 4. **Minimal Module Content**
**Decision**: Keep this module as thin as possible (just imports and re-exports)
**Why**:
- Reduces maintenance burden
- Makes the purpose clear: it's a compatibility layer, not business logic
- Prevents accidental logic from creeping into the facade
- Forces developers to maintain logic in specialized modules
## Patterns & Concepts
- **Stateless Services**: Both GroupService and MessageService are stateless—they encapsulate business logic without maintaining state
- **Facade Pattern**: This module acts as a unified interface to specialized service modules
- **Re-export for Backward Compatibility**: A refactoring pattern for maintaining API stability during architectural changes
- **Domain Services**: Services that handle bounded context logic (groups and messages are separate bounded contexts)
- **Event-Driven Architecture**: Integration with `events` and `ripple_normalizer` suggests domain events drive downstream updates
- **Workspace Scoping**: Multi-tenant isolation through workspace context
---
## Related
- [schemas-pydantic-requestresponse-data-models-for-workspace-domain-operations](schemas-pydantic-requestresponse-data-models-for-workspace-domain-operations.md)
- [agent-agent-configuration-and-metadata-storage-for-workspace-scoped-ai-agents](agent-agent-configuration-and-metadata-storage-for-workspace-scoped-ai-agents.md)
- [untitled](untitled.md)
- [pocket-data-models-for-pocket-workspaces-with-widgets-teams-and-collaborative-ag](pocket-data-models-for-pocket-workspaces-with-widgets-teams-and-collaborative-ag.md)
- [session-cloud-tracked-chat-session-document-model-for-pocket-scoped-conversation](session-cloud-tracked-chat-session-document-model-for-pocket-scoped-conversation.md)
- [ripplenormalizer-normalizes-ai-generated-pocket-specifications-into-a-consistent](ripplenormalizer-normalizes-ai-generated-pocket-specifications-into-a-consistent.md)
- [events-in-process-async-pubsub-event-bus-for-decoupled-cross-domain-side-effects](events-in-process-async-pubsub-event-bus-for-decoupled-cross-domain-side-effects.md)
- [message-data-model-for-group-chat-messages-with-mentions-reactions-and-threading](message-data-model-for-group-chat-messages-with-mentions-reactions-and-threading.md)
- [invite-workspace-membership-invitation-document-model](invite-workspace-membership-invitation-document-model.md)
- [workspace-data-model-for-organization-workspaces-in-multi-tenant-enterprise-depl](workspace-data-model-for-organization-workspaces-in-multi-tenant-enterprise-depl.md)

View File

@@ -0,0 +1,169 @@
# session — Cloud-tracked chat session document model for pocket-scoped conversations
> The session module defines the Session document model that represents individual chat conversations in the PocketPaw system. It exists to provide a persistent, queryable data structure for tracking chat metadata (ownership, workspace affiliation, activity) while messages themselves are stored separately in Python memory. This module bridges the frontend UI contract (camelCase field naming) with the backend storage layer, enabling efficient session discovery and filtering across workspaces and organizational units.
**Categories:** chat / messaging, data model / ORM, workspace management, MongoDB persistence
**Concepts:** Session (class), TimestampedDocument (inheritance), Beanie ODM, MongoDB document model, Indexed fields, Unique constraints, Composite indexes, Soft deletion, Pydantic model, Field aliases
**Words:** 1694 | **Version:** 1
---
## Purpose
The session module solves the core data modeling problem for PocketPaw's chat system: how to track and organize conversations at scale. Each Session document represents a single chat conversation with metadata about who created it, where it lives (pocket/group/agent), when it was last active, and statistics like message count.
This module exists because:
1. **Metadata separation**: Messages are stored in Python memory for performance, but metadata needs persistent, queryable storage in MongoDB for discovery, history, and multi-instance coordination.
2. **Frontend contract alignment**: The field naming uses camelCase with explicit aliases to match the JavaScript/frontend API contract, ensuring seamless serialization without transformation layers.
3. **Multi-tenant scoping**: Sessions must be efficiently filtered by workspace and owned by users, requiring indexed fields for performant queries.
4. **Soft deletion support**: The `deleted_at` field enables logical deletion without losing historical records, important for audit trails and recovery.
In the system architecture, Session is a **data model layer** component that sits between:
- **Upward**: Frontend clients that query/create sessions and display chat history
- **Downward**: MongoDB via Beanie ODM for persistence
- **Sideways**: Service layer components (imported by `ee.cloud.models.service`) that implement business logic around session CRUD and filtering
## Key Classes and Methods
### Session (class)
**Purpose**: Represents a single chat session document with complete metadata for tracking, ownership, and organizational context.
**Key Fields**:
- `sessionId: Indexed(str, unique=True)` — Unique identifier for the session, guaranteed distinct across the system. The `Indexed` and `unique=True` parameters ensure database-level uniqueness constraints.
- `pocket: str | None` — The pocket (personal/private area) this session belongs to, if any. Nullable because sessions can be group or agent-scoped instead.
- `group: str | None` — Group identifier if this is a group conversation, mutually exclusive with pocket/agent in typical usage.
- `agent: str | None` — Agent identifier if this session is tied to a specific agent/bot, optional organizational unit.
- `workspace: Indexed(str)` — The workspace ID, required and indexed for tenant isolation. All queries are scoped to workspace.
- `owner: str` — User ID of the session creator, no default. Enables permission checks and ownership filtering.
- `title: str` — Human-readable session name, defaults to "New Chat" if not provided.
- `lastActivity: datetime` — Timestamp of the most recent activity in the session, automatically set to current UTC time on creation. Used for sorting and "recent conversations" UIs.
- `messageCount: int` — Counter tracking total messages in the session, defaults to 0. Incremented by service layer when messages are added.
- `deleted_at: datetime | None` — Soft-delete timestamp. If populated, the session is logically deleted. Query filters typically exclude sessions where `deleted_at` is not None.
**Inherited Behavior** (from `TimestampedDocument`):
- `created_at: datetime` — Automatically set when document is created
- `updated_at: datetime` — Automatically updated on any field modification
- `_id: ObjectId` — MongoDB default primary key
**Configuration**:
- `model_config = {"populate_by_name": True}` — Allows each field to be populated by its Python attribute name as well as by any declared alias when parsing JSON input, important for backward compatibility and client flexibility.
- `Settings.name = "sessions"` — Maps the Pydantic model to the MongoDB collection named "sessions".
- `Settings.indexes` — Two composite indexes:
1. `[("workspace", 1), ("pocket", 1), ("lastActivity", -1)]` — For finding recent sessions within a pocket; ascending workspace + pocket, descending last activity for natural "most recent first" ordering.
2. `[("workspace", 1), ("group", 1), ("agent", 1)]` — For finding sessions by group/agent within workspace; useful for filtering conversations by organizational unit.
These indexes are critical for query performance; without them, filtering across thousands of sessions would be slow.
## How It Works
### Data Flow
1. **Creation**: A service layer endpoint (or client) calls the repository to create a new Session. Pydantic validates all fields. `lastActivity` defaults to now in UTC if not provided. The document is inserted into MongoDB.
2. **Querying**: Service methods retrieve sessions using the indexed fields:
   - "Show me the 10 most recent sessions in workspace W and pocket P" → Uses index 1 with workspace + pocket filters, ordered by lastActivity descending.
   - "Show me all sessions in group G" → Uses index 2 with workspace + group filters.
   - "Find session by ID" → Uses `sessionId` unique index.
3. **Updates**: When a message is added to a session (in Python memory), the service layer increments `messageCount` and updates `lastActivity` to current time. The `updated_at` field is auto-bumped by Beanie.
4. **Soft Deletion**: Instead of removing the document, the service sets `deleted_at = datetime.now(UTC)`. Query filters add a `deleted_at: None` condition to hide deleted sessions.
### Edge Cases
- **Null pocket/group/agent**: A session can be tied to a workspace + owner only, with all three of these fields None. Service queries must handle this carefully—don't assume one will always be populated.
- **messageCount out of sync**: If the Python message store crashes or loses data, `messageCount` on the Session document may no longer match reality. Service layer should consider this a metadata cache, not the source of truth.
- **lastActivity not updated**: If the service layer forgets to update `lastActivity` when adding a message, sorting by "recent" will show stale data. Callers depend on the service layer keeping this field in sync, so every message-write path must update it.
- **Timezone handling**: The `Field(default_factory=lambda: datetime.now(UTC))` ensures UTC timezone awareness, avoiding ambiguity and daylight saving issues.
## Authorization and Security
This module defines the data structure; authorization is enforced at the service/endpoint layer:
- **Workspace isolation**: All queries should filter by `workspace` to prevent cross-tenant data leakage. A service function querying sessions without a workspace filter is a security bug.
- **Owner verification**: Endpoints should check that the requesting user matches `owner` (or has admin/group permission) before returning or modifying a session.
- **Soft delete privacy**: Queries must filter `deleted_at: None` unless the user has auditing/admin privileges.
The model itself does not enforce these; it is the responsibility of the service layer (imported by `ee.cloud.models.service`) to apply these rules.
## Dependencies and Integration
### Imports
- **base** (`ee.cloud.models.base.TimestampedDocument`) — Parent class providing `created_at`, `updated_at` fields and Beanie ODM integration. Session extends this to inherit automatic timestamp management.
- **beanie** (`Indexed`) — ODM (Object-Document Mapper) for MongoDB integration. `Indexed(str, unique=True)` tells MongoDB to create a unique index on `sessionId`.
- **pydantic** (`Field`) — Defines field metadata like aliases and defaults. `alias="sessionId"` maps the Python field name to JSON key names.
- **datetime** (`UTC`) — Standard library datetime utilities for timezone-aware timestamps.
### Imported By
- **`__init__`** (package initializer) — Likely re-exports Session for public API visibility, so callers use `from ee.cloud.models import Session` rather than the full path.
- **`service`** (`ee.cloud.models.service` or `ee.cloud.service`) — Business logic layer that performs CRUD operations on Session documents, implements filtering, updates messageCount, manages soft deletes, and enforces authorization.
### System Integration
- **Frontend clients** → POST `/sessions` with workspace, pocket, title → Service layer creates Session → Returns document with sessionId to client.
- **Message ingestion** → Client sends message → Service adds to Python message store, increments Session.messageCount, updates Session.lastActivity → MongoDB persistence.
- **Session discovery** → Client requests "show recent sessions" → Service queries using index 1 (workspace + pocket + lastActivity) → Returns sorted list.
- **Workspace deletion** → Admin deletes workspace W → Service queries all sessions with workspace=W and soft-deletes them (sets deleted_at).
## Design Decisions
### 1. **Metadata in MongoDB, Messages in Python**
Session metadata (timestamps, count, ownership) lives in MongoDB for durability and queryability. Messages are kept in Python process memory (presumably an in-memory cache or separate storage). This separation trades off consistency (message count may drift) for:
- **Query performance**: Session list queries hit MongoDB indexes, not slower message stores.
- **Reduced database load**: Messages are often voluminous; storing only metadata keeps the collection lean.
- **Flexibility**: Message storage can be changed (Redis, S3, file system) without altering Session schema.
### 2. **camelCase Aliases for Frontend Contract**
Fields like `lastActivity` have `alias="lastActivity"` (the field and alias are identical here, but the pattern shows intent). The `populate_by_name = True` config allows both the Python name and JSON alias to work. This is intentional coupling to the frontend:
- **Pro**: No transformation layer needed; frontend sends `{"lastActivity": "..."}` and Pydantic maps it directly.
- **Con**: Changing field names requires frontend coordination. The comment "Field names use camelCase aliases to match the frontend contract" signals this is intentional.
### 3. **Soft Deletes with `deleted_at`**
Instead of removing documents, sessions are marked deleted with a timestamp. Benefits:
- **Recoverability**: Admins can restore deleted sessions.
- **Audit trail**: Preserves "who deleted when" for compliance.
- **Query safety**: Default filters exclude `deleted_at IS NOT NULL`, reducing chance of accidental exposure.
Trade-off: Queries must always include the `deleted_at: None` filter, or garbage collection is needed periodically.
### 4. **Composite Indexes for Access Patterns**
Two indexes reflect expected query patterns:
- Index 1: Workspace + pocket + recent activity = "show my recent chats in my pocket"
- Index 2: Workspace + group + agent = "show all conversations in this group/agent"
These are not exhaustive; other queries (e.g., by owner, by agent alone) may not be optimized. Service layer should document which queries are O(log N) vs O(N).
### 5. **Indexed(str, unique=True) for sessionId**
The `sessionId` is unique cluster-wide. This could be a UUID, nanoid, or similar. The uniqueness constraint prevents accidental duplicates and enables foreign key references from message documents. Important assumption: sessionId generation is centralized and deterministic (e.g., a service method, not scattered clients).
### 6. **Nullable Pocket/Group/Agent**
These three fields are optional and likely mutually exclusive in practice (a session is scoped to one organizational unit). However, the schema allows all three to be None or any combination to be set. Service layer logic should validate the intended constraint (e.g., exactly one of {pocket, group, agent} is set), not the schema.
---
## Migration and Future Considerations
- **Message storage relocation**: If messages move from Python to a separate store (Firestore, Redis), the messageCount field becomes a cache that needs invalidation strategy.
- **Multi-tenant scale**: At 10M+ sessions per workspace, the composite indexes may need refinement or sharding by workspace.
- **Session archival**: Very old sessions (>1 year) could be archived to cold storage; the soft-delete pattern supports this.
- **Read replicas**: Queries can be routed to read replicas; writes (create, update, soft-delete) must hit the primary.
---
## Related
- [base-foundational-document-model-with-automatic-timestamp-management-for-mongodb](base-foundational-document-model-with-automatic-timestamp-management-for-mongodb.md)
- [eecloudworkspace-router-re-export-for-fastapi-workspace-endpoints](eecloudworkspace-router-re-export-for-fastapi-workspace-endpoints.md)
- [untitled](untitled.md)

170
docs/wiki/untitled.md Normal file
View File

@@ -0,0 +1,170 @@
# Workspace Domain Service - Business Logic for Enterprise Cloud
> A stateless service layer that encapsulates workspace business logic including CRUD operations, member management, and invite handling. Implements role-based access control, seat limits, and event-driven notifications for multi-tenant workspace management.
**Categories:** Enterprise SaaS, Backend Service Architecture, Access Control & Security
**Concepts:** WorkspaceService, Workspace, User, WorkspaceMembership, Invite, Role-based access control, Seat limit, Soft delete, Token-based invitations, Event bus
**Words:** 869 | **Version:** 22
---
## Overview
The Workspace Domain service is a stateless business logic layer for managing enterprise cloud workspaces. It handles workspace lifecycle management, member administration, and invitation workflows with built-in authorization checks and seat limiting.
## Workspace CRUD Operations
### Create Workspace
Creates a new workspace with a unique slug and automatically adds the creator as the owner. Validates that the slug is not already in use by checking for existing non-deleted workspaces.
- Slug must be unique across non-deleted workspaces
- Creator is added with `owner` role
- Creator's `active_workspace` is set to the new workspace
- Returns workspace response with member count (1 on creation)
### Get Workspace
Retrieves a workspace by ID, requiring the requesting user to be a member. Returns current member count.
- Requires workspace membership
- Excludes soft-deleted workspaces (those whose `deleted_at` is not null)
- Returns serialized workspace with computed member count
### Update Workspace
Updates workspace metadata (name and settings). Requires admin or higher role.
- Can update name and settings fields
- Requires `admin` minimum role
- Settings are wrapped in `WorkspaceSettings` model
### Delete Workspace
Soft-deletes a workspace by setting `deleted_at` timestamp. Requires owner role.
- Only owners can delete
- Soft delete prevents accidental data loss
- Workspace remains in database but is excluded from queries
### List User Workspaces
Returns all non-deleted workspaces a user belongs to, with member counts.
- Filters by user's workspace memberships
- Excludes deleted workspaces
- Returns serialized list with member counts
## Member Management
### List Members
Lists all members of a workspace with their roles and join dates. Requires workspace membership.
- Returns email, name, avatar, role, and join date for each member
- Includes metadata for each user's workspace membership
### Update Member Role
Changes a member's role within a workspace. Requires admin or higher role.
- Cannot demote the workspace owner
- Owner check prevents removing the last owner
- Validates target user exists and is a member
### Remove Member
Removes a member from a workspace. Requires admin or higher role.
- Cannot remove the workspace owner
- Clears the member's `active_workspace` if it was the removed workspace
- Emits `member.removed` event with workspace_id, user_id, and remover info
## Invite Workflow
### Create Invite
Generates an invitation to a workspace with a secure token. Requires admin or higher role.
- Validates seat limit not exceeded before issuing invite
- Prevents duplicate pending invites for same email and group combination
- Different groups can each have their own pending invite for the same email
- Workspace-level invites (no group) limited to one pending at a time
- Uses 32-byte URL-safe random tokens
- Expired invites can be re-issued
### Validate Invite
Checks invite status by token without authentication. Returns invite details including accepted, revoked, and expired flags.
- No authorization required
- Returns complete invite state
### Accept Invite
Accepts an invitation and adds the user to the workspace. User must be authenticated.
- Validates invite exists and is not already accepted, revoked, or expired
- Checks workspace still exists and is not deleted
- Only checks seat limit for new members (skips check if already a member)
- Adds user with invite's specified role
- Sets `active_workspace` to invited workspace
- Emits `invite.accepted` event with workspace_id, user_id, invite_id, and group_id
### Revoke Invite
Revokes an outstanding invitation. Requires admin or higher role.
- Sets `revoked` flag on invite
- Validates invite exists and belongs to specified workspace
## Authorization Model
### Role Hierarchy
- **owner**: Full workspace control, can delete, cannot be demoted or removed
- **admin**: Can manage members and invites, cannot delete workspace
- **member**: Basic access (implied lower tier)
### Access Control
- Workspace operations require membership via `_get_membership()` check
- Administrative operations require role validation via `check_workspace_role()`
- Owner-specific operations prevent degradation of sole owner status
## Data Models
### Workspace
- `id`: ObjectId
- `name`: Workspace display name
- `slug`: Unique URL identifier
- `owner`: User ID of owner
- `plan`: Plan type
- `seats`: Maximum member count
- `createdAt`: Workspace creation timestamp
- `deleted_at`: Soft delete timestamp (null if active)
- `settings`: WorkspaceSettings object
### WorkspaceMembership
- `workspace`: Workspace ID reference
- `role`: Member role (owner, admin, member)
- `joined_at`: Membership creation timestamp
### Invite
- `workspace`: Target workspace ID
- `email`: Invitee email address
- `role`: Role to assign upon acceptance
- `invited_by`: User ID of inviter
- `token`: Secure URL-safe token
- `group`: Optional group ID for scoped invites
- `accepted`: Boolean flag
- `revoked`: Boolean flag
- `expired`: Boolean flag
- `expires_at`: Expiration timestamp
## Response Serialization
All responses convert internal models to frontend-compatible dictionaries:
- Object IDs are converted to strings
- Timestamps are serialized to ISO format
- Sensitive fields are excluded from responses
## Event Emission
The service emits events via `event_bus` for audit and downstream processing:
- `member.removed`: When a member is removed from a workspace
- `invite.accepted`: When an invitation is accepted
## Error Handling
### Error Types
- `ConflictError`: Slug taken, invite already pending, invite already accepted
- `NotFound`: Workspace, user, member, or invite not found
- `Forbidden`: Permission denied, invite revoked/expired, cannot demote owner
- `SeatLimitError`: Member count equals or exceeds workspace seat limit

View File

@@ -0,0 +1,174 @@
# workspace — Data model for organization workspaces in multi-tenant enterprise deployments
> This module defines the core data models that represent a workspace: the container for an organization's entire deployment in PocketPaw's multi-tenant architecture. It includes Workspace (the main organizational entity with billing/licensing info) and WorkspaceSettings (configurable policies). The module exists as a separate layer to cleanly separate data persistence concerns from business logic, and serves as the contract between the database, service layer, and API routers.
**Categories:** data model, workspace management, multi-tenancy, MongoDB persistence
**Concepts:** Workspace, WorkspaceSettings, TimestampedDocument, soft delete, deleted_at, multi-tenancy, workspace scoping, slug, Indexed, unique constraint
**Words:** 1857 | **Version:** 1
---
## Purpose
This module is the **data persistence layer** for workspaces — the organizational unit in PocketPaw's multi-tenant SaaS architecture. In a multi-tenant system, one workspace = one enterprise customer or organization. Every user, agent, conversation, and data artifact belongs to exactly one workspace.
The module exists to:
1. **Define the schema** — What data is required to represent a workspace in the database?
2. **Enforce constraints** — Ensure workspace slugs are globally unique, define default values for settings
3. **Provide type safety** — Give the rest of the codebase a single source of truth for workspace structure (used by `router` and `service` modules)
4. **Enable Beanie integration** — Connect to MongoDB via the Beanie ODM with proper indexing
In the larger architecture, this is a **foundational domain model**. Most other operations in the system are scoped by workspace: you cannot query agents or conversations without specifying which workspace they belong to. This module is the root of that scoping hierarchy.
## Key Classes and Methods
### WorkspaceSettings
**Purpose**: Encapsulates configurable policies and defaults for a workspace. Not all settings need to be set at workspace creation; they can have sensible defaults.
**Fields**:
- `default_agent: str | None` — The ID of the agent that should be used by default in this workspace (e.g., when creating a new conversation without specifying an agent). `None` means the workspace hasn't set a default.
- `allow_invites: bool = True` — Whether users in this workspace can invite others. Controls team expansion permissions. Defaults to `True` (open to invites) to encourage collaboration.
- `retention_days: int | None = None` — Data retention policy: how many days to keep conversation history and logs. `None` means keep forever (unlimited retention). Important for compliance and cost management in enterprise deployments.
**Business Logic**: This is a **settings/configuration object**, not a document. It's embedded within a Workspace record, not stored separately. This means every workspace query returns its settings inline, avoiding extra database lookups for common configuration queries.
### Workspace(TimestampedDocument)
**Purpose**: The core organizational entity. Represents one customer/tenant in the multi-tenant system.
**Fields**:
- `name: str` — Human-readable workspace name (e.g., "Acme Corporation"). Not necessarily unique globally.
- `slug: Indexed(str, unique=True)` — URL-friendly identifier (e.g., "acme-corp"). Must be **globally unique** across all workspaces (enforced by MongoDB unique index). Used in URLs and programmatic references. The `Indexed(unique=True)` tells Beanie to create a database index and constraint.
- `owner: str` — User ID of the admin/owner who created this workspace. This is a foreign key reference to a User document (though not explicitly enforced here). The owner typically has full permissions to delete or reconfigure the workspace.
- `plan: str = "team"` — The subscription tier/license type. Valid values are `"team"`, `"business"`, `"enterprise"`. Determines what features are available and how many seats are granted. Sourced from the licensing system.
- `seats: int = 5` — Number of licensed user seats for this workspace. Default is 5 (suitable for small teams). Enterprise plans may have higher defaults or unlimited seats.
- `settings: WorkspaceSettings` — The embedded configuration object (see above). Defaults to `WorkspaceSettings()`, which gives all defaults (`default_agent=None`, `allow_invites=True`, `retention_days=None`).
- `deleted_at: datetime | None = None` — **Soft delete** marker. If `None`, the workspace is active. If set to a timestamp, the workspace is logically deleted but the record remains in the database (for audit trails, data recovery, compliance). This is a common pattern in SaaS systems to preserve data integrity.
Inherits from `TimestampedDocument`:
- `created_at: datetime` — When the workspace was created (auto-set by base class)
- `updated_at: datetime` — When the workspace was last modified (auto-updated by base class)
- `_id: PydanticObjectId` — MongoDB document ID (auto-generated)
**Business Logic**:
- **Workspace Lifecycle**: A workspace starts with `deleted_at=None`. When deleted, the `deleted_at` field is set but the document remains. Queries for active workspaces should filter `deleted_at=None`.
- **Uniqueness Constraint**: The slug must be unique. This is critical for multi-tenancy: if two workspaces had the same slug, URL routing would be ambiguous.
- **Settings Inheritance**: When a new workspace is created, it gets default settings. Users can later update `settings` to customize behavior.
- **Owner as Admin**: The `owner` field identifies who has initial control. Authorization logic (in the `service` or router layer) likely checks if the current user is the owner before allowing destructive operations.
- **Plan-Driven Limits**: The `plan` field gates features. The `seats` field is typically enforced by the service layer: if you try to invite a 6th user to a team plan with 5 seats, the service rejects it.
**Beanie Integration**:
- Inherits from `TimestampedDocument` (defined in `ee.cloud.models.base`), which provides MongoDB document lifecycle (timestamps, ID generation).
- The `class Settings` inner class with `name = "workspaces"` tells Beanie to store Workspace documents in the MongoDB collection named `workspaces`.
## How It Works
**Creation Flow**:
1. An API endpoint (in the `router` module) receives a request to create a workspace (e.g., POST `/workspaces` with name, plan, etc.).
2. The router validates the input and calls the `service` layer.
3. The service layer (e.g., `WorkspaceService`) instantiates a Workspace model, sets defaults (like `deleted_at=None`, `settings=WorkspaceSettings()`).
4. Beanie saves it to MongoDB. The base class auto-sets `created_at` and `updated_at`. MongoDB auto-generates `_id`.
5. Beanie enforces the slug uniqueness constraint: if duplicate, it raises an error (caught and returned as HTTP 409 Conflict by the router).
**Retrieval Flow**:
1. Service queries: "Get workspace with slug='acme-corp'" → Beanie builds a MongoDB query and returns a Workspace instance.
2. The caller gets a fully-typed Python object with all fields populated.
3. The settings are already embedded, so no follow-up queries needed.
**Update Flow**:
1. Service retrieves the workspace, modifies a field (e.g., `workspace.plan = "enterprise"` or `workspace.settings.allow_invites = False`).
2. Calls `workspace.save()` (Beanie method). `updated_at` is auto-updated.
3. MongoDB updates just the fields that changed.
**Soft Delete Flow**:
1. Instead of deleting the document, the service sets `workspace.deleted_at = datetime.now()` and calls `save()`.
2. Queries for active workspaces add a filter: `Workspace.find({"deleted_at": None})`.
3. The document remains in the database for compliance/recovery, but is invisible to normal queries.
**Edge Cases**:
- **Duplicate Slug**: If creation tries to use an existing slug, Beanie raises a duplicate key error. The service/router should catch and return a user-friendly error.
- **Settings with None**: Fields like `retention_days=None` and `default_agent=None` are valid. The service layer interprets `None` as "no policy set" or "use system default".
- **Plan Mismatch**: If someone manually sets `plan="invalid"` (not one of the three valid values), Pydantic validation doesn't prevent it (no enum). The service layer should validate plan values.
- **Owner Deletion**: If the user referenced in `owner` is deleted, this model doesn't cascade-delete the workspace (it's just a string ID). The service layer must handle this scenario.
## Authorization and Security
This module **does not enforce authorization directly**. It defines the data structure; authorization is enforced at higher layers:
- **Who can view a workspace?** — Anyone with access to that workspace (determined by the `router` or service via user-workspace membership checks).
- **Who can modify workspace settings?** — Typically the owner (checked by the service before allowing updates).
- **Who can delete a workspace?** — Typically the owner; deletion is a soft delete (set `deleted_at`).
- **Cross-workspace visibility**: The model itself doesn't restrict cross-workspace queries, but the service layer should always filter by workspace when querying user data (e.g., "get agents in workspace X", not "get all agents").
The `slug: Indexed(str, unique=True)` is a technical constraint (uniqueness), not an authorization control.
## Dependencies and Integration
**Depends On**:
- **`ee.cloud.models.base`** — Imports `TimestampedDocument`, the base class that adds MongoDB integration, `_id`, `created_at`, and `updated_at` fields.
- **`beanie`** — The `Indexed` function creates database indexes and constraints. The model inherits Beanie's document methods (`save()`, `find()`, etc.).
- **`pydantic`** — `BaseModel` and `Field` provide data validation, serialization, and field customization. `WorkspaceSettings` is a plain Pydantic model (not a MongoDB document).
- **`datetime`** — Standard library for timestamp types (`created_at`, `updated_at`, `deleted_at`).
**Imported By**:
- **`__init__`** — Re-exports Workspace and WorkspaceSettings so other modules can import from the models package cleanly (`from ee.cloud.models import Workspace`).
- **`router`** — The API layer uses Workspace to define request/response schemas and query parameters. The router calls service methods that return Workspace instances.
- **`service`** — The business logic layer (likely `WorkspaceService`) performs CRUD operations on Workspace instances. It queries the database, validates business rules, and coordinates with other services.
**System Position**:
```
API Router (router.py)
↓ calls
WorkspaceService (service.py)
↓ uses
Workspace Model (this module) + WorkspaceSettings
↓ stored in
MongoDB via Beanie
```
Every other domain model (agents, conversations, users) likely includes a `workspace_id` field to establish which workspace owns the data. This module is the root.
## Design Decisions
**1. Embedded Settings vs. Separate Collection**
- **Choice**: `settings: WorkspaceSettings` is embedded (a nested object), not a separate MongoDB document.
- **Why**: Settings are small, always accessed together with the workspace, and rarely updated independently. Embedding avoids a join and keeps the data model simple.
- **Trade-off**: Can't have separate permission checks on settings (e.g., "readonly user can read workspace but not settings"). Acceptable for most enterprise SaaS.
**2. Soft Deletes with `deleted_at`**
- **Choice**: Deletion sets `deleted_at` instead of removing the document.
- **Why**: Preserves audit trails, enables data recovery, satisfies compliance requirements (GDPR right to erasure can be implemented as data anonymization + soft delete).
- **Cost**: Queries must filter `deleted_at=None`. Requires discipline in the service layer.
**3. Slug as Unique Identifier**
- **Choice**: `slug: Indexed(str, unique=True)` is a unique, human-readable identifier, not just the MongoDB `_id`.
- **Why**: URLs and programmatic references are cleaner with "acme-corp" than with a 24-character hex ObjectId. Enables vanity URLs.
- **Cost**: Slugs are harder to generate safely (must avoid collisions, handle Unicode, etc.). Typically generated from the workspace name and checked for uniqueness.
**4. Plan as String, Not Enum**
- **Choice**: `plan: str = "team"` is a string, not a Python enum.
- **Why**: Flexibility — new plans can be added in the license system without updating this model. Pydantic doesn't restrict to specific values.
- **Cost**: No compile-time safety. The service layer must validate that plan is one of the known values.
- **Better approach**: Would be `plan: Literal["team", "business", "enterprise"]` for type safety, but that's not shown here.
**5. Owner as User ID String, Not Reference**
- **Choice**: `owner: str` is a string (User ID), not a foreign key or reference field.
- **Why**: MongoDB doesn't enforce foreign keys. Document references are intentionally loose (schema flexibility). The service layer assumes the User exists elsewhere.
- **Cost**: Orphaned workspaces if the owner user is deleted. The service must handle this.
**6. Inheritance from TimestampedDocument**
- **Choice**: Workspace extends `TimestampedDocument` (from base.py), gaining `created_at`, `updated_at`, `_id`.
- **Why**: Code reuse. Every document in the system needs timestamps; centralizing in a base class avoids duplication.
- **Pattern**: Common in MongoDB/document-DB-backed services using Beanie or similar ODMs.
**7. Default Values**
- **Choice**: `plan="team"`, `seats=5`, `settings=WorkspaceSettings()`, `deleted_at=None`, `allow_invites=True`.
- **Why**: Sensible defaults reduce the chance of required-field errors. A small workspace can be created with just a name and owner.
- **Business Logic**: "New workspaces are team plans with 5 seats, invites enabled, and no retention limit by default."
---
## Related
- [base-foundational-document-model-with-automatic-timestamp-management-for-mongodb](base-foundational-document-model-with-automatic-timestamp-management-for-mongodb.md)
- [eecloudworkspace-router-re-export-for-fastapi-workspace-endpoints](eecloudworkspace-router-re-export-for-fastapi-workspace-endpoints.md)
- [untitled](untitled.md)

27
ee/LICENSE Normal file
View File

@@ -0,0 +1,27 @@
Functional Source License, Version 1.1, Apache 2.0 Future License
Licensor: Qbtrix Inc.
Software: PocketPaw Enterprise Extensions
Use Limitation:
You may use this software for any purpose except competing with
PocketPaw or offering it as a managed service.
Change Date: Four years from the date the software is released.
Change License: Apache License, Version 2.0
For full FSL terms, see: https://fsl.software/
---
The code in this directory (ee/) is licensed separately from the
rest of the PocketPaw repository, which is under the Apache 2.0 license.
Enterprise features include:
- Fabric (ontology layer)
- Instinct (decision pipeline)
- Automations (triggers and schedules)
- Audit (enhanced compliance logging)
These features require a PocketPaw Enterprise license for production use.
After the Change Date, this code converts to Apache 2.0.

11
ee/__init__.py Normal file
View File

@@ -0,0 +1,11 @@
# PocketPaw Enterprise Extensions (ee/)
# Licensed under FSL 1.1 — see ee/LICENSE
# These features require a PocketPaw Enterprise license for production use.
# Updated: 2026-03-30 — Added api.py singleton for instinct_tools bridge.
#
# Modules:
# api.py — Singleton accessors (get_instinct_store)
# fabric/ — Ontology layer (objects, links, properties)
# instinct/ — Decision pipeline (actions, approvals, audit)
# automations/ — Time/data triggers
# audit/ — Enhanced compliance logging

26
ee/api.py Normal file
View File

@@ -0,0 +1,26 @@
# ee/api.py — Singleton entry point for the Instinct decision pipeline store.
# Created: 2026-03-30 — Bridges instinct_tools.py to the InstinctStore.
# The agent tools (pocketpaw.tools.builtin.instinct_tools) import from here
# via `from ee.api import get_instinct_store`.
from __future__ import annotations
from pathlib import Path
from ee.instinct.store import InstinctStore
_DB_PATH = Path.home() / ".pocketpaw" / "instinct.db"
_store: InstinctStore | None = None
def get_instinct_store() -> InstinctStore:
    """Return the process-wide InstinctStore, building it on first use.

    The instance is cached in the module-level ``_store`` variable, so every
    call after the first returns the same object. The store is constructed
    from ``_DB_PATH`` (~/.pocketpaw/instinct.db).
    """
    global _store
    # Fast path: the singleton has already been created.
    if _store is not None:
        return _store
    _store = InstinctStore(_DB_PATH)
    return _store

4
ee/audit/__init__.py Normal file
View File

@@ -0,0 +1,4 @@
# Audit — enhanced compliance logging for Paw OS.
# Created: 2026-03-28 — Placeholder for future implementation.
# Extends instinct's audit log with export formats, retention policies,
# and compliance reporting (SOC2, GDPR).

View File

@@ -0,0 +1,4 @@
# Automations — time/data triggers for Paw OS.
# Created: 2026-03-28 — Placeholder for future implementation.
# "When inventory drops below 10, alert me."
# "Every Monday, generate the weekly report pocket."

121
ee/cloud/__init__.py Normal file
View File

@@ -0,0 +1,121 @@
"""PocketPaw Enterprise Cloud — domain-driven architecture.
Updated: Added kb (knowledge base) domain router to mount_cloud().
Domains: auth, workspace, chat, pockets, sessions, agents, kb.
Each has router.py (thin), service.py (logic), schemas.py (validation).
"""
from __future__ import annotations
from fastapi import Depends, FastAPI, Request
from fastapi.responses import JSONResponse
from ee.cloud.shared.errors import CloudError
def mount_cloud(app: FastAPI) -> None:
    """Mount all cloud domain routers and the error handler.

    Registers, in order:

    * an exception handler that turns ``CloudError`` into a JSON response
      using the error's own ``status_code`` and ``to_dict()`` payload;
    * the auth, workspace, agents, chat, pockets, sessions and kb routers,
      all under the ``/api/v1`` prefix;
    * ``GET /api/v1/users`` — workspace-scoped user search;
    * a static mount at ``/uploads`` backed by ``~/.pocketpaw/uploads``
      (the directory is created if missing);
    * the ``/ws/cloud`` WebSocket route (deliberately outside ``/api/v1``);
    * ``GET /api/v1/license``;
    * cross-domain event handlers, the agent bridge, and startup/shutdown
      hooks that start and stop the agent pool.

    Args:
        app: The FastAPI application to attach everything to.
    """

    # Global error handler
    @app.exception_handler(CloudError)
    async def cloud_error_handler(request: Request, exc: CloudError):
        return JSONResponse(status_code=exc.status_code, content=exc.to_dict())

    # Import and mount domain routers. Imports are local to this function so
    # that importing the package does not pull in every domain module.
    from ee.cloud.agents.router import router as agents_router
    from ee.cloud.auth.router import router as auth_router
    from ee.cloud.chat.router import router as chat_router
    from ee.cloud.license import get_license_info
    from ee.cloud.pockets.router import router as pockets_router
    from ee.cloud.sessions.router import router as sessions_router
    from ee.cloud.workspace.router import router as workspace_router

    app.include_router(auth_router, prefix="/api/v1")
    app.include_router(workspace_router, prefix="/api/v1")
    app.include_router(agents_router, prefix="/api/v1")
    app.include_router(chat_router, prefix="/api/v1")
    app.include_router(pockets_router, prefix="/api/v1")
    app.include_router(sessions_router, prefix="/api/v1")

    from ee.cloud.kb.router import router as kb_router

    app.include_router(kb_router, prefix="/api/v1")

    # User search endpoint — used by group settings, pocket sharing
    from ee.cloud.models.user import User as UserModel
    from ee.cloud.shared.deps import current_user, current_workspace_id

    # Upper bound on the page size a client may request from /api/v1/users.
    max_search_results = 100

    @app.get("/api/v1/users", tags=["Users"])
    async def search_users(
        search: str = "",
        limit: int = 10,
        user: UserModel = Depends(current_user),
        workspace_id: str = Depends(current_workspace_id),
    ):
        """Return users of the caller's workspace, optionally filtered by text.

        ``search`` is matched case-insensitively as a literal substring
        against ``email`` and ``full_name``. ``limit`` is clamped to the
        range 1..100.
        """
        import re

        # Clamp the client-supplied page size: MongoDB treats a limit of 0 as
        # "no limit" and gives negative limits special meaning, so an
        # unchecked value could return the entire workspace directory.
        limit = max(1, min(limit, max_search_results))

        # Always scope by workspace so results never cross tenants.
        query: dict = {"workspaces.workspace": workspace_id}
        if search:
            # re.escape: the user's text is a literal, not a regex.
            pattern = re.compile(re.escape(search), re.IGNORECASE)
            query["$or"] = [
                {"email": {"$regex": pattern}},
                {"full_name": {"$regex": pattern}},
            ]
        users = await UserModel.find(query).limit(limit).to_list()
        return [
            {
                "_id": str(u.id),
                "email": u.email,
                "name": u.full_name,
                "avatar": u.avatar,
                "status": u.status,
            }
            for u in users
        ]

    # Serve uploaded avatars from ~/.pocketpaw/uploads/
    from pathlib import Path

    from fastapi.staticfiles import StaticFiles

    uploads_dir = Path.home() / ".pocketpaw" / "uploads"
    uploads_dir.mkdir(parents=True, exist_ok=True)
    app.mount("/uploads", StaticFiles(directory=str(uploads_dir)), name="uploads")

    # Mount WebSocket at root path (not under /api/v1 prefix)
    # so frontend can connect to ws://host/ws/cloud?token=...
    from ee.cloud.chat.router import websocket_endpoint

    app.add_api_websocket_route("/ws/cloud", websocket_endpoint)

    # License endpoint (no auth)
    @app.get("/api/v1/license", tags=["License"])
    async def license_info():
        return get_license_info()

    # Register cross-domain event handlers + agent bridge
    from ee.cloud.shared.event_handlers import register_event_handlers

    register_event_handlers()

    from ee.cloud.shared.agent_bridge import register_agent_bridge

    register_agent_bridge()

    # Start/stop agent pool with app lifecycle + chat persistence
    @app.on_event("startup")
    async def _start_agent_pool():
        # Register chat persistence bridge (saves runtime WS messages to MongoDB)
        from ee.cloud.shared.chat_persistence import register_chat_persistence

        register_chat_persistence()

        from pocketpaw.agents.pool import get_agent_pool

        await get_agent_pool().start()

    @app.on_event("shutdown")
    async def _stop_agent_pool():
        from pocketpaw.agents.pool import get_agent_pool

        await get_agent_pool().stop()

View File

@@ -0,0 +1 @@
from ee.cloud.agents.router import router # noqa: F401

View File

@@ -0,0 +1,189 @@
# knowledge.py — Agent knowledge service via the kb-go binary.
# Updated: 2026-04-07 — Switched from Python knowledge_base package to kb Go binary.
# Heavy extraction (PDF, OCR, URL) done in Python, piped as text to kb.
# All other operations delegate to subprocess calls.
"""Agent knowledge service — thin wrapper over the `kb` Go binary.
The kb binary (github.com/qbtrix/kb-go) handles compilation, search, indexing,
and storage. This wrapper handles heavy extraction (PDF, URL, OCR, DOCX) in
Python and pipes extracted text to kb via stdin.
"""
from __future__ import annotations
import asyncio
import json
import logging
import os
import subprocess
from pathlib import Path
logger = logging.getLogger(__name__)

# Path (or PATH-resolvable name) of the kb binary; overridable via env var.
KB_BIN = os.environ.get("POCKETPAW_KB_BIN", "kb")


def _kb(*args: str, input_text: str | None = None, timeout: int = 120) -> dict | list | str:
    """Run the kb binary and return its output.

    ``--json`` is always appended to the arguments. Stdout is parsed as JSON;
    if it is not valid JSON the stripped raw text is returned instead.

    Args:
        *args: Command-line arguments passed to kb (sub-command first).
        input_text: Optional text written to the process's stdin.
        timeout: Seconds to wait before giving up on the process.

    Returns:
        The decoded JSON value, or the stripped stdout string.

    Raises:
        RuntimeError: If the binary is missing, the call times out, or kb
            exits with a non-zero status.
    """
    cmd = [KB_BIN, *args, "--json"]
    try:
        result = subprocess.run(
            cmd,
            input=input_text,
            capture_output=True,
            text=True,
            timeout=timeout,
        )
    except FileNotFoundError as exc:
        raise RuntimeError(
            f"kb binary not found at '{KB_BIN}'. "
            "Install: go install github.com/qbtrix/kb-go@latest "
            "or set POCKETPAW_KB_BIN to the binary path."
        ) from exc
    except subprocess.TimeoutExpired as exc:
        # Convert to the same RuntimeError callers already get for other kb
        # failures, instead of leaking a subprocess-specific exception.
        logger.warning("kb timed out after %ss", timeout)
        raise RuntimeError(f"kb timed out after {timeout}s") from exc

    if result.returncode != 0:
        # Only the first 200 chars of stderr are kept to bound log/error size.
        logger.warning("kb failed (exit %d): %s", result.returncode, result.stderr[:200])
        raise RuntimeError(f"kb failed: {result.stderr[:200]}")

    try:
        return json.loads(result.stdout)
    except json.JSONDecodeError:
        return result.stdout.strip()
class KnowledgeService:
    """Agent-scoped knowledge operations via the kb Go binary.

    Every operation passes ``--scope agent:<agent_id>`` to kb. The async
    methods run the blocking kb subprocess in a worker thread so the event
    loop is not stalled while kb runs.
    """

    # File suffixes whose text is extracted in Python before being piped to kb.
    _EXTRACTED_SUFFIXES = (".pdf", ".docx", ".doc", ".png", ".jpg", ".jpeg")

    @staticmethod
    async def ingest_text(agent_id: str, text: str, source: str = "manual") -> dict:
        """Ingest ``text`` into the agent's scope, labelled with ``source``."""
        return await asyncio.to_thread(
            _kb,
            "ingest",
            "--scope",
            f"agent:{agent_id}",
            "--source",
            source,
            input_text=text,
        )

    @staticmethod
    async def ingest_url(agent_id: str, url: str) -> dict:
        """Fetch URL with trafilatura (Python), pipe text to kb.

        Any failure is reported as ``{"error": ..., "url": ...}`` rather than
        raised.
        """
        try:
            text = await _extract_url(url)
            return await asyncio.to_thread(
                _kb,
                "ingest",
                "--scope",
                f"agent:{agent_id}",
                "--source",
                url,
                input_text=text,
            )
        except Exception as exc:
            return {"error": str(exc), "url": url}

    @staticmethod
    async def ingest_file(agent_id: str, file_path: str) -> dict:
        """Extract file content (PDF/DOCX via Python if needed), pipe to kb.

        Any failure is reported as ``{"error": ...}`` rather than raised.
        """
        try:
            path = Path(file_path)
            # Compare case-insensitively so "REPORT.PDF" takes the extraction
            # path too, matching how _extract_file normalizes the suffix.
            if path.suffix.lower() in KnowledgeService._EXTRACTED_SUFFIXES:
                text = await _extract_file(file_path)
                return await asyncio.to_thread(
                    _kb,
                    "ingest",
                    "--scope",
                    f"agent:{agent_id}",
                    "--source",
                    file_path,
                    input_text=text,
                )
            # Text/code files go directly to kb
            return await asyncio.to_thread(
                _kb, "ingest", file_path, "--scope", f"agent:{agent_id}"
            )
        except Exception as exc:
            return {"error": str(exc)}

    @staticmethod
    async def search(agent_id: str, query: str, limit: int = 5) -> list[str]:
        """Return one string per hit: its ``summary``, else ``title``, else ``""``.

        Returns an empty list when kb's output is not a JSON list.
        """
        results = await asyncio.to_thread(
            _kb,
            "search",
            query,
            "--scope",
            f"agent:{agent_id}",
            "--limit",
            str(limit),
        )
        if isinstance(results, list):
            return [r.get("summary", r.get("title", "")) for r in results]
        return []

    @staticmethod
    async def search_context(agent_id: str, query: str, limit: int = 3) -> str:
        """Get formatted knowledge context for agent prompt injection.

        Returns ``""`` when kb's output is not a plain string.
        """
        result = await asyncio.to_thread(
            _kb,
            "search",
            query,
            "--scope",
            f"agent:{agent_id}",
            "--limit",
            str(limit),
            "--context",
        )
        return result if isinstance(result, str) else ""

    @staticmethod
    async def clear(agent_id: str) -> dict:
        """Run ``kb clear`` for the agent's scope."""
        return await asyncio.to_thread(_kb, "clear", "--scope", f"agent:{agent_id}")

    @staticmethod
    def stats(agent_id: str) -> dict:
        """Run ``kb stats`` for the agent's scope.

        Synchronous by design of the existing interface; it blocks the
        calling thread until kb returns.
        """
        return _kb("stats", "--scope", f"agent:{agent_id}")

    @staticmethod
    async def lint(agent_id: str) -> list[dict]:
        """Run ``kb lint`` for the agent's scope."""
        return await asyncio.to_thread(_kb, "lint", "--scope", f"agent:{agent_id}")
# --- Heavy extraction (stays in Python) ---
async def _extract_url(url: str) -> str:
    """Fetch ``url`` and return its article text.

    Uses trafilatura when it is installed; otherwise returns the first
    10000 characters of the raw response body. When trafilatura extracts
    nothing, the first 5000 characters of the body are returned.

    Raises:
        ValueError: If ``url`` is not an absolute http(s) URL.
        ImportError: If httpx is not installed.
        httpx.HTTPStatusError: If the server answers with a 4xx/5xx status,
            so error pages are never ingested as knowledge.
    """
    from urllib.parse import urlsplit

    # Reject anything but absolute http(s) URLs before touching the network.
    parts = urlsplit(url)
    if parts.scheme.lower() not in ("http", "https") or not parts.netloc:
        raise ValueError(f"Only http(s) URLs can be ingested, got: {url!r}")

    import httpx

    # NOTE(review): redirects are followed and the host is not checked against
    # private/loopback ranges; if the URL comes from untrusted users this can
    # reach internal services (SSRF) — confirm whether an allow/deny list is needed.
    async with httpx.AsyncClient(follow_redirects=True, timeout=30) as client:
        resp = await client.get(url)
        resp.raise_for_status()
        body = resp.text

    try:
        import trafilatura
    except ImportError:
        # Fallback: just return (truncated) raw HTML
        return body[:10000]
    return trafilatura.extract(body) or body[:5000]
async def _extract_file(file_path: str) -> str:
    """Extract text from PDF, DOCX, or image files.

    The handler is chosen by the lower-cased file suffix:
    ``.pdf`` uses pypdf, ``.docx``/``.doc`` use python-docx, and
    ``.png``/``.jpg``/``.jpeg`` use pytesseract OCR. Any other suffix is read
    as UTF-8 text with undecodable bytes replaced.

    Raises:
        RuntimeError: If the optional library needed for the suffix is not
            installed.
    """
    path = Path(file_path)
    suffix = path.suffix.lower()

    if suffix == ".pdf":
        try:
            from pypdf import PdfReader
        except ImportError as exc:
            raise RuntimeError("pypdf not installed — run: pip install pypdf") from exc
        reader = PdfReader(file_path)
        # extract_text() may yield nothing for a page; treat that as "".
        return "\n".join(p.extract_text() or "" for p in reader.pages)

    if suffix in (".docx", ".doc"):
        # NOTE(review): legacy binary .doc may not be readable by python-docx — confirm.
        try:
            from docx import Document
        except ImportError as exc:
            raise RuntimeError(
                "python-docx not installed — run: pip install python-docx"
            ) from exc
        doc = Document(file_path)
        return "\n".join(p.text for p in doc.paragraphs)

    if suffix in (".png", ".jpg", ".jpeg"):
        try:
            import pytesseract
            from PIL import Image
        except ImportError as exc:
            raise RuntimeError(
                "pytesseract not installed — run: pip install pytesseract Pillow"
            ) from exc
        # Close the image handle deterministically instead of leaking it.
        with Image.open(file_path) as img:
            return pytesseract.image_to_string(img)

    # Fallback: read as text
    return path.read_text(encoding="utf-8", errors="replace")

262
ee/cloud/agents/router.py Normal file
View File

@@ -0,0 +1,262 @@
"""Agents domain — FastAPI router."""
from __future__ import annotations
from fastapi import APIRouter, Depends, Query, Request, UploadFile
from fastapi import File as FastAPIFile
from starlette.responses import Response
from ee.cloud.agents.schemas import (
CreateAgentRequest,
DiscoverRequest,
UpdateAgentRequest,
)
from ee.cloud.agents.service import AgentService
from ee.cloud.license import require_license
from ee.cloud.shared.deps import (
current_user_id,
current_workspace_id,
require_action_any_workspace,
require_agent_owner_or_admin,
)
# All routes below live under /agents and run require_license as a dependency.
router = APIRouter(prefix="/agents", tags=["Agents"], dependencies=[Depends(require_license)])
# ---------------------------------------------------------------------------
# Backends discovery
# ---------------------------------------------------------------------------
@router.get("/backends")
async def list_available_backends():
    """List available agent backends with their display names."""
    from pocketpaw.agents.registry import get_backend_info, list_backends

    def _describe(backend_name):
        # Any failure while looking a backend up marks it as unavailable
        # instead of failing the whole listing.
        try:
            info = get_backend_info(backend_name)
            return {
                "name": backend_name,
                "displayName": info.display_name if info else backend_name,
                "available": info is not None,
            }
        except Exception:
            return {"name": backend_name, "displayName": backend_name, "available": False}

    return [_describe(backend_name) for backend_name in list_backends()]
# ---------------------------------------------------------------------------
# CRUD
# ---------------------------------------------------------------------------
@router.post("", dependencies=[Depends(require_action_any_workspace("agent.create"))])
async def create_agent(
    body: CreateAgentRequest,
    workspace_id: str = Depends(current_workspace_id),
    user_id: str = Depends(current_user_id),
) -> dict:
    # Thin HTTP wrapper: creation logic lives in AgentService.create.
    created = await AgentService.create(workspace_id, user_id, body)
    return created
@router.get("")
async def list_agents(
    workspace_id: str = Depends(current_workspace_id),
    query: str | None = Query(default=None),
) -> list[dict]:
    # `query` is optional; when omitted AgentService.list_agents receives None.
    agents = await AgentService.list_agents(workspace_id, query)
    return agents
@router.get("/{agent_id}")
async def get_agent(agent_id: str) -> dict:
    # NOTE(review): unlike get_by_slug, this lookup takes no workspace or user
    # dependency — confirm that cross-workspace access is handled elsewhere.
    agent = await AgentService.get(agent_id)
    return agent
@router.get("/uname/{slug}")
async def get_by_slug(
    slug: str,
    workspace_id: str = Depends(current_workspace_id),
) -> dict:
    # The slug is resolved within the caller's workspace only.
    agent = await AgentService.get_by_slug(workspace_id, slug)
    return agent
@router.patch("/{agent_id}", dependencies=[Depends(require_agent_owner_or_admin)])
async def update_agent(
    agent_id: str,
    body: UpdateAgentRequest,
    user_id: str = Depends(current_user_id),
) -> dict:
    # The route dependency gates access; AgentService.update applies the change.
    updated = await AgentService.update(agent_id, user_id, body)
    return updated
@router.delete("/{agent_id}", status_code=204, dependencies=[Depends(require_agent_owner_or_admin)])
async def delete_agent(
    agent_id: str,
    user_id: str = Depends(current_user_id),
) -> Response:
    # Delete first; the empty 204 response is only built once that succeeded.
    await AgentService.delete(agent_id, user_id)
    no_content = Response(status_code=204)
    return no_content
# ---------------------------------------------------------------------------
# Discovery
# ---------------------------------------------------------------------------
@router.post("/discover")
async def discover_agents(
    body: DiscoverRequest,
    workspace_id: str = Depends(current_workspace_id),
    user_id: str = Depends(current_user_id),
) -> list[dict]:
    # Filtering and paging are implemented in AgentService.discover.
    found = await AgentService.discover(workspace_id, user_id, body)
    return found
# ---------------------------------------------------------------------------
# Knowledge
# ---------------------------------------------------------------------------
@router.post("/{agent_id}/knowledge/text", dependencies=[Depends(require_agent_owner_or_admin)])
async def ingest_text(agent_id: str, body: dict):
    """Ingest plain text into agent's knowledge base.

    Expects ``{"text": str, "source": str (optional)}``. Problems are reported
    as ``{"error": ...}`` in the response body. Restricted to the agent's
    owner or an admin, like the other routes that modify an agent.
    """
    import logging

    from ee.cloud.agents.knowledge import KnowledgeService

    text = body.get("text", "")
    source = body.get("source", "manual")
    # Both values end up as subprocess input/arguments, so they must be strings.
    if not isinstance(text, str) or not text:
        return {"error": "No text provided"}
    if not isinstance(source, str) or not source:
        source = "manual"
    try:
        return await KnowledgeService.ingest_text(agent_id, text, source)
    except Exception as exc:
        logging.getLogger(__name__).error("Knowledge ingest failed: %s", exc, exc_info=True)
        return {"error": str(exc)}
@router.post("/{agent_id}/knowledge/url", dependencies=[Depends(require_agent_owner_or_admin)])
async def ingest_url(agent_id: str, body: dict):
    """Fetch and ingest a URL into agent's knowledge base.

    Expects ``{"url": str}``. Restricted to the agent's owner or an admin,
    since it makes the server fetch a caller-chosen URL and modifies the agent.
    """
    from ee.cloud.agents.knowledge import KnowledgeService

    url = body.get("url", "")
    # Reject missing, blank, or non-string values before fetching anything.
    if not isinstance(url, str) or not url.strip():
        return {"error": "No URL provided"}
    return await KnowledgeService.ingest_url(agent_id, url.strip())
@router.post("/{agent_id}/knowledge/urls", dependencies=[Depends(require_agent_owner_or_admin)])
async def ingest_urls(agent_id: str, body: dict):
    """Batch ingest multiple URLs.

    Expects ``{"urls": [str, ...]}``; a single string is treated as a
    one-element list. Returns one result dict per entry, in order. Entries
    that are not non-empty strings produce an ``{"error": ...}`` result
    instead of being fetched. Restricted to the agent's owner or an admin.
    """
    from ee.cloud.agents.knowledge import KnowledgeService

    urls = body.get("urls", [])
    if isinstance(urls, str):
        # Iterating a bare string would ingest it one character at a time.
        urls = [urls]
    if not isinstance(urls, (list, tuple)):
        return [{"error": "urls must be a list of URLs"}]

    results = []
    for url in urls:
        if not isinstance(url, str) or not url.strip():
            results.append({"error": "Invalid URL", "url": url})
            continue
        results.append(await KnowledgeService.ingest_url(agent_id, url.strip()))
    return results
@router.get("/{agent_id}/knowledge/search")
async def search_knowledge(agent_id: str, q: str = Query(..., min_length=1), limit: int = 5):
    """Search agent's knowledge base."""
    from ee.cloud.agents.knowledge import KnowledgeService

    # Wrap the hit list so the response body is always a JSON object.
    matches = await KnowledgeService.search(agent_id, q, limit)
    return {"results": matches}
# ---------------------------------------------------------------------------
# Profile Picture Upload
# ---------------------------------------------------------------------------
@router.post("/{agent_id}/profile-pic", dependencies=[Depends(require_agent_owner_or_admin)])
async def upload_profile_pic(
    agent_id: str,
    request: Request,
    file: UploadFile = FastAPIFile(...),
    user_id: str = Depends(current_user_id),
):
    """Upload a profile picture for an agent.

    Accepts JPEG, PNG, or WebP up to 5 MB, stores it under
    ``~/.pocketpaw/uploads/avatars/`` and sets the agent's ``avatar`` to the
    file's absolute URL.

    Returns:
        ``{"url": <avatar url>}``.

    Raises:
        HTTPException: 400 for a missing filename, unsupported type, or
            oversized file.
    """
    import uuid
    from pathlib import Path

    from fastapi import HTTPException

    if not file.filename:
        raise HTTPException(status_code=400, detail="No filename provided")

    # The stored extension comes from the validated content type, never from
    # the client-supplied filename: the uploads directory is served statically,
    # so a caller-chosen suffix such as ".html" must not reach disk.
    ext_by_type = {"image/jpeg": ".jpg", "image/png": ".png", "image/webp": ".webp"}
    ext = ext_by_type.get(file.content_type or "")
    if ext is None:
        raise HTTPException(status_code=400, detail="Only JPEG, PNG, and WebP images are allowed")

    # Read at most one byte past the cap so an oversized upload is detected
    # without loading the whole body into memory.
    max_bytes = 5 * 1024 * 1024
    content = await file.read(max_bytes + 1)
    if len(content) > max_bytes:
        raise HTTPException(status_code=400, detail="File size must be under 5 MB")

    # Save to ~/.pocketpaw/uploads/avatars/
    upload_dir = Path.home() / ".pocketpaw" / "uploads" / "avatars"
    upload_dir.mkdir(parents=True, exist_ok=True)
    filename = f"{agent_id}-{uuid.uuid4().hex[:8]}{ext}"
    dest = upload_dir / filename
    dest.write_bytes(content)

    # Build full URL using the request's base URL
    base = str(request.base_url).rstrip("/")
    avatar_url = f"{base}/uploads/avatars/{filename}"

    # Update the agent's avatar field; drop the file again if that fails so a
    # rejected update does not leave an orphaned upload behind.
    try:
        await AgentService.update(agent_id, user_id, UpdateAgentRequest(avatar=avatar_url))
    except Exception:
        dest.unlink(missing_ok=True)
        raise
    return {"url": avatar_url}
@router.post("/{agent_id}/knowledge/upload", dependencies=[Depends(require_agent_owner_or_admin)])
async def upload_and_ingest(
    agent_id: str,
    file: UploadFile = FastAPIFile(...),  # noqa: B008
):
    """Upload a file and ingest into agent's knowledge base.

    Supports: .pdf, .txt, .md, .csv, .json, .docx, .png, .jpg, .jpeg, .webp

    The upload is written to a temporary file (keeping its lower-cased
    suffix), ingested, and the temporary file is always removed afterwards.
    The result gains ``originalName`` and ``size`` keys. Restricted to the
    agent's owner or an admin.
    """
    import contextlib
    import os
    import tempfile
    from pathlib import Path

    from ee.cloud.agents.knowledge import KnowledgeService

    if not file.filename:
        return {"error": "No filename provided"}

    suffix = Path(file.filename).suffix.lower()
    # Read before creating the temp file so a failed read cannot leak one.
    content = await file.read()

    tmp_path = None
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
            tmp_path = tmp.name
            tmp.write(content)
        result = await KnowledgeService.ingest_file(agent_id, tmp_path)
        if not isinstance(result, dict):
            # The kb wrapper may yield a list or plain text; wrap it so the
            # metadata keys below can always be attached.
            result = {"result": result}
        result["originalName"] = file.filename
        result["size"] = len(content)
        return result
    finally:
        if tmp_path is not None:
            with contextlib.suppress(OSError):
                os.unlink(tmp_path)
@router.delete(
    "/{agent_id}/knowledge",
    status_code=204,
    dependencies=[Depends(require_agent_owner_or_admin)],
)
async def clear_knowledge(agent_id: str):
    """Clear all knowledge for an agent.

    Destructive, so it carries the same owner-or-admin dependency as the
    agent PATCH and DELETE routes. Responds with an empty 204.
    """
    from ee.cloud.agents.knowledge import KnowledgeService

    await KnowledgeService.clear(agent_id)
    return Response(status_code=204)

View File

@@ -0,0 +1,79 @@
"""Agents domain — Pydantic request/response schemas."""
from __future__ import annotations
from datetime import datetime
from pydantic import BaseModel, Field
# ---------------------------------------------------------------------------
# Requests
# ---------------------------------------------------------------------------
# Request body for creating an agent (consumed by AgentService.create).
class CreateAgentRequest(BaseModel):
    name: str = Field(min_length=1, max_length=100)
    slug: str = Field(min_length=1, max_length=50)
    avatar: str = ""
    # One of: private, workspace, public (enforced by the pattern).
    visibility: str = Field(default="private", pattern="^(private|workspace|public)$")
    # Agent config
    backend: str = "claude_agent_sdk"
    model: str = ""
    persona: str = ""
    # Optional overrides
    # None means "not provided"; AgentService.create only copies non-None values.
    temperature: float | None = None
    max_tokens: int | None = None
    tools: list[str] | None = None
    trust_level: int | None = None
    system_prompt: str = ""
    # Soul customization
    soul_enabled: bool = True
    soul_archetype: str = ""
    soul_values: list[str] | None = None
    soul_ocean: dict[str, float] | None = None
# Partial-update body for an agent: every field is optional and None means
# "leave unchanged" (AgentService.update only applies non-None values).
class UpdateAgentRequest(BaseModel):
    # Same length bounds as CreateAgentRequest.name, so an update cannot set
    # a name that creation would have rejected (e.g. the empty string).
    name: str | None = Field(default=None, min_length=1, max_length=100)
    avatar: str | None = None
    # One of: private, workspace, public (enforced by the pattern).
    visibility: str | None = Field(default=None, pattern="^(private|workspace|public)$")
    # When given, replaces the whole config; the individual overrides below
    # are then ignored by AgentService.update.
    config: dict | None = None
    # Agent config overrides
    backend: str | None = None
    model: str | None = None
    persona: str | None = None
    temperature: float | None = None
    max_tokens: int | None = None
    tools: list[str] | None = None
    trust_level: int | None = None
    system_prompt: str | None = None
    # Soul customization
    soul_enabled: bool | None = None
    soul_archetype: str | None = None
    soul_values: list[str] | None = None
    soul_ocean: dict[str, float] | None = None
# Request body for agent discovery (consumed by AgentService.discover).
class DiscoverRequest(BaseModel):
    # Matched against agent names; empty means no name filter.
    query: str = ""
    visibility: str | None = None  # filter
    # 1-based page number.
    page: int = Field(default=1, ge=1)
    # Results per page, 1..100.
    page_size: int = Field(default=20, ge=1, le=100)
# ---------------------------------------------------------------------------
# Responses
# ---------------------------------------------------------------------------
# Response shape for an agent.
# NOTE(review): the agent service's response dict uses different keys
# (_id, uname, createdOn, lastUpdatedOn) — confirm whether this model is still used.
class AgentResponse(BaseModel):
    id: str
    workspace: str
    name: str
    slug: str
    avatar: str
    visibility: str
    config: dict
    owner: str
    created_at: datetime
    updated_at: datetime

214
ee/cloud/agents/service.py Normal file
View File

@@ -0,0 +1,214 @@
"""Agents domain — business logic service."""
from __future__ import annotations
from beanie import PydanticObjectId
from ee.cloud.agents.schemas import (
CreateAgentRequest,
DiscoverRequest,
UpdateAgentRequest,
)
from ee.cloud.models.agent import Agent, AgentConfig
from ee.cloud.shared.errors import ConflictError, Forbidden, NotFound
def _agent_response(agent: Agent) -> dict:
    """Serialize an Agent document into the dict shape the frontend expects.

    The id is stringified, the slug is exposed as ``uname``, the config is
    dumped to a plain dict, and the timestamps become ISO-8601 strings (or
    None when unset).
    """
    created_at = agent.createdAt
    updated_at = agent.updatedAt

    payload = {
        "_id": str(agent.id),
        "workspace": agent.workspace,
        "name": agent.name,
        "uname": agent.slug,
        "avatar": agent.avatar,
        "visibility": agent.visibility,
        "config": agent.config.model_dump(),
        "owner": agent.owner,
    }
    payload["createdOn"] = created_at.isoformat() if created_at else None
    payload["lastUpdatedOn"] = updated_at.isoformat() if updated_at else None
    return payload
class AgentService:
    """Stateless service encapsulating agent business logic."""

    @staticmethod
    def _name_filter(text: str) -> dict:
        """Build a case-insensitive, literal substring filter for agent names.

        The text is regex-escaped so user input cannot inject regex syntax
        (or a pathological pattern) into the MongoDB ``$regex`` query.
        """
        import re

        return {"$regex": re.escape(text), "$options": "i"}

    @staticmethod
    async def create(workspace_id: str, user_id: str, body: CreateAgentRequest) -> dict:
        """Create an agent with slug uniqueness within the workspace.

        Raises:
            ConflictError: If the slug is already used in this workspace.
        """
        existing = await Agent.find_one(
            Agent.workspace == workspace_id,
            Agent.slug == body.slug,
        )
        if existing:
            raise ConflictError(
                "agent.slug_taken",
                f"Slug '{body.slug}' is already in use in this workspace",
            )

        config_data: dict = {
            "backend": body.backend,
            "model": body.model,
            "system_prompt": body.system_prompt,
            "soul_enabled": body.soul_enabled,
            "soul_persona": body.persona,
            # Default archetype is derived from the agent's name.
            "soul_archetype": body.soul_archetype or f"The {body.name}",
        }
        # Optional overrides are only set when provided, so AgentConfig's own
        # defaults apply otherwise.
        for field in (
            "temperature",
            "max_tokens",
            "tools",
            "trust_level",
            "soul_values",
            "soul_ocean",
        ):
            value = getattr(body, field)
            if value is not None:
                config_data[field] = value

        config = AgentConfig(**config_data)
        agent = Agent(
            workspace=workspace_id,
            name=body.name,
            slug=body.slug,
            avatar=body.avatar,
            visibility=body.visibility,
            config=config,
            owner=user_id,
        )
        await agent.insert()

        # Eagerly materialize the soul on disk so it exists before the agent's
        # first chat. Failures are non-fatal — lazy init in AgentPool will retry.
        if config.soul_enabled:
            try:
                from pocketpaw.agents.pool import get_agent_pool

                await get_agent_pool().ensure_soul(agent)
            except Exception:
                import logging

                logging.getLogger(__name__).warning(
                    "Eager soul creation failed for agent %s", agent.id, exc_info=True
                )
        return _agent_response(agent)

    @staticmethod
    async def list_agents(workspace_id: str, query: str | None = None) -> list[dict]:
        """List agents in a workspace with optional name search.

        ``query`` is matched as a literal, case-insensitive substring.
        """
        filters: dict = {"workspace": workspace_id}
        if query:
            filters["name"] = AgentService._name_filter(query)
        agents = await Agent.find(filters).to_list()
        return [_agent_response(a) for a in agents]

    @staticmethod
    async def get(agent_id: str) -> dict:
        """Get a single agent by ID. Raises NotFound if missing."""
        agent = await Agent.get(PydanticObjectId(agent_id))
        if not agent:
            raise NotFound("agent", agent_id)
        return _agent_response(agent)

    @staticmethod
    async def get_by_slug(workspace_id: str, slug: str) -> dict:
        """Find an agent by slug within a workspace. Raises NotFound if missing."""
        agent = await Agent.find_one(
            Agent.workspace == workspace_id,
            Agent.slug == slug,
        )
        if not agent:
            raise NotFound("agent", slug)
        return _agent_response(agent)

    @staticmethod
    async def update(agent_id: str, user_id: str, body: UpdateAgentRequest) -> dict:
        """Update agent fields. Owner only.

        Only non-None fields of ``body`` are applied. If ``body.config`` is
        given it replaces the whole config and the individual config/soul
        overrides are ignored.

        Raises:
            NotFound: If the agent does not exist.
            Forbidden: If ``user_id`` is not the agent's owner.
        """
        agent = await Agent.get(PydanticObjectId(agent_id))
        if not agent:
            raise NotFound("agent", agent_id)
        if agent.owner != user_id:
            raise Forbidden("agent.not_owner", "Only the agent owner can update it")

        if body.name is not None:
            agent.name = body.name
        if body.avatar is not None:
            agent.avatar = body.avatar
        if body.visibility is not None:
            agent.visibility = body.visibility

        if body.config is not None:
            agent.config = AgentConfig(**body.config)
        else:
            # Apply individual config/soul field overrides
            current = agent.config.model_dump()
            changed = False
            for field, attr in [
                ("backend", body.backend),
                ("model", body.model),
                ("system_prompt", body.system_prompt),
                ("temperature", body.temperature),
                ("max_tokens", body.max_tokens),
                ("tools", body.tools),
                ("trust_level", body.trust_level),
                ("soul_enabled", body.soul_enabled),
                ("soul_archetype", body.soul_archetype),
                ("soul_values", body.soul_values),
                ("soul_ocean", body.soul_ocean),
            ]:
                if attr is not None:
                    current[field] = attr
                    changed = True
            # The request calls it "persona"; the config stores "soul_persona".
            if body.persona is not None:
                current["soul_persona"] = body.persona
                changed = True
            if changed:
                # Rebuild through AgentConfig so the merged values are validated.
                agent.config = AgentConfig(**current)

        await agent.save()
        return _agent_response(agent)

    @staticmethod
    async def delete(agent_id: str, user_id: str) -> None:
        """Hard-delete an agent. Owner only.

        Raises:
            NotFound: If the agent does not exist.
            Forbidden: If ``user_id`` is not the agent's owner.
        """
        agent = await Agent.get(PydanticObjectId(agent_id))
        if not agent:
            raise NotFound("agent", agent_id)
        if agent.owner != user_id:
            raise Forbidden("agent.not_owner", "Only the agent owner can delete it")
        await agent.delete()

    @staticmethod
    async def discover(workspace_id: str, user_id: str, body: DiscoverRequest) -> list[dict]:
        """Paginated agent discovery with visibility filtering.

        Visibility rules:
        - private: only the requesting user's own agents
        - workspace: all agents in the workspace
        - public: all public agents (across workspaces)

        Any other (or missing) visibility combines the three. ``body.query``
        is matched as a literal, case-insensitive substring of the name.
        """
        filters: dict = {}
        if body.visibility == "private":
            filters["workspace"] = workspace_id
            filters["owner"] = user_id
        elif body.visibility == "workspace":
            filters["workspace"] = workspace_id
        elif body.visibility == "public":
            filters["visibility"] = "public"
        else:
            # Default: user's own agents + workspace-visible + public
            filters["$or"] = [
                {"workspace": workspace_id, "owner": user_id},
                {"workspace": workspace_id, "visibility": "workspace"},
                {"visibility": "public"},
            ]

        if body.query:
            filters["name"] = AgentService._name_filter(body.query)

        skip = (body.page - 1) * body.page_size
        agents = await Agent.find(filters).skip(skip).limit(body.page_size).to_list()
        return [_agent_response(a) for a in agents]

20
ee/cloud/auth/__init__.py Normal file
View File

@@ -0,0 +1,20 @@
"""Auth domain — re-exports for backward compatibility."""
from ee.cloud.auth.core import ( # noqa: F401
SECRET,
TOKEN_LIFETIME,
UserCreate,
UserManager,
UserRead,
bearer_backend,
cookie_backend,
current_active_user,
current_optional_user,
fastapi_users,
get_jwt_strategy,
get_user_db,
get_user_manager,
seed_admin,
seed_workspace,
)
from ee.cloud.auth.router import router # noqa: F401

257
ee/cloud/auth/core.py Normal file
View File

@@ -0,0 +1,257 @@
"""Enterprise auth — fastapi-users with JWT cookie + bearer transport.
Changes: Added seed_workspace() to auto-create default workspace + General group
on first boot, so admin can immediately use the app after seeding.
Provides:
- POST /auth/register — sign up with email + password
- POST /auth/login — sign in, returns JWT cookie + token
- POST /auth/logout — clear cookie
- GET /auth/me — current user
- PATCH /auth/me — update profile
Admin seeding: call seed_admin() on startup to ensure a default admin exists.
Workspace seeding: call seed_workspace() after seed_admin() to bootstrap first workspace.
"""
from __future__ import annotations
import logging
import os
from beanie import PydanticObjectId
from fastapi import Depends, Request
from fastapi_users import BaseUserManager, FastAPIUsers
from fastapi_users import schemas as fastapi_users_schemas
from fastapi_users.authentication import (
AuthenticationBackend,
BearerTransport,
CookieTransport,
JWTStrategy,
)
from fastapi_users_db_beanie import BeanieUserDatabase, ObjectIDIDMixin
from ee.cloud.models.user import OAuthAccount, User, WorkspaceMembership
from ee.cloud.models.workspace import Workspace, WorkspaceSettings
logger = logging.getLogger(__name__)

# Secret used to sign JWTs and reset/verification tokens. The fallback value
# is public (it is in the source), so tokens signed with it can be forged.
_DEFAULT_SECRET = "change-me-in-production-please"
SECRET = os.environ.get("AUTH_SECRET", _DEFAULT_SECRET)
if SECRET == _DEFAULT_SECRET:
    logger.warning(
        "AUTH_SECRET is not set; using the built-in default signing secret. "
        "Set AUTH_SECRET to a long random value before exposing this service."
    )
TOKEN_LIFETIME = 60 * 60 * 24 * 7  # 7 days
# ---------------------------------------------------------------------------
# User database adapter
# ---------------------------------------------------------------------------
async def get_user_db():
    # Generator dependency: yields a Beanie-backed user database adapter
    # over the User and OAuthAccount models.
    user_db = BeanieUserDatabase(User, OAuthAccount)
    yield user_db
# ---------------------------------------------------------------------------
# User manager (handles registration, password hashing, etc.)
# ---------------------------------------------------------------------------
class UserManager(ObjectIDIDMixin, BaseUserManager[User, PydanticObjectId]):
    # Both token kinds are signed with the module-level SECRET.
    reset_password_token_secret = SECRET
    verification_token_secret = SECRET

    async def on_after_register(self, user: User, request: Request | None = None):
        # Hook: only logs the new user's email and id.
        logger.info("User registered: %s (%s)", user.email, user.id)

    async def on_after_login(self, user: User, request: Request | None = None, response=None):
        # Hook: only logs at debug level.
        logger.debug("User logged in: %s", user.email)
async def get_user_manager(user_db=Depends(get_user_db)):
    # Generator dependency: wraps the injected user DB adapter in a UserManager.
    manager = UserManager(user_db)
    yield manager
# ---------------------------------------------------------------------------
# Auth backends — cookie (browser) + bearer (API/Tauri)
# ---------------------------------------------------------------------------
# Browser transport: the token travels in the "paw_auth" cookie, which
# lives as long as the token itself (TOKEN_LIFETIME).
cookie_transport = CookieTransport(
    cookie_name="paw_auth",
    cookie_max_age=TOKEN_LIFETIME,
    cookie_secure=False,  # Set True in production with HTTPS
    cookie_samesite="lax",
)
# API transport: the token travels in the Authorization header.
bearer_transport = BearerTransport(tokenUrl="/api/v1/auth/login")
def get_jwt_strategy() -> JWTStrategy:
    # Strategy factory shared by both backends: tokens are signed with SECRET
    # and expire after TOKEN_LIFETIME seconds.
    strategy = JWTStrategy(secret=SECRET, lifetime_seconds=TOKEN_LIFETIME)
    return strategy
# Two backends share one JWT strategy and differ only in transport.
cookie_backend = AuthenticationBackend(
    name="cookie",
    transport=cookie_transport,
    get_strategy=get_jwt_strategy,
)
bearer_backend = AuthenticationBackend(
    name="bearer",
    transport=bearer_transport,
    get_strategy=get_jwt_strategy,
)
# ---------------------------------------------------------------------------
# FastAPIUsers instance
# ---------------------------------------------------------------------------
# Single FastAPIUsers instance wired to both backends (cookie listed first).
fastapi_users = FastAPIUsers[User, PydanticObjectId](
    get_user_manager,
    [cookie_backend, bearer_backend],
)
# Current user dependencies
# current_active_user requires an active user; current_optional_user is
# created with optional=True.
current_active_user = fastapi_users.current_user(active=True)
current_optional_user = fastapi_users.current_user(active=True, optional=True)
# ---------------------------------------------------------------------------
# Schemas for register/read
# ---------------------------------------------------------------------------
# Read schema: the fastapi-users base fields plus profile fields.
class UserRead(fastapi_users_schemas.BaseUser[PydanticObjectId]):
    full_name: str = ""
    avatar: str = ""
# Registration schema: the fastapi-users base fields plus an optional display name.
class UserCreate(fastapi_users_schemas.BaseUserCreate):
    full_name: str = ""
# ---------------------------------------------------------------------------
# Admin seeding
# ---------------------------------------------------------------------------
async def seed_admin(
    email: str | None = None,
    password: str | None = None,
    full_name: str | None = None,
) -> User | None:
    """Create default admin user if it doesn't exist.

    Reads from env vars if args not provided:
        ADMIN_EMAIL (default: admin@pocketpaw.ai)
        ADMIN_PASSWORD (default: admin123)
        ADMIN_NAME (default: Admin)

    The password is never written to the log. A warning is logged when the
    account is created with the built-in default password.

    Returns:
        The existing or newly created user, or None if creation failed.
    """
    default_password = "admin123"
    email = email or os.environ.get("ADMIN_EMAIL", "admin@pocketpaw.ai")
    password = password or os.environ.get("ADMIN_PASSWORD", default_password)
    full_name = full_name or os.environ.get("ADMIN_NAME", "Admin")

    existing = await User.find_one(User.email == email)
    if existing:
        logger.debug("Admin user already exists: %s", email)
        return existing

    from fastapi_users.exceptions import UserAlreadyExists

    db = BeanieUserDatabase(User, OAuthAccount)
    manager = UserManager(db)
    try:
        user = await manager.create(
            UserCreate(
                email=email,
                password=password,
                full_name=full_name,
                is_superuser=True,
                is_verified=True,
            ),
        )
        user.full_name = full_name
        await user.save()
        # Credentials must not end up in log files, so only the email is logged.
        logger.info("Admin user created: %s", email)
        if password == default_password:
            logger.warning(
                "Admin user %s was created with the built-in default password; "
                "set ADMIN_PASSWORD or change the password immediately",
                email,
            )
        return user
    except UserAlreadyExists:
        # Lost a race with another creator: return whoever got inserted.
        return await User.find_one(User.email == email)
    except Exception as exc:
        logger.error("Failed to seed admin: %s", exc)
        return None
async def seed_workspace(admin: User | None = None) -> Workspace | None:
    """Bootstrap the first workspace and its "General" group.

    Does nothing (returns None) when no superuser can be found, when the
    admin already belongs to a workspace, or when any workspace exists.
    Otherwise creates the workspace, makes the admin its owner and active
    member, and tries to add a public "General" group. A failure while
    creating the group is logged and ignored; any other failure is logged
    and yields None.
    """
    from datetime import UTC, datetime

    if admin is None:
        admin = await User.find_one(User.is_superuser == True)  # noqa: E712
    if not admin:
        logger.debug("No admin user found — skipping workspace seed")
        return None

    # Skip if admin already has a workspace
    if admin.workspaces:
        logger.debug("Admin already has workspace(s) — skipping seed")
        return None

    # Also skip if any workspace exists at all
    if await Workspace.find_one():
        logger.debug("Workspace already exists — skipping seed")
        return None

    ws_name = os.environ.get("DEFAULT_WORKSPACE_NAME", "PocketPaw")
    ws_slug = os.environ.get("DEFAULT_WORKSPACE_SLUG", "pocketpaw")

    try:
        workspace = Workspace(
            name=ws_name,
            slug=ws_slug,
            owner=str(admin.id),
            plan="enterprise",
            seats=50,
            settings=WorkspaceSettings(),
        )
        await workspace.insert()

        membership = WorkspaceMembership(
            workspace=str(workspace.id),
            role="owner",
            joined_at=datetime.now(UTC),
        )
        admin.workspaces.append(membership)
        admin.active_workspace = str(workspace.id)
        await admin.save()
        logger.info(
            "Default workspace created: %s (slug: %s, id: %s)",
            ws_name,
            ws_slug,
            workspace.id,
        )

        # Create a default "General" chat group
        try:
            from ee.cloud.models.group import Group

            general = Group(
                workspace=str(workspace.id),
                name="General",
                slug="general",
                description="Default channel for team discussion",
                type="public",
                owner=str(admin.id),
                members=[str(admin.id)],
            )
            await general.insert()
            logger.info("Default 'General' group created in workspace %s", ws_name)
        except Exception as exc:
            logger.warning("Failed to create default group (non-fatal): %s", exc)

        return workspace
    except Exception as exc:
        logger.error("Failed to seed workspace: %s", exc)
        return None

134
ee/cloud/auth/router.py Normal file
View File

@@ -0,0 +1,134 @@
"""Auth domain — FastAPI router."""
from __future__ import annotations
from pathlib import Path
from fastapi import APIRouter, Depends, File, HTTPException, UploadFile
from ee.cloud.auth.core import (
UserCreate,
UserRead,
bearer_backend,
cookie_backend,
current_active_user,
fastapi_users,
)
from ee.cloud.auth.schemas import ProfileUpdateRequest, SetWorkspaceRequest
from ee.cloud.auth.service import AuthService
from ee.cloud.models.user import User
router = APIRouter(tags=["Auth"])
# Avatar storage — local filesystem for now (could swap for S3/R2 later)
_AVATAR_DIR = Path.home() / ".pocketpaw" / "avatars"
# Import-time side effect: the directory is created when this module loads.
_AVATAR_DIR.mkdir(parents=True, exist_ok=True)
# Content types accepted by the avatar upload endpoint.
_ALLOWED_AVATAR_TYPES = {"image/png", "image/jpeg", "image/webp", "image/gif"}
_MAX_AVATAR_SIZE = 5 * 1024 * 1024  # 5 MB
# ---------------------------------------------------------------------------
# fastapi-users auth routes (login/logout)
# ---------------------------------------------------------------------------
# Cookie-based login/logout under /auth.
router.include_router(
    fastapi_users.get_auth_router(cookie_backend),
    prefix="/auth",
)
# Bearer-token login/logout under /auth/bearer.
router.include_router(
    fastapi_users.get_auth_router(bearer_backend),
    prefix="/auth/bearer",
)
# Register route
router.include_router(
    fastapi_users.get_register_router(UserRead, UserCreate),
    prefix="/auth",
)
# ---------------------------------------------------------------------------
# Profile endpoints
# ---------------------------------------------------------------------------
@router.get("/auth/me")
async def get_me(user: User = Depends(current_active_user)):
    # Profile of the authenticated (active) user.
    profile = await AuthService.get_profile(user)
    return profile
@router.patch("/auth/me")
async def update_me(
    body: ProfileUpdateRequest,
    user: User = Depends(current_active_user),
):
    # Applies the provided fields and returns the refreshed profile.
    profile = await AuthService.update_profile(user, body)
    return profile
@router.post("/auth/set-active-workspace")
async def set_active_workspace(
    body: SetWorkspaceRequest,
    user: User = Depends(current_active_user),
):
    # Echo the requested workspace id back once the service call has succeeded.
    target = body.workspace_id
    await AuthService.set_active_workspace(user, target)
    return {"ok": True, "activeWorkspace": target}
@router.post("/auth/avatar")
async def upload_avatar(
    file: UploadFile = File(...),
    user: User = Depends(current_active_user),
):
    """Upload a profile picture. Returns the updated profile with the avatar URL.

    The file is stored as ``<user id><ext>`` in the avatar directory, where
    the extension is derived from the content type. Any previous avatar of
    the user with a different extension is removed.

    Raises:
        HTTPException: 400 for an unsupported content type, 413 when the
            file exceeds the size limit.
    """
    if file.content_type not in _ALLOWED_AVATAR_TYPES:
        raise HTTPException(
            status_code=400,
            # Sorted so the message is stable; set iteration order is not.
            detail=f"Unsupported file type. Allowed: {', '.join(sorted(_ALLOWED_AVATAR_TYPES))}",
        )

    # Read at most one byte past the cap so an oversized upload is detected
    # without loading the whole body into memory.
    content = await file.read(_MAX_AVATAR_SIZE + 1)
    if len(content) > _MAX_AVATAR_SIZE:
        raise HTTPException(status_code=413, detail="Avatar must be under 5MB")

    # Determine extension from content-type
    ext_map = {
        "image/png": ".png",
        "image/jpeg": ".jpg",
        "image/webp": ".webp",
        "image/gif": ".gif",
    }
    ext = ext_map.get(file.content_type or "", ".png")
    filename = f"{user.id}{ext}"
    dest = _AVATAR_DIR / filename

    # Remove any old avatar with a different extension
    for old in _AVATAR_DIR.glob(f"{user.id}.*"):
        if old.name != filename:
            try:
                old.unlink()
            except OSError:
                # Best effort: a stale file must not block the new upload.
                pass

    dest.write_bytes(content)

    # Update user record — store a relative API path
    avatar_path = f"/api/v1/auth/avatar/{filename}"
    user.avatar = avatar_path
    await user.save()
    return await AuthService.get_profile(user)
@router.get("/auth/avatar/{filename}")
async def get_avatar(filename: str):
    """Serve a user's avatar file."""
    from fastapi.responses import FileResponse

    # Prevent path traversal
    forbidden_parts = ("/", "\\", "..")
    if any(part in filename for part in forbidden_parts):
        raise HTTPException(status_code=400, detail="Invalid filename")

    avatar_file = _AVATAR_DIR / filename
    if not (avatar_file.exists() and avatar_file.is_file()):
        raise HTTPException(status_code=404, detail="Avatar not found")
    return FileResponse(avatar_file)

26
ee/cloud/auth/schemas.py Normal file
View File

@@ -0,0 +1,26 @@
"""Auth domain — request/response schemas."""
from __future__ import annotations
from pydantic import BaseModel
class ProfileUpdateRequest(BaseModel):
    """Request body for a partial profile update.

    Every field is optional and defaults to ``None``.
    """

    # New display name.
    full_name: str | None = None
    # New avatar value, stored as given.
    avatar: str | None = None
    # New status value.
    status: str | None = None
class SetWorkspaceRequest(BaseModel):
    """Request body for switching the active workspace."""

    # ID of the workspace to make active (required).
    workspace_id: str
class UserResponse(BaseModel):
    """Schema describing a user profile response.

    NOTE(review): field names here are snake_case; confirm they match the
    keys actually returned by the profile endpoints before using this model
    as a response model.
    """

    id: str
    email: str
    name: str
    image: str
    email_verified: bool
    active_workspace: str | None
    workspaces: list[dict]
    # Allow building the model from attribute-bearing objects, not only dicts.
    model_config = {"from_attributes": True}

45
ee/cloud/auth/service.py Normal file
View File

@@ -0,0 +1,45 @@
"""Auth domain — business logic service."""
from __future__ import annotations
from fastapi import HTTPException
from ee.cloud.auth.schemas import ProfileUpdateRequest
from ee.cloud.models.user import User
class AuthService:
    """Stateless service encapsulating auth-related business logic."""

    @staticmethod
    async def get_profile(user: User) -> dict:
        """Return the current user's profile as a plain dict.

        Keys are camelCase (``emailVerified``, ``activeWorkspace``);
        ``workspaces`` is a list of ``{"workspace", "role"}`` dicts built from
        ``user.workspaces``.
        """
        return {
            "id": str(user.id),
            "email": user.email,
            "name": user.full_name,
            "image": user.avatar,
            "emailVerified": user.is_verified,
            "activeWorkspace": user.active_workspace,
            "workspaces": [{"workspace": w.workspace, "role": w.role} for w in user.workspaces],
        }

    @staticmethod
    async def update_profile(user: User, body: ProfileUpdateRequest) -> dict:
        """Update mutable profile fields and return the updated profile.

        Only fields that are not ``None`` in ``body`` are written, so a
        field cannot be reset to ``None`` through this method.
        """
        if body.full_name is not None:
            user.full_name = body.full_name
        if body.avatar is not None:
            user.avatar = body.avatar
        if body.status is not None:
            user.status = body.status
        await user.save()
        return await AuthService.get_profile(user)

    @staticmethod
    async def set_active_workspace(user: User, workspace_id: str) -> None:
        """Set the user's active workspace.

        Raises:
            HTTPException: 400 when ``workspace_id`` is empty, 403 when the
                user has no membership entry for that workspace.
        """
        if not workspace_id:
            raise HTTPException(400, "workspace_id required")
        # Only workspaces the user already belongs to may become active;
        # otherwise any authenticated user could point their session at an
        # arbitrary workspace ID.
        if not any(str(w.workspace) == workspace_id for w in user.workspaces):
            raise HTTPException(403, "Not a member of this workspace")
        user.active_workspace = workspace_id
        await user.save()

View File

@@ -0,0 +1,3 @@
"""Chat domain — groups, messages, WebSocket real-time."""
from ee.cloud.chat.router import router # noqa: F401

View File

@@ -0,0 +1,553 @@
# Refactored: Split from service.py — contains GroupService class and group-related
# helper functions. N+1 query in _group_response() fixed with batch loading for
# both members (User) and agents (AgentModel).
"""Chat domain — group business logic (CRUD, membership, agents, DMs)."""
from __future__ import annotations
import logging
import re
from typing import Literal
from beanie import PydanticObjectId
from ee.cloud.chat.schemas import (
AddGroupAgentRequest,
CreateGroupRequest,
UpdateGroupAgentRequest,
UpdateGroupRequest,
)
from ee.cloud.models.group import Group, GroupAgent, MemberRole
from ee.cloud.shared.errors import Forbidden, NotFound, ValidationError
from pocketpaw.ee.guards.actions import GroupRole
from pocketpaw.ee.guards.audit import log_denial
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _generate_slug(name: str) -> str:
    """Build a URL-friendly slug from a display name.

    The name is lower-cased and trimmed, runs of whitespace/underscores become
    a single hyphen, every character outside ``a-z0-9-`` is dropped, repeated
    hyphens are collapsed, and leading/trailing hyphens are removed. The result
    may be empty when the name contains no ASCII letters or digits.
    """
    # Order matters: separators are converted before unsupported characters
    # are dropped, and hyphen runs are collapsed last.
    substitutions = (
        (r"[\s_]+", "-"),
        (r"[^a-z0-9-]", ""),
        (r"-{2,}", "-"),
    )
    candidate = name.lower().strip()
    for pattern, replacement in substitutions:
        candidate = re.sub(pattern, replacement, candidate)
    return candidate.strip("-")
async def _group_response(group: Group) -> dict:
    """Serialize a Group document into the dict shape sent to the frontend.

    Member IDs are expanded to ``{_id, name, email, avatar}`` and group agents
    to ``{_id, agent, name, uname, avatar, role, respond_mode}``. Users and
    agents are each fetched with a single ``$in`` query rather than one query
    per entry. Members whose user record is not found fall back to
    ``{_id, name, email}`` with the raw ID as the name; agents whose record is
    not found fall back to the name ``"Agent"`` and empty ``uname``/``avatar``.
    """
    from ee.cloud.models.agent import Agent as AgentModel
    from ee.cloud.models.user import User

    # --- members: one batched lookup -------------------------------------
    member_oids = [PydanticObjectId(member_id) for member_id in group.members]
    users_by_id: dict = {}
    if member_oids:
        found_users = await User.find({"_id": {"$in": member_oids}}).to_list()
        users_by_id = {str(doc.id): doc for doc in found_users}

    def _member_entry(member_id):
        doc = users_by_id.get(member_id)
        if not doc:
            return {"_id": member_id, "name": member_id, "email": ""}
        return {
            "_id": str(doc.id),
            "name": doc.full_name or doc.email,
            "email": doc.email,
            "avatar": doc.avatar,
        }

    member_entries = [_member_entry(member_id) for member_id in group.members]

    # --- agents: one batched lookup --------------------------------------
    agent_oids = [PydanticObjectId(link.agent) for link in group.agents]
    agents_by_id: dict = {}
    if agent_oids:
        found_agents = await AgentModel.find({"_id": {"$in": agent_oids}}).to_list()
        agents_by_id = {str(doc.id): doc for doc in found_agents}

    def _agent_entry(link):
        doc = agents_by_id.get(link.agent)
        if doc:
            ident, label, uname, avatar = str(doc.id), doc.name, doc.slug, doc.avatar
        else:
            ident, label, uname, avatar = link.agent, "Agent", "", ""
        return {
            "_id": ident,
            "agent": link.agent,
            "name": label,
            "uname": uname,
            "avatar": avatar,
            "role": link.role,
            "respond_mode": link.respond_mode,
        }

    agent_entries = [_agent_entry(link) for link in group.agents]

    last_message_at = group.last_message_at.isoformat() if group.last_message_at else None
    created_at = group.createdAt.isoformat() if group.createdAt else None

    return {
        "_id": str(group.id),
        "workspace": group.workspace,
        "name": group.name,
        "slug": group.slug,
        "description": group.description,
        "type": group.type,
        "icon": group.icon,
        "color": group.color,
        "owner": group.owner,
        "members": member_entries,
        "memberRoles": dict(group.member_roles),
        "agents": agent_entries,
        "pinnedMessages": group.pinned_messages,
        "archived": group.archived,
        "lastMessageAt": last_message_at,
        "messageCount": group.message_count,
        "createdAt": created_at,
    }
def _require_group_member(group: Group, user_id: str) -> None:
    """Ensure ``user_id`` is listed in ``group.members``.

    Non-members are recorded through ``log_denial`` and rejected with
    ``Forbidden("group.not_member", ...)``.
    """
    if user_id in group.members:
        return

    log_denial(
        actor=user_id,
        action="group.view",
        code="group.not_member",
        resource_id=str(group.id),
    )
    raise Forbidden("group.not_member", "You are not a member of this group")
def _require_group_admin(group: Group, user_id: str) -> None:
    """Raise Forbidden if user is not a group admin or owner.

    Admin tier is derived from ``group.member_roles[user_id] == "admin"`` and
    only counts while the user is still listed in ``group.members``; a role
    entry left behind for a former member grants nothing (the same rule
    ``_role_for`` applies). The group owner is always an implicit admin.

    Denials are recorded through ``log_denial`` before
    ``Forbidden("group.not_admin", ...)`` is raised.
    """
    if group.owner == user_id:
        return
    # Require current membership so a stale member_roles entry cannot keep
    # admin rights alive after the user has left the group.
    if user_id in group.members and group.member_roles.get(user_id) == "admin":
        return
    log_denial(
        actor=user_id,
        action="group.admin",
        code="group.not_admin",
        resource_id=str(group.id),
    )
    raise Forbidden("group.not_admin", "Only group admins can perform this action")
def _role_for(group: Group, user_id: str) -> Literal["owner", "admin", "edit", "view", "none"]:
    """Resolve the effective role of ``user_id`` inside ``group``.

    Resolution order:
    - the group owner is always ``"owner"``;
    - anyone not listed in ``group.members`` is ``"none"``;
    - a member with a recognised ``member_roles`` entry gets that role;
    - any other member defaults to ``"edit"`` (back-compat default).
    """
    if group.owner == user_id:
        return "owner"
    if user_id in group.members:
        assigned = group.member_roles.get(user_id)
        recognised = assigned in ("admin", "edit", "view")
        return assigned if recognised else "edit"  # type: ignore[return-value]
    return "none"
def resolve_group_role(group: Group, user_id: str) -> GroupRole:
    """Structured role resolution for the canonical guards matrix.

    Looks up the user's role with ``_role_for`` and converts it with
    ``GroupRole.from_str``.

    Raises Forbidden ``group.not_member`` if the user has no membership.
    """
    raw = _role_for(group, user_id)
    if raw == "none":
        raise Forbidden("group.not_member", "You are not a member of this group")
    # The role string is passed through unchanged; the previous
    # ``"edit" if raw == "edit" else raw`` conditional always evaluated to ``raw``.
    return GroupRole.from_str(raw)
def _require_can_post(group: Group, user_id: str) -> None:
    """Ensure the user's role in the group allows posting / mutating.

    Roles ``"view"`` and ``"none"`` are rejected: the denial is recorded with
    ``log_denial`` (action ``group.post``) and ``Forbidden`` is raised with the
    matching code. Every other role passes.
    """
    # Role -> (denial code, user-facing message) for the roles that may not post.
    denials = {
        "view": ("group.view_only", "You have read-only access in this group"),
        "none": ("group.not_member", "You are not a member of this group"),
    }
    denial = denials.get(_role_for(group, user_id))
    if denial is None:
        return

    code, message = denial
    log_denial(
        actor=user_id,
        action="group.post",
        code=code,
        resource_id=str(group.id),
    )
    raise Forbidden(code, message)
async def _get_group_or_404(group_id: str) -> Group:
    """Load a group by ID or raise NotFound.

    A ``group_id`` that cannot be converted to a ``PydanticObjectId`` is
    treated the same as a missing group instead of escaping as an unhandled
    conversion error.

    Raises:
        NotFound: when the ID is malformed or no group has that ID.
    """
    try:
        group_oid = PydanticObjectId(group_id)
    except Exception as exc:  # noqa: BLE001 - surface as NotFound
        raise NotFound("group", group_id) from exc
    group = await Group.get(group_oid)
    if not group:
        raise NotFound("group", group_id)
    return group
# ---------------------------------------------------------------------------
# GroupService
# ---------------------------------------------------------------------------
class GroupService:
    """Stateless service for group/channel business logic.

    "Group admin" below means a user accepted by ``_require_group_admin``:
    the group owner or a member whose ``member_roles`` entry is ``"admin"``.
    """

    @staticmethod
    async def create_group(workspace_id: str, user_id: str, body: CreateGroupRequest) -> dict:
        """Create a group and add the creator as a member.

        For DMs: ``body.member_ids`` must contain exactly one ID (the other
        party); the creator is added automatically, members are stored sorted
        and the group is named "DM". For other types the creator is merged
        into ``body.member_ids`` with duplicates removed.

        Raises:
            ValidationError: ``group.dm_requires_one_target`` for a DM request
                without exactly one target.
        """
        if body.type == "dm":
            if len(body.member_ids) != 1:
                raise ValidationError(
                    "group.dm_requires_one_target",
                    "DM groups require exactly one target member_id (the other party)",
                )
            members = sorted({user_id, body.member_ids[0]})
            name = "DM"
        else:
            members = list({user_id, *body.member_ids})
            name = body.name

        slug = _generate_slug(name)
        group = Group(
            workspace=workspace_id,
            name=name,
            slug=slug,
            description=body.description,
            type=body.type,
            icon=body.icon,
            color=body.color,
            members=members,
            owner=user_id,
        )
        await group.insert()
        return await _group_response(group)

    @staticmethod
    async def list_groups(workspace_id: str, user_id: str) -> list[dict]:
        """List groups visible to the user.

        Returns non-archived public groups and channels in the workspace plus
        any non-archived group of the workspace where the user is a member.
        """
        groups = await Group.find(
            {
                "workspace": workspace_id,
                "archived": False,
                "$or": [
                    # Public groups + channels are visible to any workspace member.
                    # Private groups + DMs require membership.
                    {"type": {"$in": ["public", "channel"]}},
                    {"members": user_id},
                ],
            }
        ).to_list()
        return [await _group_response(g) for g in groups]

    @staticmethod
    async def get_group(group_id: str, user_id: str) -> dict:
        """Get a single group. Private/DM groups require membership."""
        group = await _get_group_or_404(group_id)
        if group.type in ("private", "dm"):
            _require_group_member(group, user_id)
        return await _group_response(group)

    @staticmethod
    async def update_group(group_id: str, user_id: str, body: UpdateGroupRequest) -> dict:
        """Update group fields. Group admins only. Cannot update DMs.

        Only fields that are not ``None`` in ``body`` are applied; renaming
        also regenerates the slug.
        """
        group = await _get_group_or_404(group_id)
        if group.type == "dm":
            raise Forbidden("group.cannot_update_dm", "DM groups cannot be updated")
        _require_group_admin(group, user_id)

        if body.name is not None:
            group.name = body.name
            group.slug = _generate_slug(body.name)
        if body.description is not None:
            group.description = body.description
        if body.icon is not None:
            group.icon = body.icon
        if body.color is not None:
            group.color = body.color
        if body.type is not None and body.type != group.type:
            # DMs can't change type; enforced above. Switching between
            # private/public/channel just changes who can read.
            group.type = body.type

        await group.save()
        return await _group_response(group)

    @staticmethod
    async def archive_group(group_id: str, user_id: str) -> None:
        """Archive a group. Group admins only."""
        group = await _get_group_or_404(group_id)
        _require_group_admin(group, user_id)
        group.archived = True
        await group.save()

    @staticmethod
    async def join_group(group_id: str, user_id: str) -> None:
        """Join a public group. Adds user to members list.

        Raises:
            Forbidden: ``group.not_public`` for any other group type,
                ``group.archived`` for an archived group.
        """
        group = await _get_group_or_404(group_id)
        if group.type != "public":
            raise Forbidden("group.not_public", "Only public groups can be joined directly")
        if group.archived:
            raise Forbidden("group.archived", "Cannot join an archived group")
        if user_id not in group.members:
            group.members.append(user_id)
            # A joining user starts with the default role; drop any role
            # entry left over from an earlier membership.
            group.member_roles.pop(user_id, None)
        await group.save()

    @staticmethod
    async def leave_group(group_id: str, user_id: str) -> None:
        """Leave a group. Owner cannot leave (must transfer ownership first).

        The member's role entry is removed together with the membership, as
        ``remove_member`` does, so no role survives the departure.
        """
        group = await _get_group_or_404(group_id)
        _require_group_member(group, user_id)
        if group.owner == user_id:
            raise Forbidden(
                "group.owner_cannot_leave",
                "The group owner cannot leave. Transfer ownership first.",
            )
        group.members.remove(user_id)
        group.member_roles.pop(user_id, None)
        await group.save()

    @staticmethod
    async def add_members(
        group_id: str,
        user_id: str,
        member_ids: list[str],
        role: MemberRole = "edit",
    ) -> list[str]:
        """Add members to a group with an initial role. Group admins only.

        Returns the list of user IDs that were newly added (skipping duplicates).
        Role "edit" is the default (no role entry is written to keep the dict
        small); "admin" and "view" write an explicit entry per added member.
        For IDs that are already members, role "edit" removes an existing
        role entry; "admin"/"view" leave existing members untouched.

        The group is saved whenever members were added or a role entry was
        removed.
        """
        group = await _get_group_or_404(group_id)
        _require_group_admin(group, user_id)
        if group.archived:
            raise Forbidden("group.archived", "Cannot modify an archived group")

        newly_added: list[str] = []
        roles_changed = False
        for mid in member_ids:
            if mid not in group.members:
                group.members.append(mid)
                newly_added.append(mid)
                if role in ("admin", "view"):
                    group.member_roles[mid] = role
                else:
                    # "edit" means "no entry": clear anything left over from
                    # an earlier membership.
                    group.member_roles.pop(mid, None)
            elif mid in group.member_roles and role == "edit":
                # Explicit edit removes any lingering admin/view entry
                group.member_roles.pop(mid, None)
                roles_changed = True

        # Persist role removals too, not only new members.
        if newly_added or roles_changed:
            await group.save()
        return newly_added

    @staticmethod
    async def remove_member(group_id: str, user_id: str, target_user_id: str) -> None:
        """Remove a member from a group. Group admins only. Cannot remove the owner.

        Raises:
            Forbidden: ``group.cannot_remove_owner`` when the target is the owner.
            NotFound: when the target is not a member.
        """
        group = await _get_group_or_404(group_id)
        _require_group_admin(group, user_id)
        if target_user_id == group.owner:
            raise Forbidden("group.cannot_remove_owner", "Cannot remove the group owner")
        if target_user_id not in group.members:
            raise NotFound("member", target_user_id)
        group.members.remove(target_user_id)
        group.member_roles.pop(target_user_id, None)
        await group.save()

    @staticmethod
    async def set_member_role(
        group_id: str, user_id: str, target_user_id: str, role: MemberRole
    ) -> MemberRole:
        """Set a member's role to "admin", "edit" or "view". Group admins only.

        Cannot change the owner's role. Raises NotFound if target is not a member.
        "edit" is stored as the absence of a role entry. Returns the new role
        on success.
        """
        if role not in ("admin", "edit", "view"):
            raise ValidationError(
                "group.invalid_role",
                f"Role must be one of 'admin', 'edit', 'view'; got {role!r}",
            )
        group = await _get_group_or_404(group_id)
        _require_group_admin(group, user_id)
        if target_user_id == group.owner:
            raise Forbidden("group.cannot_change_owner_role", "Cannot change the owner's role")
        if target_user_id not in group.members:
            raise NotFound("member", target_user_id)

        if role == "edit":
            group.member_roles.pop(target_user_id, None)
        else:
            group.member_roles[target_user_id] = role
        await group.save()
        return role

    @staticmethod
    async def add_agent(group_id: str, user_id: str, body: AddGroupAgentRequest) -> None:
        """Add an agent to a group. Group admins only.

        Raises:
            ValidationError: ``group.agent_already_added`` when the agent is
                already attached to the group.
        """
        group = await _get_group_or_404(group_id)
        _require_group_admin(group, user_id)

        # Check if agent is already in the group
        for existing in group.agents:
            if existing.agent == body.agent_id:
                raise ValidationError(
                    "group.agent_already_added",
                    f"Agent '{body.agent_id}' is already in this group",
                )

        group.agents.append(
            GroupAgent(
                agent=body.agent_id,
                role=body.role,
                respond_mode=body.respond_mode,
            )
        )
        await group.save()

    @staticmethod
    async def update_agent(
        group_id: str, user_id: str, agent_id: str, body: UpdateGroupAgentRequest
    ) -> None:
        """Update an agent's respond_mode in a group. Group admins only.

        Raises NotFound when the agent is not attached to the group.
        """
        group = await _get_group_or_404(group_id)
        _require_group_admin(group, user_id)

        for agent in group.agents:
            if agent.agent == agent_id:
                agent.respond_mode = body.respond_mode
                await group.save()
                return

        raise NotFound("agent", agent_id)

    @staticmethod
    async def remove_agent(group_id: str, user_id: str, agent_id: str) -> None:
        """Remove an agent from a group. Group admins only.

        Raises NotFound when the agent is not attached to the group.
        """
        group = await _get_group_or_404(group_id)
        _require_group_admin(group, user_id)

        original_len = len(group.agents)
        group.agents = [a for a in group.agents if a.agent != agent_id]
        # An unchanged length means no entry matched the agent ID.
        if len(group.agents) == original_len:
            raise NotFound("agent", agent_id)
        await group.save()

    @staticmethod
    async def get_or_create_dm(workspace_id: str, user_id: str, target_user_id: str) -> dict:
        """Find an existing DM between two users, or create one.

        DM groups have type="dm", sorted members, and name="DM". The caller
        becomes the owner of a newly created DM.
        """
        members = sorted([user_id, target_user_id])

        existing = await Group.find_one(
            {
                "workspace": workspace_id,
                "type": "dm",
                "members": {"$all": members, "$size": len(members)},
            }
        )
        if existing:
            return await _group_response(existing)

        group = Group(
            workspace=workspace_id,
            name="DM",
            slug=_generate_slug("dm"),
            type="dm",
            members=members,
            owner=user_id,
        )
        await group.insert()
        return await _group_response(group)

    @staticmethod
    async def get_or_create_agent_dm(workspace_id: str, user_id: str, agent_id: str) -> dict:
        """Find or create a 1:1 DM between the user and an agent.

        Stored as a type="dm" group with ``members=[user_id]`` and a single
        ``GroupAgent`` (respond_mode="auto" so the agent replies by default).
        Verifies the user can see the agent (owner | workspace-visible | public).

        Raises:
            NotFound: when the agent ID is malformed, the agent does not
                exist, or it is not visible to the user.
        """
        from ee.cloud.models.agent import Agent as AgentModel

        # Resolve the agent and verify access
        try:
            agent_oid = PydanticObjectId(agent_id)
        except Exception as exc:  # noqa: BLE001 - surface as NotFound
            raise NotFound("agent", agent_id) from exc
        agent_doc = await AgentModel.get(agent_oid)
        if not agent_doc:
            raise NotFound("agent", agent_id)
        visible = (
            (agent_doc.workspace == workspace_id and agent_doc.owner == user_id)
            or (agent_doc.workspace == workspace_id and agent_doc.visibility == "workspace")
            or agent_doc.visibility == "public"
        )
        if not visible:
            # Reported as NotFound rather than Forbidden.
            raise NotFound("agent", agent_id)

        # Idempotent lookup: a DM in this workspace with exactly this user and this agent
        existing = await Group.find_one(
            {
                "workspace": workspace_id,
                "type": "dm",
                "members": [user_id],
                "agents.agent": agent_id,
            }
        )
        if existing:
            return await _group_response(existing)

        group = Group(
            workspace=workspace_id,
            name="DM",
            slug=_generate_slug("dm"),
            type="dm",
            members=[user_id],
            agents=[GroupAgent(agent=agent_id, role="assistant", respond_mode="auto")],
            owner=user_id,
        )
        await group.insert()
        return await _group_response(group)

View File

@@ -0,0 +1,342 @@
# Refactored: Split from service.py — contains MessageService class and message-related
# helper functions. Added create_agent_message() static method for use by agent_bridge
# instead of creating Message documents directly.
"""Chat domain — message business logic (CRUD, reactions, threads, pins, search)."""
from __future__ import annotations
import logging
import re
from datetime import UTC, datetime
from beanie import PydanticObjectId
from ee.cloud.chat.group_service import (
_get_group_or_404,
_require_can_post,
_require_group_admin,
_require_group_member,
)
from ee.cloud.chat.schemas import (
EditMessageRequest,
SendMessageRequest,
)
from ee.cloud.models.message import Attachment, Mention, Message, Reaction
from ee.cloud.shared.errors import Forbidden, NotFound
from ee.cloud.shared.events import event_bus
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _message_response(msg: Message) -> dict:
    """Serialize a Message document into the dict shape sent to the frontend.

    Embedded mentions, attachments and reactions are dumped with
    ``model_dump()``; timestamps are rendered with ``isoformat()`` or ``None``
    when unset.
    """

    def _iso(moment):
        return moment.isoformat() if moment else None

    def _dump_all(models):
        return [model.model_dump() for model in models]

    payload = {
        "_id": str(msg.id),
        "group": msg.group,
        "sender": msg.sender,
        "senderType": msg.sender_type,
        "agent": msg.agent,
        "content": msg.content,
        "mentions": _dump_all(msg.mentions),
        "replyTo": msg.reply_to,
        "attachments": _dump_all(msg.attachments),
        "reactions": _dump_all(msg.reactions),
        "edited": msg.edited,
        "editedAt": _iso(msg.edited_at),
        "deleted": msg.deleted,
        "createdAt": _iso(msg.createdAt),
    }
    return payload
async def _get_message_or_404(message_id: str) -> Message:
    """Load a non-deleted message by ID or raise NotFound.

    A ``message_id`` that cannot be converted to a ``PydanticObjectId`` is
    treated the same as a missing message instead of escaping as an unhandled
    conversion error.

    Raises:
        NotFound: when the ID is malformed, no message has that ID, or the
            message is soft-deleted.
    """
    try:
        message_oid = PydanticObjectId(message_id)
    except Exception as exc:  # noqa: BLE001 - surface as NotFound
        raise NotFound("message", message_id) from exc
    msg = await Message.get(message_oid)
    if not msg or msg.deleted:
        raise NotFound("message", message_id)
    return msg
# ---------------------------------------------------------------------------
# MessageService
# ---------------------------------------------------------------------------
class MessageService:
    """Stateless service for message business logic."""

    @staticmethod
    async def send_message(group_id: str, user_id: str, body: SendMessageRequest) -> dict:
        """Send a message to a group.

        Verifies the sender may post, checks group is not archived, creates
        the Message document, updates the group's last_message_at /
        message_count, and emits a ``message.sent`` event.

        Raises:
            Forbidden: when the sender cannot post (see ``_require_can_post``)
                or the group is archived.
        """
        group = await _get_group_or_404(group_id)
        _require_can_post(group, user_id)
        if group.archived:
            raise Forbidden("group.archived", "Cannot send messages to an archived group")

        mentions = [Mention(**m) for m in body.mentions]
        attachments = [Attachment(**a) for a in body.attachments]

        msg = Message(
            group=group_id,
            sender=user_id,
            sender_type="user",
            content=body.content,
            mentions=mentions,
            reply_to=body.reply_to,
            attachments=attachments,
        )
        await msg.insert()

        # Update group stats
        group.last_message_at = msg.createdAt
        group.message_count += 1
        await group.save()

        response = _message_response(msg)
        await event_bus.emit(
            "message.sent",
            {
                "group_id": group_id,
                "message_id": str(msg.id),
                "sender_id": user_id,
                "sender_type": "user",
                "content": body.content,
                "mentions": body.mentions,
                "workspace_id": group.workspace,
            },
        )
        return response

    @staticmethod
    async def create_agent_message(
        group_id: str,
        agent_id: str,
        content: str,
        attachments: list[Attachment] | None = None,
    ) -> Message:
        """Create a message from an agent in a group.

        Used by agent_bridge to persist agent responses instead of creating
        Message documents directly. Returns the persisted Message document.

        Raises:
            NotFound: when the group does not exist; nothing is persisted in
                that case.
        """
        # Resolve the group first so a missing group cannot leave an
        # orphaned message behind.
        group = await _get_group_or_404(group_id)

        msg = Message(
            group=group_id,
            sender=None,
            sender_type="agent",
            agent=agent_id,
            content=content,
            attachments=attachments or [],
        )
        await msg.insert()

        # Update group stats
        group.last_message_at = msg.createdAt
        group.message_count += 1
        await group.save()
        return msg

    @staticmethod
    async def edit_message(message_id: str, user_id: str, body: EditMessageRequest) -> dict:
        """Edit a message. Author only, and the author must still be able to post."""
        msg = await _get_message_or_404(message_id)
        if msg.sender != user_id:
            raise Forbidden("message.not_author", "Only the message author can edit it")

        # Defense-in-depth: if the author's role has been downgraded to view,
        # block edits even though they authored the message.
        group = await _get_group_or_404(msg.group)
        _require_can_post(group, user_id)

        msg.content = body.content
        msg.edited = True
        msg.edited_at = datetime.now(UTC)
        await msg.save()
        return _message_response(msg)

    @staticmethod
    async def delete_message(message_id: str, user_id: str) -> None:
        """Soft-delete a message. Author or group owner can delete."""
        msg = await _get_message_or_404(message_id)
        if msg.sender != user_id:
            # Check if user is the group owner
            group = await _get_group_or_404(msg.group)
            if group.owner != user_id:
                raise Forbidden(
                    "message.not_authorized",
                    "Only the author or group owner can delete this message",
                )
        msg.deleted = True
        await msg.save()

    @staticmethod
    async def toggle_reaction(message_id: str, user_id: str, emoji: str) -> dict:
        """Toggle a reaction on a message.

        If the user already reacted with the given emoji, remove their
        reaction. Otherwise, add it. If the emoji reaction has no users
        left, remove the entire reaction entry.
        """
        msg = await _get_message_or_404(message_id)

        # View-only members cannot react
        group = await _get_group_or_404(msg.group)
        _require_can_post(group, user_id)

        # Find existing reaction for this emoji
        existing: Reaction | None = None
        for r in msg.reactions:
            if r.emoji == emoji:
                existing = r
                break

        if existing is not None:
            if user_id in existing.users:
                # Remove user from this reaction
                existing.users.remove(user_id)
                # Remove the reaction entry entirely if no users left
                if not existing.users:
                    msg.reactions.remove(existing)
            else:
                existing.users.append(user_id)
        else:
            msg.reactions.append(Reaction(emoji=emoji, users=[user_id]))

        await msg.save()
        return _message_response(msg)

    @staticmethod
    async def get_messages(
        group_id: str,
        user_id: str,
        cursor: str | None = None,
        limit: int = 50,
    ) -> dict:
        """Cursor-based paginated messages, newest first.

        Cursor format: ``"{iso_timestamp}|{object_id}"``.
        Fetches ``limit + 1`` to determine ``has_more``.
        Excludes soft-deleted messages. Private/DM groups require membership.

        Returns a dict with ``items``, ``nextCursor`` and ``hasMore``.

        Raises:
            ValidationError: ``message.invalid_cursor`` when a non-empty
                cursor does not match the cursor format.
        """
        group = await _get_group_or_404(group_id)
        if group.type in ("private", "dm"):
            _require_group_member(group, user_id)

        query: dict = {"group": group_id, "deleted": False}

        if cursor:
            from ee.cloud.shared.errors import ValidationError

            timestamp_part, separator, id_part = cursor.partition("|")
            try:
                if not separator:
                    raise ValueError("cursor has no '|' separator")
                cursor_time = datetime.fromisoformat(timestamp_part)
                cursor_id = PydanticObjectId(id_part)
            except Exception as exc:  # noqa: BLE001 - surface as a client error
                # A bad cursor is a client error; silently ignoring it would
                # restart pagination from the first page.
                raise ValidationError(
                    "message.invalid_cursor",
                    "Cursor must have the form '<iso_timestamp>|<object_id>'",
                ) from exc
            # Strictly older than the cursor, using _id to break timestamp ties.
            query["$or"] = [
                {"createdAt": {"$lt": cursor_time}},
                {"createdAt": cursor_time, "_id": {"$lt": cursor_id}},
            ]

        messages = (
            await Message.find(query)
            .sort([("createdAt", -1), ("_id", -1)])
            .limit(limit + 1)
            .to_list()
        )

        has_more = len(messages) > limit
        if has_more:
            messages = messages[:limit]

        items = [_message_response(m) for m in messages]

        next_cursor: str | None = None
        if has_more and messages:
            last = messages[-1]
            next_cursor = f"{last.createdAt.isoformat()}|{last.id}"

        return {"items": items, "nextCursor": next_cursor, "hasMore": has_more}

    @staticmethod
    async def get_thread(message_id: str, user_id: str) -> list[dict]:
        """Get all replies to a message, sorted ascending by creation time."""
        msg = await _get_message_or_404(message_id)

        # Verify user can access the group
        group = await _get_group_or_404(msg.group)
        if group.type in ("private", "dm"):
            _require_group_member(group, user_id)

        replies = (
            await Message.find({"reply_to": str(msg.id), "deleted": False})
            .sort([("createdAt", 1)])
            .to_list()
        )
        return [_message_response(r) for r in replies]

    @staticmethod
    async def pin_message(group_id: str, user_id: str, message_id: str) -> None:
        """Pin a message in a group. Group admins only.

        Raises NotFound when the message does not belong to this group.
        Pinning an already pinned message is a no-op.
        """
        group = await _get_group_or_404(group_id)
        _require_group_admin(group, user_id)

        # Verify message belongs to this group
        msg = await _get_message_or_404(message_id)
        if msg.group != group_id:
            raise NotFound("message", message_id)

        if message_id not in group.pinned_messages:
            group.pinned_messages.append(message_id)
            await group.save()

    @staticmethod
    async def unpin_message(group_id: str, user_id: str, message_id: str) -> None:
        """Unpin a message from a group. Group admins only.

        Raises NotFound when the message is not currently pinned.
        """
        group = await _get_group_or_404(group_id)
        _require_group_admin(group, user_id)

        if message_id not in group.pinned_messages:
            raise NotFound("pinned_message", message_id)

        group.pinned_messages.remove(message_id)
        await group.save()

    @staticmethod
    async def search_messages(group_id: str, user_id: str, query: str) -> list[dict]:
        """Search messages by content using regex. Limited to 50 results.

        The query is matched literally (regex metacharacters are escaped) and
        case-insensitively; results are newest first. Private/DM groups
        require membership.
        """
        group = await _get_group_or_404(group_id)
        if group.type in ("private", "dm"):
            _require_group_member(group, user_id)

        # Escape regex special characters for safe search
        escaped = re.escape(query)
        messages = (
            await Message.find(
                {
                    "group": group_id,
                    "deleted": False,
                    "content": {"$regex": escaped, "$options": "i"},
                }
            )
            .sort([("createdAt", -1)])
            .limit(50)
            .to_list()
        )
        return [_message_response(m) for m in messages]

611
ee/cloud/chat/router.py Normal file
View File

@@ -0,0 +1,611 @@
"""Chat domain — REST endpoints + WebSocket handler.
REST routes live under ``/chat`` and require an enterprise license.
The WebSocket endpoint at ``/ws/cloud`` authenticates via JWT query param.
"""
from __future__ import annotations
import json
import logging
import os
from fastapi import APIRouter, Depends, Query, WebSocket, WebSocketDisconnect
from ee.cloud.chat.schemas import (
AddGroupAgentRequest,
AddGroupMembersRequest,
CreateGroupRequest,
EditMessageRequest,
ReactRequest,
SendMessageRequest,
UpdateGroupAgentRequest,
UpdateGroupRequest,
UpdateMemberRoleRequest,
WsInbound,
WsOutbound,
)
from ee.cloud.chat.service import GroupService, MessageService
from ee.cloud.chat.ws import manager
from ee.cloud.license import require_license
from ee.cloud.shared.deps import (
current_user_id,
current_workspace_id,
require_group_action,
)
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Top-level chat router, tagged "Chat"; re-exported by the chat package.
router = APIRouter(tags=["Chat"])
# REST endpoints require license: every route registered on ``_licensed``
# lives under the ``/chat`` prefix and runs the ``require_license`` dependency.
_licensed = APIRouter(prefix="/chat", dependencies=[Depends(require_license)])
# ---------------------------------------------------------------------------
# Groups
# ---------------------------------------------------------------------------
@_licensed.post("/groups")
async def create_group(
    body: CreateGroupRequest,
    workspace_id: str = Depends(current_workspace_id),
    user_id: str = Depends(current_user_id),
):
    """Create a group in the caller's current workspace.

    Delegates to ``GroupService.create_group`` and returns its result.
    """
    return await GroupService.create_group(workspace_id, user_id, body)
@_licensed.get("/groups")
async def list_groups(
    workspace_id: str = Depends(current_workspace_id),
    user_id: str = Depends(current_user_id),
):
    """List the groups of the current workspace for the calling user.

    Delegates to ``GroupService.list_groups`` and returns its result.
    """
    return await GroupService.list_groups(workspace_id, user_id)
@_licensed.get("/groups/{group_id}")
async def get_group(
    group_id: str,
    user_id: str = Depends(current_user_id),
):
    """Return a single group by ID.

    Delegates to ``GroupService.get_group``, passing the calling user's ID.
    """
    return await GroupService.get_group(group_id, user_id)
@_licensed.patch(
    "/groups/{group_id}",
    dependencies=[Depends(require_group_action("group.admin"))],
)
async def update_group(
    group_id: str,
    body: UpdateGroupRequest,
    user_id: str = Depends(current_user_id),
):
    """Update a group's fields and return the result of ``GroupService.update_group``.

    The route also runs the ``require_group_action("group.admin")`` dependency
    (presumably an authorization check for the ``group.admin`` action —
    confirm in ``ee.cloud.shared.deps``).
    """
    return await GroupService.update_group(group_id, user_id, body)
@_licensed.post(
    "/groups/{group_id}/archive",
    dependencies=[Depends(require_group_action("group.admin"))],
)
async def archive_group(
    group_id: str,
    user_id: str = Depends(current_user_id),
):
    """Archive a group via ``GroupService.archive_group``.

    Returns ``{"ok": True}`` on success. The route also runs the
    ``require_group_action("group.admin")`` dependency.
    """
    await GroupService.archive_group(group_id, user_id)
    return {"ok": True}
@_licensed.post("/groups/{group_id}/join")
async def join_group(
    group_id: str,
    user_id: str = Depends(current_user_id),
):
    """Add the calling user to a group via ``GroupService.join_group``.

    Returns ``{"ok": True}`` on success.
    """
    await GroupService.join_group(group_id, user_id)
    return {"ok": True}
@_licensed.post("/groups/{group_id}/leave")
async def leave_group(
    group_id: str,
    user_id: str = Depends(current_user_id),
):
    """Remove the calling user from a group via ``GroupService.leave_group``.

    Returns ``{"ok": True}`` on success.
    """
    await GroupService.leave_group(group_id, user_id)
    return {"ok": True}
@_licensed.post(
    "/groups/{group_id}/members",
    dependencies=[Depends(require_group_action("group.admin"))],
)
async def add_members(
    group_id: str,
    body: AddGroupMembersRequest,
    user_id: str = Depends(current_user_id),
):
    """Add users to a group with the role given in the request body.

    Calls ``GroupService.add_members``, then broadcasts a ``members.added``
    event carrying the IDs it returned and the requested role. The broadcast
    is sent even when the returned list is empty.

    Returns ``{"ok": True, "added": [...]}``.
    """
    added = await GroupService.add_members(group_id, user_id, body.user_ids, body.role)
    await _broadcast_members_event(
        group_id,
        "members.added",
        {"group_id": group_id, "user_ids": added, "role": body.role},
    )
    return {"ok": True, "added": added}
@_licensed.delete(
    "/groups/{group_id}/members/{target_user_id}",
    status_code=204,
    dependencies=[Depends(require_group_action("group.admin"))],
)
async def remove_member(
    group_id: str,
    target_user_id: str,
    user_id: str = Depends(current_user_id),
):
    """Remove ``target_user_id`` from a group.

    Calls ``GroupService.remove_member``, then broadcasts a
    ``members.removed`` event. Responds with 204 and no body.
    """
    await GroupService.remove_member(group_id, user_id, target_user_id)
    await _broadcast_members_event(
        group_id,
        "members.removed",
        {"group_id": group_id, "user_id": target_user_id},
    )
@_licensed.patch(
    "/groups/{group_id}/members/{target_user_id}/role",
    dependencies=[Depends(require_group_action("group.admin"))],
)
async def update_member_role(
    group_id: str,
    target_user_id: str,
    body: UpdateMemberRoleRequest,
    user_id: str = Depends(current_user_id),
):
    """Change a member's role in a group.

    Calls ``GroupService.set_member_role``, then broadcasts a
    ``members.role_changed`` event with the role it returned.

    Returns ``{"ok": True, "role": <new role>}``.
    """
    new_role = await GroupService.set_member_role(group_id, user_id, target_user_id, body.role)
    await _broadcast_members_event(
        group_id,
        "members.role_changed",
        {"group_id": group_id, "user_id": target_user_id, "role": new_role},
    )
    return {"ok": True, "role": new_role}
# ---------------------------------------------------------------------------
# Group Agents
# ---------------------------------------------------------------------------
@_licensed.post(
    "/groups/{group_id}/agents",
    dependencies=[Depends(require_group_action("group.admin"))],
)
async def add_group_agent(
    group_id: str,
    body: AddGroupAgentRequest,
    user_id: str = Depends(current_user_id),
):
    """Attach an agent to a group via ``GroupService.add_agent``.

    Returns ``{"ok": True}`` on success.
    """
    await GroupService.add_agent(group_id, user_id, body)
    return {"ok": True}
@_licensed.patch(
    "/groups/{group_id}/agents/{agent_id}",
    dependencies=[Depends(require_group_action("group.admin"))],
)
async def update_group_agent(
    group_id: str,
    agent_id: str,
    body: UpdateGroupAgentRequest,
    user_id: str = Depends(current_user_id),
):
    """Update an agent's settings within a group via ``GroupService.update_agent``.

    Returns ``{"ok": True}`` on success.
    """
    await GroupService.update_agent(group_id, user_id, agent_id, body)
    return {"ok": True}
@_licensed.delete(
    "/groups/{group_id}/agents/{agent_id}",
    status_code=204,
    dependencies=[Depends(require_group_action("group.admin"))],
)
async def remove_group_agent(
    group_id: str,
    agent_id: str,
    user_id: str = Depends(current_user_id),
):
    """Detach an agent from a group; responds 204 with no body on success."""
    await GroupService.remove_agent(group_id, user_id, agent_id)
    return None
# ---------------------------------------------------------------------------
# Messages
# ---------------------------------------------------------------------------
@_licensed.get("/groups/{group_id}/messages")
async def get_messages(
    group_id: str,
    user_id: str = Depends(current_user_id),
    cursor: str | None = Query(None),
    limit: int = Query(50, ge=1, le=100),
):
    """Return one page of a group's messages.

    ``cursor`` is optional and passed through to the service untouched;
    ``limit`` defaults to 50 and is constrained to 1..100 by the query
    validator.
    """
    # NOTE(review): no route-level permission dependency here; presumably
    # MessageService enforces access for ``user_id`` — confirm.
    page = await MessageService.get_messages(group_id, user_id, cursor, limit)
    return page
@_licensed.post("/groups/{group_id}/messages")
async def send_message(
    group_id: str,
    body: SendMessageRequest,
    user_id: str = Depends(current_user_id),
):
    """Post a message to a group over REST and return the service's result.

    This handler does not broadcast over WebSocket itself.
    """
    created = await MessageService.send_message(group_id, user_id, body)
    return created
@_licensed.patch("/messages/{message_id}")
async def edit_message(
    message_id: str,
    body: EditMessageRequest,
    user_id: str = Depends(current_user_id),
):
    """Edit an existing message and return whatever the service returns."""
    updated = await MessageService.edit_message(message_id, user_id, body)
    return updated
@_licensed.delete("/messages/{message_id}", status_code=204)
async def delete_message(
    message_id: str,
    user_id: str = Depends(current_user_id),
):
    """Delete a message on behalf of the caller; responds 204 with no body."""
    await MessageService.delete_message(message_id, user_id)
    return None
@_licensed.post("/messages/{message_id}/react")
async def react_to_message(
    message_id: str,
    body: ReactRequest,
    user_id: str = Depends(current_user_id),
):
    """Toggle the caller's reaction ``body.emoji`` on a message.

    Only the emoji string is forwarded to the service; the service's return
    value is the response.
    """
    emoji = body.emoji
    outcome = await MessageService.toggle_reaction(message_id, user_id, emoji)
    return outcome
@_licensed.get("/messages/{message_id}/thread")
async def get_thread(
    message_id: str,
    user_id: str = Depends(current_user_id),
):
    """Return the thread for ``message_id`` as provided by ``MessageService``."""
    thread = await MessageService.get_thread(message_id, user_id)
    return thread
# ---------------------------------------------------------------------------
# Pins
# ---------------------------------------------------------------------------
@_licensed.post("/groups/{group_id}/pin/{message_id}")
async def pin_message(
    group_id: str,
    message_id: str,
    user_id: str = Depends(current_user_id),
):
    """Pin a message in a group and acknowledge with ``{"ok": True}``."""
    await MessageService.pin_message(group_id, user_id, message_id)
    ack = {"ok": True}
    return ack
@_licensed.delete("/groups/{group_id}/pin/{message_id}", status_code=204)
async def unpin_message(
    group_id: str,
    message_id: str,
    user_id: str = Depends(current_user_id),
):
    """Unpin a message from a group; responds 204 with no body."""
    await MessageService.unpin_message(group_id, user_id, message_id)
    return None
# ---------------------------------------------------------------------------
# Search
# ---------------------------------------------------------------------------
@_licensed.get("/groups/{group_id}/search")
async def search_messages(
    group_id: str,
    q: str = Query(..., min_length=1),
    user_id: str = Depends(current_user_id),
):
    """Search a group's messages for ``q``.

    ``q`` is a required query parameter of at least one character; the search
    itself is performed by ``MessageService.search_messages``.
    """
    hits = await MessageService.search_messages(group_id, user_id, q)
    return hits
# ---------------------------------------------------------------------------
# DMs
# ---------------------------------------------------------------------------
@_licensed.post("/dm/{target_user_id}")
async def get_or_create_dm(
    target_user_id: str,
    workspace_id: str = Depends(current_workspace_id),
    user_id: str = Depends(current_user_id),
):
    """Return the DM between the caller and ``target_user_id``.

    Lookup-or-create is delegated to ``GroupService.get_or_create_dm`` within
    the caller's current workspace.
    """
    dm_group = await GroupService.get_or_create_dm(workspace_id, user_id, target_user_id)
    return dm_group
@_licensed.post("/dm-agent/{agent_id}")
async def get_or_create_agent_dm(
    agent_id: str,
    workspace_id: str = Depends(current_workspace_id),
    user_id: str = Depends(current_user_id),
):
    """Return the 1:1 DM between the caller and an agent, creating it if needed.

    Lookup-or-create is delegated to ``GroupService.get_or_create_agent_dm``
    within the caller's current workspace.
    """
    agent_dm = await GroupService.get_or_create_agent_dm(workspace_id, user_id, agent_id)
    return agent_dm
# Mount the licensed REST routes declared above onto the main router.
# Routes must be added to ``_licensed`` before this call to be included.
# The WebSocket endpoint further down is registered on ``router`` directly,
# not on ``_licensed``.
router.include_router(_licensed)
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
async def _broadcast_members_event(group_id: str, event_type: str, data: dict) -> None:
    """Push a membership/role event to everyone currently in the group.

    The group document is re-read at call time, so the recipient list is the
    membership as stored after the mutation (a user who was just removed is
    no longer in it and therefore receives nothing). If the group cannot be
    found the call is a silent no-op.
    """
    from beanie import PydanticObjectId

    from ee.cloud.models.group import Group

    group_doc = await Group.get(PydanticObjectId(group_id))
    if group_doc:
        recipients = group_doc.members
        event = WsOutbound(type=event_type, data=data)
        await manager.broadcast_to_group(group_id, recipients, event)
# ---------------------------------------------------------------------------
# WebSocket endpoint
# ---------------------------------------------------------------------------
@router.websocket("/ws/cloud")
async def websocket_endpoint(websocket: WebSocket, token: str = Query(...)):
    """Cloud WebSocket -- authenticate via JWT token, then handle typed JSON messages.

    The ``token`` query parameter is decoded as an HS256 JWT with audience
    ``fastapi-users:auth``; its ``sub`` claim becomes the connection's user
    id. A token that fails to decode, or has no ``sub``, closes the socket
    with code 4001 before it is accepted.

    After acceptance every text frame is parsed as JSON and validated against
    ``WsInbound``:

    * frames that fail parsing/validation get an ``invalid_message`` error
      frame;
    * frames whose handler raises get a ``handler_error`` error frame and the
      failure is logged.

    In both cases the connection stays open, so one bad request (malformed
    id, rejected operation, over-long content, ...) does not drop the
    client's whole session. The connection is always unregistered from the
    manager on exit.
    """
    import jwt as pyjwt

    secret = os.environ.get("AUTH_SECRET")
    if secret is None:
        # The fallback is kept for backward compatibility, but it is a
        # well-known string: tokens signed with it can be forged by anyone.
        logger.warning(
            "AUTH_SECRET is not set; WebSocket auth is using the insecure default secret"
        )
        secret = "change-me-in-production-please"

    try:
        payload = pyjwt.decode(
            token,
            secret,
            algorithms=["HS256"],
            audience=["fastapi-users:auth"],
        )
        user_id = payload.get("sub")
    except Exception:
        # Any decode/verification failure is treated as an invalid token.
        user_id = None

    if not user_id:
        await websocket.close(code=4001, reason="Invalid token")
        return

    # Accept and register connection
    await websocket.accept()
    await manager.connect(websocket, user_id)

    try:
        while True:
            raw = await websocket.receive_text()

            try:
                msg = WsInbound.model_validate(json.loads(raw))
            except Exception:
                await websocket.send_json(
                    WsOutbound(
                        type="error",
                        data={"code": "invalid_message", "message": "Invalid message format"},
                    ).model_dump(mode="json")
                )
                continue

            try:
                await _handle_ws_message(user_id, msg)
            except WebSocketDisconnect:
                # Let the outer handler end the session normally.
                raise
            except Exception:
                # A failing handler must not tear down the connection; report
                # it to the client and keep reading.
                logger.exception(
                    "WebSocket handler failed for user=%s type=%s", user_id, msg.type
                )
                await websocket.send_json(
                    WsOutbound(
                        type="error",
                        data={"code": "handler_error", "message": "Failed to process message"},
                    ).model_dump(mode="json")
                )
    except WebSocketDisconnect:
        pass
    except Exception:
        logger.exception("WebSocket error for user=%s", user_id)
    finally:
        # disconnect() returns the user id when this was the user's last
        # connection; nothing consumes it yet (presence grace period is
        # still to be wired in — Task 19).
        await manager.disconnect(websocket)
# ---------------------------------------------------------------------------
# WebSocket message dispatcher
# ---------------------------------------------------------------------------
async def _handle_ws_message(user_id: str, msg: WsInbound) -> None:
"""Dispatch validated WebSocket message to the appropriate handler."""
if msg.type == "message.send":
await _ws_message_send(user_id, msg)
elif msg.type == "message.edit":
await _ws_message_edit(user_id, msg)
elif msg.type == "message.delete":
await _ws_message_delete(user_id, msg)
elif msg.type == "message.react":
await _ws_message_react(user_id, msg)
elif msg.type == "typing.start":
await _ws_typing(user_id, msg, active=True)
elif msg.type == "typing.stop":
await _ws_typing(user_id, msg, active=False)
elif msg.type == "presence.update":
pass # Will be wired in Task 19
elif msg.type == "read.ack":
await _ws_read_ack(user_id, msg)
async def _ws_message_send(user_id: str, msg: WsInbound) -> None:
if not msg.group_id or not msg.content:
return
body = SendMessageRequest(
content=msg.content,
reply_to=msg.reply_to,
mentions=msg.mentions,
attachments=msg.attachments,
)
result = await MessageService.send_message(msg.group_id, user_id, body)
from beanie import PydanticObjectId
from ee.cloud.models.group import Group
group = await Group.get(PydanticObjectId(msg.group_id))
if group:
result_data = result.model_dump(mode="json") if hasattr(result, "model_dump") else result
await manager.broadcast_to_group(
msg.group_id,
group.members,
WsOutbound(type="message.new", data=result_data),
exclude_user=user_id,
)
await manager.send_to_user(
user_id,
WsOutbound(type="message.sent", data=result_data),
)
async def _ws_message_edit(user_id: str, msg: WsInbound) -> None:
if not msg.message_id or not msg.content:
return
await MessageService.edit_message(
msg.message_id, user_id, EditMessageRequest(content=msg.content)
)
from beanie import PydanticObjectId
from ee.cloud.models.group import Group
from ee.cloud.models.message import Message
message = await Message.get(PydanticObjectId(msg.message_id))
if message:
group = await Group.get(PydanticObjectId(message.group))
if group:
await manager.broadcast_to_group(
message.group,
group.members,
WsOutbound(
type="message.edited",
data={
"message_id": msg.message_id,
"content": msg.content,
"edited_at": str(message.edited_at),
},
),
)
async def _ws_message_delete(user_id: str, msg: WsInbound) -> None:
if not msg.message_id:
return
# Fetch message before deleting so we know which group to broadcast to
from beanie import PydanticObjectId
from ee.cloud.models.group import Group
from ee.cloud.models.message import Message
message = await Message.get(PydanticObjectId(msg.message_id))
await MessageService.delete_message(msg.message_id, user_id)
if message:
group = await Group.get(PydanticObjectId(message.group))
if group:
await manager.broadcast_to_group(
message.group,
group.members,
WsOutbound(type="message.deleted", data={"message_id": msg.message_id}),
)
async def _ws_message_react(user_id: str, msg: WsInbound) -> None:
if not msg.message_id or not msg.emoji:
return
await MessageService.toggle_reaction(msg.message_id, user_id, msg.emoji)
from beanie import PydanticObjectId
from ee.cloud.models.group import Group
from ee.cloud.models.message import Message
message = await Message.get(PydanticObjectId(msg.message_id))
if message:
group = await Group.get(PydanticObjectId(message.group))
if group:
await manager.broadcast_to_group(
message.group,
group.members,
WsOutbound(
type="message.reaction",
data={
"message_id": msg.message_id,
"emoji": msg.emoji,
"user_id": user_id,
},
),
)
async def _ws_typing(user_id: str, msg: WsInbound, *, active: bool) -> None:
if not msg.group_id:
return
if active:
manager.start_typing(msg.group_id, user_id)
else:
manager.stop_typing(msg.group_id, user_id)
from beanie import PydanticObjectId
from ee.cloud.models.group import Group
group = await Group.get(PydanticObjectId(msg.group_id))
if group:
await manager.broadcast_to_group(
msg.group_id,
group.members,
WsOutbound(
type="typing",
data={
"group_id": msg.group_id,
"user_id": user_id,
"active": active,
},
),
exclude_user=user_id,
)
async def _ws_read_ack(user_id: str, msg: WsInbound) -> None:
if not msg.group_id or not msg.message_id:
return
from beanie import PydanticObjectId
from ee.cloud.models.group import Group
group = await Group.get(PydanticObjectId(msg.group_id))
if group:
await manager.broadcast_to_group(
msg.group_id,
group.members,
WsOutbound(
type="read.receipt",
data={
"group_id": msg.group_id,
"user_id": user_id,
"last_read": msg.message_id,
},
),
exclude_user=user_id,
)

149
ee/cloud/chat/schemas.py Normal file
View File

@@ -0,0 +1,149 @@
"""Request/response and WebSocket message schemas for chat."""
from __future__ import annotations
from datetime import datetime
from typing import Any, Literal
from pydantic import BaseModel, Field
# ---------------------------------------------------------------------------
# REST — Requests
# ---------------------------------------------------------------------------
class CreateGroupRequest(BaseModel):
    """Request body for creating a chat group.

    Only ``name`` is required; everything else has a default.
    """

    # Display name, 1-100 characters.
    name: str = Field(min_length=1, max_length=100)
    description: str = ""
    # One of the four literal kinds; defaults to "private".
    type: Literal["public", "private", "dm", "channel"] = "private"
    # Initial members; empty list when omitted.
    member_ids: list[str] = Field(default_factory=list)
    # Free-form strings; no format is enforced by this schema.
    icon: str = ""
    color: str = ""
class UpdateGroupRequest(BaseModel):
    """Request body for a partial group update.

    Every field is optional and defaults to ``None``.
    NOTE(review): presumably ``None`` means "leave unchanged" — confirm
    against the service that consumes this model.
    """

    name: str | None = None
    description: str | None = None
    icon: str | None = None
    color: str | None = None
    # Toggle visibility — "private" (members-only) vs "public"/"channel"
    # (any workspace member can read). DMs cannot be retyped, which is why
    # "dm" is absent from the allowed literals here.
    type: Literal["public", "private", "channel"] | None = None
class AddGroupMembersRequest(BaseModel):
    """Request body for adding several users to a group with one shared role."""

    # Users to add. No minimum length is enforced, so an empty list validates.
    user_ids: list[str]
    # Role given to every added user; defaults to "edit".
    role: Literal["edit", "view"] = "edit"
class UpdateMemberRoleRequest(BaseModel):
    """Request body for changing one member's role."""

    # Required; same two literals accepted when adding members.
    role: Literal["edit", "view"]
class AddGroupAgentRequest(BaseModel):
    """Request body for attaching an agent to a group."""

    agent_id: str
    # Plain strings: the allowed values are not constrained by this schema.
    role: str = "assistant"
    respond_mode: str = "auto"
class UpdateGroupAgentRequest(BaseModel):
    """Request body for updating a group agent; only ``respond_mode`` can change."""

    # Required plain string; allowed values are not constrained by this schema.
    respond_mode: str
class SendMessageRequest(BaseModel):
    """Request body for sending a message (also built by the WebSocket send handler)."""

    # Message text, 1-10,000 characters.
    content: str = Field(min_length=1, max_length=10_000)
    # Optional id of the message being replied to.
    reply_to: str | None = None
    # Untyped dicts: their inner shape is not validated by this schema.
    mentions: list[dict] = Field(default_factory=list)
    attachments: list[dict] = Field(default_factory=list)
class EditMessageRequest(BaseModel):
    """Request body for editing a message; replaces its text."""

    # Same 1-10,000 character bound as SendMessageRequest.content.
    content: str = Field(min_length=1, max_length=10_000)
class ReactRequest(BaseModel):
    """Request body for toggling a reaction on a message."""

    # Reaction identifier, 1-50 characters; no further format is enforced.
    emoji: str = Field(min_length=1, max_length=50)
# ---------------------------------------------------------------------------
# REST — Responses
# ---------------------------------------------------------------------------
class MessageResponse(BaseModel):
    """Serialized view of a single chat message.

    All fields are required except ``sender_name``, which defaults to an
    empty string.
    """

    id: str
    # Id of the group the message belongs to.
    group: str
    # Nullable. NOTE(review): presumably None for non-user senders — confirm.
    sender: str | None
    sender_type: str
    sender_name: str = ""
    content: str
    # Untyped dicts: inner shape is not validated by this schema.
    mentions: list[dict]
    reply_to: str | None
    attachments: list[dict]
    reactions: list[dict]
    edited: bool
    edited_at: datetime | None
    deleted: bool
    created_at: datetime
class GroupResponse(BaseModel):
    """Serialized view of a chat group. Every field is required."""

    id: str
    workspace: str
    name: str
    slug: str
    description: str
    # Plain string here, unlike the Literal-constrained request models.
    type: str
    icon: str
    color: str
    owner: str
    members: list[Any]  # User IDs or populated objects
    # Element shape is not constrained by this schema.
    agents: list[Any]
    pinned_messages: list[str]
    archived: bool
    last_message_at: datetime | None
    message_count: int
    created_at: datetime
class CursorPage(BaseModel):
    """Cursor-based pagination response.

    Holds one page of ``MessageResponse`` items plus the paging markers.
    """

    items: list[MessageResponse]
    # Defaults to None. NOTE(review): presumably the value to send back as
    # ``cursor`` for the next page — confirm against the message service.
    next_cursor: str | None = None
    has_more: bool = False
# ---------------------------------------------------------------------------
# WebSocket Schemas
# ---------------------------------------------------------------------------
class WsInbound(BaseModel):
    """Validated inbound WebSocket message from client.

    ``type`` selects the action. Every other field is optional at the schema
    level, so the handler for each type is responsible for checking that the
    fields it needs are actually present.
    """

    type: Literal[
        "message.send",
        "message.edit",
        "message.delete",
        "message.react",
        "typing.start",
        "typing.stop",
        "presence.update",
        "read.ack",
    ]
    # Target group: read by the send, typing and read-ack handlers.
    group_id: str | None = None
    # Target message: read by edit/delete/react; for read.ack it is the last
    # message the client has read.
    message_id: str | None = None
    # Message text for send/edit. No length bound here; the bound is applied
    # when the handler builds SendMessageRequest / EditMessageRequest.
    content: str | None = None
    # Only forwarded by the send handler.
    reply_to: str | None = None
    mentions: list[dict] = Field(default_factory=list)
    attachments: list[dict] = Field(default_factory=list)
    # Reaction identifier for message.react. No length bound at this level.
    emoji: str | None = None
    # NOTE(review): presumably intended for "presence.update", which the
    # dispatcher currently treats as a no-op — confirm.
    status: str | None = None
class WsOutbound(BaseModel):
    """Outbound WebSocket message to client.

    A generic envelope: an event name plus a free-form payload.
    """

    # Event name, e.g. "message.new" or "error"; not restricted to a fixed set.
    type: str
    # Event payload; an empty dict when omitted.
    data: dict = Field(default_factory=dict)

8
ee/cloud/chat/service.py Normal file
View File

@@ -0,0 +1,8 @@
# Refactored: Now a thin re-export module for backward compatibility.
# GroupService and helpers moved to group_service.py (with N+1 fix).
# MessageService and helpers moved to message_service.py (with new create_agent_message).
"""Chat domain — re-exports for backward compatibility."""
from ee.cloud.chat.group_service import GroupService, _group_response # noqa: F401
from ee.cloud.chat.message_service import MessageService, _message_response # noqa: F401

146
ee/cloud/chat/ws.py Normal file
View File

@@ -0,0 +1,146 @@
"""WebSocket connection manager for real-time chat.
Single endpoint: ws://host/ws/cloud?token=<JWT>
Handles:
- Connection lifecycle (connect -> authenticate -> active -> disconnect)
- User-to-connections mapping: user_id -> set[WebSocket] (multi-tab/device)
- Message routing to group members
- Typing indicators with auto-expiry (5s)
- Presence tracking with grace period (30s before marking offline)
"""
from __future__ import annotations
import asyncio
import logging
from fastapi import WebSocket
from ee.cloud.chat.schemas import WsOutbound
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# How long a typing indicator is kept before it auto-expires.
TYPING_TIMEOUT_SECONDS = 5
# Intended delay before a user with no connections is marked offline.
# Not referenced by any code in this module yet.
PRESENCE_GRACE_SECONDS = 30
class ConnectionManager:
    """Manages WebSocket connections, presence, and message routing.

    All state is kept in plain in-process dictionaries, so an instance only
    knows about connections registered on it.
    """

    def __init__(self) -> None:
        # user_id -> set of WebSocket connections
        self.active_connections: dict[str, set[WebSocket]] = {}
        # ws -> user_id (reverse lookup)
        self._ws_to_user: dict[WebSocket, str] = {}
        # Pending offline tasks (grace period before marking offline).
        # Nothing in this class adds entries; connect() only cancels and
        # removes one if it is present.
        self._offline_tasks: dict[str, asyncio.Task] = {}
        # Typing timers: (group_id, user_id) -> Task
        self._typing_timers: dict[tuple[str, str], asyncio.Task] = {}

    async def connect(self, websocket: WebSocket, user_id: str) -> None:
        """Register an authenticated WebSocket connection.

        The socket must already be accepted by the caller. Any pending
        offline task for the user is cancelled.
        """
        self.active_connections.setdefault(user_id, set()).add(websocket)
        self._ws_to_user[websocket] = user_id

        # Cancel any pending offline task
        task = self._offline_tasks.pop(user_id, None)
        if task:
            task.cancel()

        logger.info(
            "WS connected: user=%s (connections=%d)",
            user_id,
            len(self.active_connections[user_id]),
        )

    async def disconnect(self, websocket: WebSocket) -> str | None:
        """Remove a connection.

        Returns the user_id if this was their last connection (the caller
        should start a grace-period offline timer). Returns ``None`` if the
        user still has other active connections or the websocket was unknown.
        """
        user_id = self._ws_to_user.pop(websocket, None)
        if not user_id:
            return None

        conns = self.active_connections.get(user_id)
        if conns is not None:
            conns.discard(websocket)
            if conns:
                return None

        # Last connection gone — return user_id for grace period handling.
        # pop() rather than del: the entry may already be missing if the two
        # maps ever disagree, and that must not raise.
        self.active_connections.pop(user_id, None)
        return user_id

    def get_user_connections(self, user_id: str) -> set[WebSocket]:
        """Return the set of active WebSocket connections for a user.

        For a known user this is the manager's own live set, not a copy; for
        an unknown user it is a fresh empty set.
        """
        return self.active_connections.get(user_id, set())

    def is_online(self, user_id: str) -> bool:
        """Check whether a user has at least one active connection."""
        return bool(self.active_connections.get(user_id))

    async def send_to_user(self, user_id: str, message: WsOutbound) -> None:
        """Send a message to all of a user's connections.

        A connection whose send raises is treated as dead and unregistered
        afterwards; the error is not propagated.
        """
        data = message.model_dump(mode="json")
        dead: list[WebSocket] = []

        # Iterate a snapshot: each send awaits, and a connect()/disconnect()
        # running in the meantime would otherwise resize the live set
        # mid-iteration and raise RuntimeError.
        for ws in tuple(self.get_user_connections(user_id)):
            try:
                await ws.send_json(data)
            except Exception:
                dead.append(ws)

        # Clean up dead connections
        for ws in dead:
            await self.disconnect(ws)

    async def broadcast_to_group(
        self,
        group_id: str,
        member_ids: list[str],
        message: WsOutbound,
        exclude_user: str | None = None,
    ) -> None:
        """Broadcast a message to all online members of a group.

        Members are sent to one after another. ``group_id`` is accepted for
        the caller's convenience but is not used for routing; the recipients
        are exactly ``member_ids`` minus ``exclude_user``.
        """
        for uid in member_ids:
            if uid == exclude_user:
                continue
            await self.send_to_user(uid, message)

    # ------------------------------------------------------------------
    # Typing indicators
    # ------------------------------------------------------------------

    def start_typing(self, group_id: str, user_id: str) -> None:
        """Track typing with auto-expiry.

        Restarts the timer if one is already running for this user and
        group. Must be called while an event loop is running, because it
        schedules a task.
        """
        key = (group_id, user_id)

        # Cancel existing timer
        existing = self._typing_timers.pop(key, None)
        if existing:
            existing.cancel()

        # Start new timer
        self._typing_timers[key] = asyncio.create_task(self._typing_timeout(key))

    async def _typing_timeout(self, key: tuple[str, str]) -> None:
        """Auto-expire typing indicator after TYPING_TIMEOUT_SECONDS."""
        await asyncio.sleep(TYPING_TIMEOUT_SECONDS)
        self._typing_timers.pop(key, None)

    def stop_typing(self, group_id: str, user_id: str) -> None:
        """Explicitly stop a typing indicator (no-op if none is active)."""
        key = (group_id, user_id)
        task = self._typing_timers.pop(key, None)
        if task:
            task.cancel()

    def is_typing(self, group_id: str, user_id: str) -> bool:
        """Check whether a user is currently typing in a group."""
        return (group_id, user_id) in self._typing_timers
# Module-level singleton: created once at import time, so every importer of
# ``manager`` within a process shares the same connection registry.
manager = ConnectionManager()

2
ee/cloud/db.py Normal file
View File

@@ -0,0 +1,2 @@
# Backward compat — delegates to shared/db.py
from ee.cloud.shared.db import close_cloud_db, get_client, init_cloud_db # noqa: F401

Some files were not shown because too many files have changed in this diff Show More