Files
shivammittal274 e3d57e5347 feat(cli): production-ready CLI with auto-launch, install, and cross-platform builds (#555)
* feat(cli): production-ready CLI with auto-launch, install, and cross-platform builds

- init: accept URL argument and --auto flag for non-interactive setup
- install: new command to download BrowserOS app for current platform
- launch: auto-detect and launch BrowserOS when server is not running
- discovery: prefer server.json (live) over config.yaml (may be stale)
- errors: actionable messages guiding users to init/install
- goreleaser: cross-platform builds for 6 targets (darwin/linux/windows × amd64/arm64)
- ci: GitHub Actions workflow to release CLI binaries on cli/v* tag push

* fix(cli): check health status code and add progress dots during launch

- Health check in newClient() now verifies HTTP 200, not just no error
- waitForServer prints dots during the 30s poll so users know it's working

* refactor(cli): make launch an explicit command, remove auto-launch from newClient

- launch: new explicit command to find and open BrowserOS app
- launch: probes server.json, config, and common ports before launching
- launch: if already running, reports URL instead of launching again
- init --auto: uses port probing to find running servers
- install --deb: errors on non-Linux instead of silently downloading DMG
- error messages: guide users to launch/install/init explicitly
- removed: auto-launch from newClient() — CLI never does something surprising

* fix(cli): platform-native detection, launch, and install for all OSes

Detection (isBrowserOSInstalled):
- macOS: uses `open -Ra` to query Launch Services (no hardcoded paths)
- Linux: checks /usr/bin/browseros (.deb), browseros.desktop, AppImage search
- Windows: checks %LOCALAPPDATA%\BrowserOS\Application\BrowserOS.exe
  and HKCU/HKLM uninstall registry keys

Launch (startBrowserOS):
- macOS: `open -b com.browseros.BrowserOS` (bundle ID, not path)
- Linux: `browseros` binary, AppImage, or `gtk-launch browseros`
  (fixed: was using xdg-open which opens by MIME type, not desktop files)
- Windows: runs BrowserOS.exe from known Chromium per-user install path
  (fixed: was using `cmd /c start BrowserOS` which doesn't resolve)

Install (runPostInstall):
- macOS: hdiutil attach → cp -R to /Applications → hdiutil detach
- Linux: chmod +x for AppImage, dpkg -i instruction for .deb
- Windows: launches installer exe
- --deb flag now errors on non-Linux platforms

Removed auto-launch from newClient() — CLI never does surprising things.

Sources verified from:
- packages/browseros/build/common/context.py (binary names per platform)
- packages/browseros/build/modules/package/linux.py (.deb structure, .desktop file)
- packages/browseros/chromium_patches/chrome/install_static/chromium_install_modes.h
  (Windows base_app_name="BrowserOS", registry GUID, install paths)
- /Applications/BrowserOS.app/Contents/Info.plist (bundle ID)
2026-03-26 23:12:55 +05:30

208 lines
4.9 KiB
Go

package mcp
import (
"context"
"encoding/base64"
"encoding/json"
"fmt"
"io"
"math"
"net/http"
"time"
sdkmcp "github.com/modelcontextprotocol/go-sdk/mcp"
)
// Client talks to a BrowserOS server. Tool calls go through the MCP endpoint
// at BaseURL + "/mcp"; Health and Status use plain REST GET requests.
type Client struct {
// BaseURL is the server root; endpoint paths such as "/mcp" and "/health"
// are appended to it verbatim.
BaseURL string
// HTTPClient carries both the MCP transport and the REST requests. Its
// Timeout is also used as the overall deadline of each CallTool.
HTTPClient *http.Client
// Version is reported to the server as the MCP client implementation version.
Version string
// Debug is not read by any code visible in this file.
// NOTE(review): presumably toggles verbose output elsewhere — confirm.
Debug bool
}
// NewClient builds a Client for the server at baseURL. The given timeout is
// installed on a dedicated http.Client, and version is stored so it can be
// reported as the client implementation version. Debug is left at false.
func NewClient(baseURL, version string, timeout time.Duration) *Client {
	httpClient := &http.Client{Timeout: timeout}

	client := new(Client)
	client.BaseURL = baseURL
	client.HTTPClient = httpClient
	client.Version = version
	return client
}
// connect opens an MCP session against BaseURL + "/mcp" using the streamable
// HTTP transport (standalone SSE disabled) and the client's own HTTPClient.
// A connection failure is wrapped in an error that lists the CLI commands the
// user can run to recover.
func (c *Client) connect(ctx context.Context) (*sdkmcp.ClientSession, error) {
	const connectHelp = "cannot connect to BrowserOS at %s: %w\n\n" +
		" If BrowserOS is running on a different port: browseros-cli init --auto\n" +
		" If BrowserOS is not running: browseros-cli launch\n" +
		" If not installed: browseros-cli install"

	impl := &sdkmcp.Implementation{
		Name:    "browseros-cli",
		Version: c.Version,
	}
	mcpClient := sdkmcp.NewClient(impl, nil)

	streamable := &sdkmcp.StreamableClientTransport{
		Endpoint:             c.BaseURL + "/mcp",
		HTTPClient:           c.HTTPClient,
		DisableStandaloneSSE: true,
	}

	sess, connErr := mcpClient.Connect(ctx, streamable, nil)
	if connErr == nil {
		return sess, nil
	}
	return nil, fmt.Errorf(connectHelp, c.BaseURL, connErr)
}
// CallTool opens a fresh MCP session, invokes the named tool with args, and
// converts the SDK result into a ToolResult. The session is closed before
// returning.
//
// A nil args map is sent as an empty object. When the tool reports an error
// (IsError), the converted result is returned together with an error whose
// message is the result's text content, so callers can still inspect it.
//
// The whole exchange is bounded by HTTPClient.Timeout when that is positive.
// A zero (or negative) timeout means "no deadline", matching http.Client.
func (c *Client) CallTool(name string, args map[string]any) (*ToolResult, error) {
	// Guard the dereference: HTTPClient is an exported field and may be unset.
	var timeout time.Duration
	if c.HTTPClient != nil {
		timeout = c.HTTPClient.Timeout
	}

	// context.WithTimeout with a non-positive duration returns a context that
	// is already expired, which would make every call fail immediately. Use a
	// cancel-only context in that case so "no timeout" really means none.
	var (
		ctx    context.Context
		cancel context.CancelFunc
	)
	if timeout > 0 {
		ctx, cancel = context.WithTimeout(context.Background(), timeout)
	} else {
		ctx, cancel = context.WithCancel(context.Background())
	}
	defer cancel()

	session, err := c.connect(ctx)
	if err != nil {
		return nil, err
	}
	defer session.Close()

	if args == nil {
		args = map[string]any{}
	}

	sdkResult, err := session.CallTool(ctx, &sdkmcp.CallToolParams{
		Name:      name,
		Arguments: args,
	})
	if err != nil {
		return nil, err
	}

	result := convertResult(sdkResult)
	if result.IsError {
		return result, fmt.Errorf("%s", result.TextContent())
	}
	return result, nil
}
// convertResult maps an SDK tool result onto the CLI's ToolResult.
//
// Text items are copied as-is and image items have their data base64-encoded;
// any other content kind is skipped. Structured content is used directly when
// it is already a map[string]any; otherwise it is round-tripped through JSON,
// and left unset if that round trip fails.
func convertResult(r *sdkmcp.CallToolResult) *ToolResult {
	out := &ToolResult{IsError: r.IsError}

	for _, item := range r.Content {
		if text, ok := item.(*sdkmcp.TextContent); ok {
			out.Content = append(out.Content, ContentItem{Type: "text", Text: text.Text})
			continue
		}
		if img, ok := item.(*sdkmcp.ImageContent); ok {
			encoded := base64.StdEncoding.EncodeToString(img.Data)
			out.Content = append(out.Content, ContentItem{Type: "image", Data: encoded, MimeType: img.MIMEType})
		}
	}

	switch structured := r.StructuredContent.(type) {
	case nil:
		// No structured payload; leave the field unset.
	case map[string]any:
		out.StructuredContent = structured
	default:
		raw, err := json.Marshal(structured)
		if err != nil {
			break
		}
		var decoded map[string]any
		if json.Unmarshal(raw, &decoded) == nil {
			out.StructuredContent = decoded
		}
	}

	return out
}
// ResolvePageID picks the page to operate on. A non-nil explicit ID is
// returned unchanged; otherwise the "get_active_page" tool is called and the
// page ID is read from its structured content.
func (c *Client) ResolvePageID(explicit *int) (int, error) {
	if explicit != nil {
		return *explicit, nil
	}

	active, callErr := c.CallTool("get_active_page", nil)
	if callErr != nil {
		return 0, fmt.Errorf("no active page: %w", callErr)
	}

	id, found := extractPageID(active.StructuredContent)
	if !found {
		return 0, fmt.Errorf("could not determine active page ID from response")
	}
	return id, nil
}
// extractPageID looks for an integer page ID in structured tool output. It
// checks the top-level "pageId" key first and then "pageId" nested under a
// "page" object, reporting false when neither yields an integer.
func extractPageID(sc map[string]any) (int, bool) {
	// Lookups on a nil map are safe and simply miss, so no nil guard is needed.
	if id, ok := intValue(sc["pageId"]); ok {
		return id, true
	}
	if nested, isMap := sc["page"].(map[string]any); isMap {
		if id, ok := intValue(nested["pageId"]); ok {
			return id, true
		}
	}
	return 0, false
}
// intValue converts a loosely typed numeric value, as produced by JSON
// decoding, into an int.
//
// Accepted inputs are int, int32, int64, float64 and json.Number. The second
// result is false when v has any other type, is not a whole number (including
// NaN), or does not fit in an int on the current platform.
func intValue(v any) (int, bool) {
	switch n := v.(type) {
	case int:
		return n, true
	case int32:
		return int(n), true
	case int64:
		// Round-trip check rejects values that would be truncated where int
		// is 32 bits wide.
		if int64(int(n)) != n {
			return 0, false
		}
		return int(n), true
	case float64:
		// NaN never equals itself, so this also rejects NaN along with
		// fractional values.
		if math.Trunc(n) != n {
			return 0, false
		}
		// Converting an out-of-range float (including ±Inf) to int gives an
		// implementation-dependent result. -MinInt is a power of two and so
		// exactly representable, giving the half-open range [-limit, limit).
		const limit = -float64(math.MinInt)
		if n < -limit || n >= limit {
			return 0, false
		}
		return int(n), true
	case json.Number:
		i, err := n.Int64()
		if err != nil || int64(int(i)) != i {
			return 0, false
		}
		return int(i), true
	default:
		return 0, false
	}
}
// Health queries the server's /health endpoint over plain REST (not MCP).
// It delegates to restGET, so it returns the response body decoded as a JSON
// object, or an error if the request fails, the status is 400 or above, or
// the body is not valid JSON.
func (c *Client) Health() (map[string]any, error) {
return c.restGET("/health")
}
// Status queries the server's /status endpoint over plain REST (not MCP).
// It delegates to restGET, so it returns the response body decoded as a JSON
// object, or an error if the request fails, the status is 400 or above, or
// the body is not valid JSON.
func (c *Client) Status() (map[string]any, error) {
return c.restGET("/status")
}
// restGET issues a GET request to BaseURL + path and decodes the response
// body as a JSON object.
//
// A transport failure is wrapped in an error that lists the CLI commands the
// user can run to recover. A status of 400 or above is returned as an error
// that includes the start of the response body. A body that is not valid JSON
// yields a "parse response" error.
func (c *Client) restGET(path string) (map[string]any, error) {
	resp, err := c.HTTPClient.Get(c.BaseURL + path)
	if err != nil {
		return nil, fmt.Errorf("cannot connect to BrowserOS at %s: %w\n\n"+
			" If BrowserOS is running on a different port: browseros-cli init --auto\n"+
			" If BrowserOS is not running: browseros-cli launch\n"+
			" If not installed: browseros-cli install", c.BaseURL, err)
	}
	defer resp.Body.Close()

	if resp.StatusCode >= 400 {
		// The endpoint may belong to an unrelated server, so cap how much of
		// its error page is read into memory and echoed back to the user.
		const maxErrorBody = 4 << 10
		// Best effort: a read failure only shortens the diagnostic text.
		body, _ := io.ReadAll(io.LimitReader(resp.Body, maxErrorBody))
		return nil, fmt.Errorf("server returned HTTP %d: %s", resp.StatusCode, string(body))
	}

	var data map[string]any
	if err := json.NewDecoder(resp.Body).Decode(&data); err != nil {
		return nil, fmt.Errorf("parse response: %w", err)
	}
	return data, nil
}