Official client libraries for the XMem long-term memory API.
Available in Python, TypeScript, and Go.
All three SDKs share the same design principles:
- Three methods that map 1:1 to the API: ingest, retrieve, search
- Bearer-token authentication via constructor arg or `XMEM_API_KEY` env var
- Typed error hierarchy so callers can handle auth, rate-limit, and server errors distinctly
- Zero-config defaults — point at `localhost:8000` with no key and it just works in dev
A running XMem API server:
uvicorn src.api.app:create_app --factory --host 0.0.0.0 --port 8000

Location: client/xmem/
cd client
pip install -e xmem # or: pip install httpx (the only runtime dependency)

from xmem import XMemClient
client = XMemClient(api_url="http://localhost:8000", api_key="sk-...")
# Check health
health = client.ping()
print(health.status, health.pipelines_ready)
# Ingest a conversation turn
result = client.ingest(
user_query="I just got promoted to senior engineer at Google!",
agent_response="Congratulations on your promotion!",
user_id="user_42",
)
print(result.model, result.profile, result.temporal)
# Retrieve an LLM-generated answer backed by memory
answer = client.retrieve(query="What is my job title?", user_id="user_42")
print(answer.answer)
print(answer.sources) # list of SourceRecord
print(answer.confidence)
# Raw semantic search (no LLM answer)
hits = client.search(
query="work history",
user_id="user_42",
domains=["profile", "temporal"],
top_k=10,
)
for r in hits.results:
print(f"[{r.domain}] {r.content} (score={r.score:.2f})")
client.close()

from xmem import AsyncXMemClient
async with AsyncXMemClient(api_url="http://localhost:8000") as client:
result = await client.ingest(
user_query="I love hiking in the Rockies.",
user_id="user_42",
)
answer = await client.retrieve(query="hobbies", user_id="user_42")
print(answer.answer)

from xmem import XMemClient, AuthenticationError, RateLimitError, NotReadyError
client = XMemClient(api_key="bad-key")
try:
client.ingest(user_query="test", user_id="u1")
except AuthenticationError as e:
print(f"Auth failed (HTTP {e.status_code}): {e.message}")
except RateLimitError as e:
print(f"Throttled, retry after {e.retry_after}s")
except NotReadyError:
print("Pipelines still loading, try again shortly")

| Parameter | Env var | Default |
|---|---|---|
| `api_url` | `XMEM_API_URL` | `http://localhost:8000` |
| `api_key` | `XMEM_API_KEY` | (empty, no auth) |
| `timeout` | — | 120 seconds |
Location: client/xmem-ts/
Package name: @xmem/sdk
cd client/xmem-ts
npm install
npm run build

import { XMemClient } from "@xmem/sdk";
const client = new XMemClient("http://localhost:8000", "sk-...");
// Health
const ready = await client.isReady();
// Ingest
const result = await client.ingest({
user_query: "I just adopted a golden retriever named Max!",
agent_response: "That's wonderful!",
user_id: "user_42",
});
// Retrieve
const answer = await client.retrieve({
query: "Do I have any pets?",
user_id: "user_42",
});
console.log(answer.answer);
// Search
const hits = await client.search({
query: "pets",
user_id: "user_42",
domains: ["profile", "summary"],
top_k: 5,
});
hits.results.forEach((r) => console.log(`[${r.domain}] ${r.content}`));

import { XMemClient, AuthenticationError, RateLimitError } from "@xmem/sdk";
try {
await client.ingest({ user_query: "test", user_id: "u1" });
} catch (e) {
if (e instanceof AuthenticationError) {
console.error(`Auth failed: ${e.message}`);
} else if (e instanceof RateLimitError) {
console.error(`Rate limited, retry after ${e.retryAfter}s`);
}
}

Location: client/xmem-go/
Module: github.com/xmem/sdk-go
go get github.com/xmem/sdk-go

package main
import (
"fmt"
xmem "github.com/xmem/sdk-go"
)
func main() {
client := xmem.NewClient("http://localhost:8000", "sk-...")
// Health
if client.IsReady() {
fmt.Println("XMem API is ready")
}
// Ingest
result, err := client.Ingest(xmem.IngestParams{
UserQuery: "I'm moving to Seattle next month.",
AgentResponse: "Good luck with your move!",
UserID: "user_42",
})
if err != nil {
panic(err)
}
fmt.Println("Model:", result.Model)
// Retrieve
answer, err := client.Retrieve(xmem.RetrieveParams{
Query: "Where am I moving?",
UserID: "user_42",
})
if err != nil {
panic(err)
}
fmt.Println("Answer:", answer.Answer)
// Search
hits, err := client.Search(xmem.SearchParams{
Query: "location",
UserID: "user_42",
Domains: []string{"profile", "temporal"},
TopK: 10,
})
if err != nil {
panic(err)
}
for _, r := range hits.Results {
fmt.Printf("[%s] %s (%.2f)\n", r.Domain, r.Content, r.Score)
}
}

result, err := client.Ingest(params)
if err != nil {
switch e := err.(type) {
case *xmem.AuthenticationError:
fmt.Println("Bad API key:", e.Message)
case *xmem.RateLimitError:
fmt.Printf("Throttled, retry after %ds\n", e.RetryAfter)
case *xmem.NotReadyError:
fmt.Println("Pipelines loading, retry shortly")
default:
fmt.Println("Error:", err)
}
}

All three SDKs expose the same three memory operations, plus a health check:

| Method | Endpoint | Description |
|---|---|---|
| `ingest` | `POST /v1/memory/ingest` | Store a conversation turn. XMem classifies the input and extracts profile facts, temporal events, and summaries automatically. |
| `retrieve` | `POST /v1/memory/retrieve` | Answer a question using stored memories. Returns an LLM-generated answer with source citations and a confidence score. |
| `search` | `POST /v1/memory/search` | Raw semantic search across memory domains (profile, temporal, summary). Returns matching records without an LLM answer. |
| `ping` | `GET /health` | Health/readiness check. Never raises on a valid HTTP response. |
| Error | HTTP status | When |
|---|---|---|
| `AuthenticationError` | 401 / 403 | Missing or invalid API key |
| `RateLimitError` | 429 | Per-key rate limit exceeded |
| `ValidationError` | 422 | Request body failed validation |
| `NotReadyError` | 503 | Pipelines still initializing |
| `ServerError` | 5xx | Server-side failure |
| `ConnectionError` | — | Network timeout, DNS failure, connection refused |