Turn your local documents into an AI-powered knowledge base API in seconds.
Create dedicated endpoints that combine your documents (PDF, DOCX, TXT, CSV, web URLs) with local LLM inference. Each service runs on its own port and can be consumed from any language or tool.
Base URL: `http://localhost:{port}`

| Method | Endpoint | Description |
|---|---|---|
| GET | `/` | Service dashboard (HTML) |
| GET | `/health` | Health check |
| POST | `/chat` | Chat with RAG context (non-streaming) |
| POST | `/chat/stream` | Chat with RAG context (Server-Sent Events) |
| GET | `/v1/models` | OpenAI-compatible model list |
| POST | `/v1/chat/completions` | OpenAI-compatible chat completions (supports `stream`) |

| Field | Type | Description |
|---|---|---|
| `messages` | array | Array of message objects with `role` and `content` |

{
"messages": [
{ "role": "system", "content": "Optional system prompt override" },
{ "role": "user", "content": "What does the invoice say about payment terms?" }
]
}
{
"model": "kairos-raas",
"content": "Based on the document, the payment terms are Net 30...",
"token_count": 42
}
Same as /chat
data: {"content": "Based"}
data: {"content": " on"}
data: {"content": " the"}
...
data: [DONE]
{
"model": "kairos-raas",
"stream": true,
"messages": [
{ "role": "user", "content": "Summarize this KB" }
]
}
data: {"object":"chat.completion.chunk","choices":[{"delta":{"role":"assistant"}}]}
data: {"object":"chat.completion.chunk","choices":[{"delta":{"content":"Hello"}}]}
data: {"object":"chat.completion.chunk","choices":[{"finish_reason":"stop","delta":{}}]}
data: [DONE]
{
"status": "ok",
"service": "My Knowledge Base",
"port": 5001
}
All examples use http://localhost:5001 — replace 5001 with your service's
configured port.
import requests
BASE_URL = "http://localhost:5001"

# Send the conversation to the non-streaming chat endpoint.
response = requests.post(
    f"{BASE_URL}/chat",
    json={
        "messages": [
            {"role": "user", "content": "Summarize the uploaded document"}
        ]
    },
)
# Error responses carry an "error" object instead of "content", so raise on
# a non-2xx status here rather than failing with a KeyError below.
response.raise_for_status()

data = response.json()
print(data["content"])
import requests
import json
# stream=True keeps the connection open so events can be read as they arrive.
response = requests.post(
    "http://localhost:5001/chat/stream",
    json={"messages": [{"role": "user", "content": "What are the key findings?"}]},
    stream=True,
)
# Fail early on 4xx/5xx: an error body is plain JSON, not a "data: " event stream.
response.raise_for_status()

for line in response.iter_lines():
    if not line:
        # Blank lines separate Server-Sent Events.
        continue
    text = line.decode("utf-8")
    if text == "data: [DONE]":
        # End-of-stream sentinel; nothing more to read.
        break
    if text.startswith("data: "):
        # Strip the 6-character "data: " prefix to get the JSON payload.
        chunk = json.loads(text[6:])
        print(chunk["content"], end="", flush=True)
import httpx
# httpx applies a 5-second default timeout; local LLM inference can take
# longer, so raise it explicitly (tune the value for your model and hardware).
with httpx.Client(base_url="http://localhost:5001", timeout=60.0) as client:
    r = client.post(
        "/chat",
        json={
            "messages": [{"role": "user", "content": "List all action items from the document"}]
        },
    )
    # Error responses have no "content" key; surface the HTTP failure instead.
    r.raise_for_status()
    print(r.json()["content"])
using System.Net.Http.Json;
var client = new HttpClient { BaseAddress = new Uri("http://localhost:5001") };

var request = new
{
    messages = new[]
    {
        new { role = "user", content = "What is the total amount on this invoice?" }
    }
};

var response = await client.PostAsJsonAsync("/chat", request);
// Error responses carry an "error" object rather than "content"; throw on
// non-success instead of silently printing an empty line.
response.EnsureSuccessStatusCode();

var result = await response.Content.ReadFromJsonAsync<ChatResponse>();
Console.WriteLine(result?.Content);

// Response model.
// "model" and "content" bind by case-insensitive name matching, but the
// snake_case "token_count" field needs an explicit mapping; without it
// TokenCount would always deserialize as 0.
record ChatResponse(
    string Model,
    string Content,
    [property: System.Text.Json.Serialization.JsonPropertyName("token_count")] int TokenCount);
using System.Net.Http.Json;
using System.Text.Json;
var client = new HttpClient { BaseAddress = new Uri("http://localhost:5001") };

var request = new
{
    messages = new[] { new { role = "user", content = "Explain the contract terms" } }
};

var httpRequest = new HttpRequestMessage(HttpMethod.Post, "/chat/stream")
{
    Content = JsonContent.Create(request)
};

// ResponseHeadersRead returns once the headers arrive, so the body can be
// consumed incrementally instead of being buffered in full.
using var response = await client.SendAsync(httpRequest, HttpCompletionOption.ResponseHeadersRead);
response.EnsureSuccessStatusCode();

using var stream = await response.Content.ReadAsStreamAsync();
using var reader = new StreamReader(stream);

// ReadLineAsync returns null at end of stream. Polling reader.EndOfStream
// instead would perform a blocking synchronous read inside async code.
string? line;
while ((line = await reader.ReadLineAsync()) is not null)
{
    // Blank lines separate Server-Sent Events.
    if (line.Length == 0) continue;
    if (line == "data: [DONE]") break;
    if (line.StartsWith("data: ", StringComparison.Ordinal))
    {
        // Drop the 6-character "data: " prefix to get the JSON payload.
        var chunk = JsonSerializer.Deserialize<JsonElement>(line[6..]);
        Console.Write(chunk.GetProperty("content").GetString());
    }
}
// Node.js / Browser (fetch API)
// Build the request payload first, then POST it as JSON to the chat endpoint.
const payload = {
  messages: [{ role: "user", content: "What does this document say about deadlines?" }]
};

const response = await fetch("http://localhost:5001/chat", {
  method: "POST",
  headers: { "Content-Type": "application/json" },
  body: JSON.stringify(payload)
});

// The reply body is JSON; the generated answer is in its `content` field.
const data = await response.json();
console.log(data.content);
const response = await fetch("http://localhost:5001/chat/stream", {
  method: "POST",
  headers: { "Content-Type": "application/json" },
  body: JSON.stringify({
    messages: [{ role: "user", content: "List the key points" }]
  })
});
if (!response.ok) {
  // Error responses are plain JSON, not an event stream.
  throw new Error(`Request failed with status ${response.status}`);
}

const reader = response.body.getReader();
const decoder = new TextDecoder();

// Network chunks do not line up with SSE lines: one event may be split across
// two reads. Keep the unterminated tail of each chunk and prepend it to the
// next one so JSON.parse only ever sees complete lines.
let buffer = "";
let finished = false;

while (!finished) {
  const { done, value } = await reader.read();
  if (done) break;

  // stream: true keeps multi-byte UTF-8 sequences that straddle two chunks intact.
  buffer += decoder.decode(value, { stream: true });

  const lines = buffer.split("\n");
  buffer = lines.pop(); // the last element is an incomplete line (or "")

  for (const rawLine of lines) {
    const line = rawLine.trimEnd(); // tolerate CRLF line endings
    if (line === "data: [DONE]") {
      finished = true;
      break;
    }
    if (line.startsWith("data: ")) {
      const chunk = JSON.parse(line.slice(6));
      // Node.js output; in a browser, append chunk.content to the page instead.
      process.stdout.write(chunk.content);
    }
  }
}
import java.net.URI;
import java.net.http.*;
import com.google.gson.JsonParser;
/**
 * Minimal client that posts one question to the non-streaming /chat endpoint
 * and prints the "content" field of the JSON reply.
 */
public class KairosRaasClient {
    private static final String BASE_URL = "http://localhost:5001";

    public static void main(String[] args) throws Exception {
        // Text block lines share the closing delimiter's indentation, so the
        // resulting string carries no leading whitespace.
        String payload = """
                {
                "messages": [
                {"role": "user", "content": "What are the payment terms?"}
                ]
                }
                """;

        URI endpoint = URI.create(BASE_URL + "/chat");
        HttpRequest chatRequest = HttpRequest.newBuilder()
                .uri(endpoint)
                .header("Content-Type", "application/json")
                .POST(HttpRequest.BodyPublishers.ofString(payload))
                .build();

        HttpClient http = HttpClient.newHttpClient();
        HttpResponse<String> reply = http.send(chatRequest, HttpResponse.BodyHandlers.ofString());

        // Parse the reply body and print the generated answer.
        String answer = JsonParser.parseString(reply.body())
                .getAsJsonObject()
                .get("content")
                .getAsString();
        System.out.println(answer);
    }
}
import java.net.URI;
import java.net.http.*;
import java.util.stream.Stream;
import com.google.gson.JsonParser;
HttpClient client = HttpClient.newHttpClient();

String body = """
{"messages": [{"role": "user", "content": "Summarize the report"}]}
""";

HttpRequest request = HttpRequest.newBuilder()
        .uri(URI.create("http://localhost:5001/chat/stream"))
        .header("Content-Type", "application/json")
        .POST(HttpRequest.BodyPublishers.ofString(body))
        .build();

// ofLines() exposes the response body as a stream of text lines.
HttpResponse<Stream<String>> response =
        client.send(request, HttpResponse.BodyHandlers.ofLines());

// Keep only "data: " events other than the [DONE] sentinel, strip the
// 6-character prefix, and print each chunk's "content" as it is read.
response.body()
        .filter(line -> line.startsWith("data: ") && !line.equals("data: [DONE]"))
        .map(line -> JsonParser.parseString(line.substring(6)).getAsJsonObject())
        .map(json -> json.get("content").getAsString())
        .forEach(System.out::print);
use reqwest::Client;
use serde_json::{json, Value};
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
let client = Client::new();
let response = client
.post("http://localhost:5001/chat")
.json(&json!({
"messages": [
{"role": "user", "content": "What is the summary?"}
]
}))
.send()
.await?;
let data: Value = response.json().await?;
println!("{}", data["content"].as_str().unwrap_or_default());
Ok(())
}
# Health check: plain GET against the service.
curl http://localhost:5001/health
# Non-streaming chat: POST the messages array as a JSON body.
curl -X POST http://localhost:5001/chat \
-H "Content-Type: application/json" \
-d '{
"messages": [
{"role": "user", "content": "Summarize the uploaded document"}
]
}'
# Streaming chat: -N turns off curl's output buffering so each
# "data: ..." event is printed as soon as it arrives.
curl -N -X POST http://localhost:5001/chat/stream \
-H "Content-Type: application/json" \
-d '{
"messages": [
{"role": "user", "content": "What are the key findings?"}
]
}'
# Build the request as a hashtable holding a one-element messages array,
# then serialize it to JSON. -Depth 3 leaves headroom so the nested
# message hashtables are serialized in full.
$body = @{
messages = @(
@{ role = "user"; content = "What does the document say about pricing?" }
)
} | ConvertTo-Json -Depth 3
# Invoke-RestMethod sends the POST and converts the JSON reply into an object.
$response = Invoke-RestMethod -Uri "http://localhost:5001/chat" `
-Method Post -ContentType "application/json" -Body $body
# Print the "content" field of the reply.
Write-Host $response.content
All chat endpoints (`/chat`, `/chat/stream`, `/v1/chat/completions`) support multi-turn conversations. Pass the full message history in `messages`:
{
"messages": [
{ "role": "system", "content": "You are a legal assistant. Answer based only on the provided documents." },
{ "role": "user", "content": "What is the contract duration?" },
{ "role": "assistant", "content": "The contract duration is 12 months from the signing date." },
{ "role": "user", "content": "What happens if either party wants to terminate early?" }
]
}
| Code | Meaning |
|---|---|
| 200 | Success |
| 400 | Bad request (missing/empty `messages` array) |
| 404 | Unknown endpoint |
| 500 | Server error (model not loaded, internal failure) |

{
"error": {
"message": "Messages array is required",
"type": "invalid_request_error"
}
}