Recipe: Perplexity-like answer engine

A minimal pipeline that fetches the top search results via SerpAPI, extracts clean text from each page with trafilatura, and feeds that text into a local LLM (Ollama + Mistral) to synthesize a single cited answer. The LLM is instructed to cite sources inline and to say so when it is unsure. No vector DB, no embeddings, no chunking — just bare-bones retrieval-augmented generation in under 80 lines.

import requests, trafilatura, json

# Answer-engine script: search -> fetch/extract -> prompt a local LLM -> print.
# Runs top to bottom as a flat script; every network call has a timeout and an
# HTTP status check so failures are reported at the step that caused them.

# --- Configuration ---------------------------------------------------------
QUERY = "latest nimbus loader architecture"
SERPAPI_KEY = "your_key"  # placeholder: replace with a real SerpAPI key

# --- 1. Search: ask SerpAPI for the top results ----------------------------
params = {"q": QUERY, "api_key": SERPAPI_KEY, "num": 5}
search_response = requests.get(
    "https://serpapi.com/search", params=params, timeout=10
)
# Surface HTTP-level failures here instead of as a confusing JSON/KeyError
# further down.
search_response.raise_for_status()
results = search_response.json()

# --- 2. Fetch each hit and extract its main text ---------------------------
docs = []
for r in results.get("organic_results", []):
    link = r.get("link")
    if not link:
        # Result entry without a URL: nothing to fetch.
        continue
    try:
        page = requests.get(link, timeout=10)
        # Skip error pages (404/500...) rather than treating them as sources.
        page.raise_for_status()
    except requests.RequestException:
        # Best-effort retrieval: one dead or slow site must not abort the
        # whole pipeline; the remaining results are still usable.
        continue
    text = trafilatura.extract(page.text)
    if text:
        # Cap each source at 2000 characters to keep the prompt small.
        docs.append({"url": link, "text": text[:2000]})

if not docs:
    # Without sources the model could only answer from its own memory, which
    # contradicts the "ONLY the sources below" instruction in the prompt.
    raise SystemExit(
        "No usable sources could be retrieved for the query: "
        f"{QUERY!r} (SerpAPI error: {results.get('error')!r})"
    )

# --- 3. Build the numbered context and the prompt --------------------------
# Sources are numbered from 1 so they line up with the [1], [2] citation
# format requested in the prompt.
context = "\n\n".join(
    f"[{i+1}] {d['url']}\n{d['text']}" for i, d in enumerate(docs)
)

prompt = f"""Answer using ONLY the sources below.
Cite as [1], [2]. If unsure, say so.

{context}

Question: {QUERY}"""

# --- 4. Generate the answer with the local Ollama server -------------------
# "stream": False requests a single JSON object instead of a chunked stream.
payload = {"model": "mistral", "prompt": prompt, "stream": False}
llm_response = requests.post(
    "http://localhost:11434/api/generate",
    json=payload,
    # (connect, read) timeouts: fail fast if Ollama is not running, but give
    # generation itself a generous window.
    timeout=(5, 300),
)
llm_response.raise_for_status()
answer = llm_response.json()

if "response" not in answer:
    # Report the server's payload instead of dying with a bare KeyError.
    raise SystemExit(f"Unexpected Ollama reply: {answer!r}")

print(answer["response"])