Recipe: Perplexity-like answer engine
A minimal pipeline that fetches the top search results via SerpAPI, extracts clean text from each page with trafilatura, and feeds that text into a local LLM (Ollama + Mistral) to synthesize a single cited answer. The LLM is instructed to cite sources inline and to say so when it is unsure. No vector DB, no embeddings — just plain retrieval-augmented generation in under 80 lines.
import json
import os
import sys

import requests
import trafilatura
# --- Step 1: web search ------------------------------------------------------
# The question to answer; it doubles as the search query and is repeated
# verbatim in the LLM prompt.
QUERY = "latest nimbus loader architecture"

# Prefer the SERPAPI_KEY environment variable so a real key never has to be
# written into this file; the original placeholder stays as the fallback.
SERPAPI_KEY = os.environ.get("SERPAPI_KEY", "your_key")

# "num" caps how many results SerpAPI is asked to return.
params = {"q": QUERY, "api_key": SERPAPI_KEY, "num": 5}

search_response = requests.get(
    "https://serpapi.com/search",
    params=params,
    # requests waits indefinitely by default; bound the wait so a stalled
    # API call cannot hang the whole script.
    timeout=10,
)
# Fail loudly on an HTTP error status (e.g. rejected key) instead of
# carrying on with an error payload that has no "organic_results".
search_response.raise_for_status()
results = search_response.json()
# --- Step 2: download each hit and extract its readable text -----------------
# Each entry is {"url": <page URL>, "text": <extracted text, max 2000 chars>}.
docs = []
for r in results.get("organic_results", []):
    link = r.get("link")
    if not link:
        # A hit without a URL can be neither fetched nor cited.
        continue

    try:
        page = requests.get(link, timeout=10)
        # Treat 4xx/5xx as failures so error pages are not passed off as
        # source material.
        page.raise_for_status()
    except requests.RequestException as exc:
        # Best effort: one dead or slow site must not abort the whole run.
        # Report on stderr so stdout carries only the final answer.
        print(f"Skipping {link}: {exc}", file=sys.stderr)
        continue

    html = page.text
    # trafilatura.extract may yield nothing usable for a page; such pages
    # are dropped by the truthiness check below.
    text = trafilatura.extract(html)
    if text:
        # Cap each source at 2000 characters to keep the prompt small.
        docs.append({"url": link, "text": text[:2000]})
# --- Step 3: assemble the grounded prompt ------------------------------------
# Number the sources from 1 so the model can cite them as [1], [2], ...
# Each source is rendered as "[n] <url>" followed by its text on the next line.
numbered_sources = []
for position, doc in enumerate(docs, start=1):
    numbered_sources.append(f"[{position}] {doc['url']}\n{doc['text']}")

# A blank line separates consecutive sources.
context = "\n\n".join(numbered_sources)

# Layout: instructions, then the sources, then the question.
prompt = "\n".join(
    [
        "Answer using ONLY the sources below.",
        "Cite as [1], [2]. If unsure, say so.",
        context,
        f"Question: {QUERY}",
    ]
)
# --- Step 4: ask the local model for a cited answer --------------------------
# "stream": False requests the whole completion in a single JSON reply
# instead of a stream of partial chunks.
payload = {"model": "mistral", "prompt": prompt, "stream": False}

if docs:
    llm_response = requests.post(
        "http://localhost:11434/api/generate",
        json=payload,
        # Local generation can be slow, so allow a generous wait, but still
        # bound it: requests would otherwise block forever on a hung server.
        timeout=300,
    )
    # Surface server-side failures (e.g. model not available) as an HTTP
    # error rather than as a KeyError on the missing "response" field.
    llm_response.raise_for_status()
    answer = llm_response.json()
else:
    # With no sources there is nothing for "ONLY the sources below" to refer
    # to; decline up front instead of inviting an ungrounded answer.
    answer = {
        "response": "No readable sources were retrieved, so this question "
        "cannot be answered from sources."
    }

print(answer["response"])