Ready-to-run examples covering the main hallx usage patterns. Each example below is a standalone script: save it as its own .py file and run it directly. Install hallx first: pip install hallx.
# Example 1: synchronous check combining context grounding with JSON Schema
# validation of a structured (dict) response.
from hallx import Hallx

checker = Hallx(profile="balanced")

result = checker.check(
    prompt="Summarize refund policy",
    response={"summary": "Refunds are allowed within 30 days."},
    # Source passages the response must be grounded in.
    context=["Refunds are allowed within 30 days of purchase."],
    # Schema the structured response must satisfy.
    schema={
        "type": "object",
        "properties": {"summary": {"type": "string"}},
        "required": ["summary"],
        "additionalProperties": False,
    },
)

# Inspect the check result: overall confidence, risk bucket, per-signal
# scores, detected issues, and the recommended action.
print(f"confidence : {result.confidence:.3f}")
print(f"risk_level : {result.risk_level}")
print(f"scores : {result.scores}")
print(f"issues : {result.issues}")
print(f"action : {result.recommendation['action']}")

import asyncio
# Example 2: asynchronous check that generates the answer through an
# OpenAI adapter and gates serving on the resulting risk level.
import os

from hallx import Hallx
from hallx.adapters import OpenAIAdapter

# Set OPENAI_API_KEY in your environment before running
adapter = OpenAIAdapter(model="gpt-4o-mini")
checker = Hallx(profile="balanced", adapter=adapter)

# Reference passages the generated answer is checked against.
CONTEXT = [
    "Ibuprofen is a nonsteroidal anti-inflammatory drug (NSAID).",
    "Common side effects include nausea, stomach pain, heartburn, and dizziness.",
    "Serious side effects may include stomach bleeding, kidney problems, and increased "
    "risk of heart attack or stroke.",
]


async def main():
    """Run one grounded check via the adapter and gate on risk level."""
    result = await checker.check_async(
        prompt="What are the side effects of ibuprofen?",
        context=CONTEXT,
    )
    print(f"confidence : {result.confidence:.3f}")
    print(f"risk_level : {result.risk_level}")
    print(f"grounding : {result.scores.get('grounding'):.3f}")
    if result.risk_level == "high":
        # Block high-risk output and surface the detected issues.
        print("⚠ High risk — do not serve this response.")
        for issue in result.issues:
            print(f" • {issue}")
    else:
        print("✓ Response passed hallucination check.")


asyncio.run(main())

import asyncio
# Example 3: asynchronous check without context or schema — demonstrates
# the confidence skip-penalty applied when grounding cannot run.
from hallx import Hallx
from hallx.adapters import OpenAIAdapter

adapter = OpenAIAdapter(model="gpt-4o-mini")
checker = Hallx(profile="balanced", adapter=adapter)


async def main():
    """Check a response with no grounding material available."""
    # No context= or schema= — only schema + consistency signals run.
    # Grounding check is skipped, incurring a skip_penalty of 0.25.
    result = await checker.check_async(
        prompt="Explain the water cycle.",
    )
    print(f"confidence : {result.confidence:.3f}")
    print(f"risk_level : {result.risk_level}")
    print(f"scores : {result.scores}")
    # Expect lower confidence than with context — skip penalty applied
    print("note: grounding was skipped (no context provided)")


asyncio.run(main())

from hallx import Hallx
# Example 4: retry loop — regenerate at a lower temperature until the
# check recommends "proceed", or escalate after MAX_RETRIES attempts.
from hallx import Hallx


def call_llm(prompt: str, temperature: float = 0.7) -> str:
    """Placeholder generator — replace with your actual LLM call."""
    return "Model response here"


checker = Hallx(profile="balanced")

prompt = "What is the boiling point of water at sea level?"
context = ["Water boils at 100°C (212°F) at standard atmospheric pressure."]
temperature = 0.7
MAX_RETRIES = 3

result = None
for attempt in range(1, MAX_RETRIES + 1):
    response = call_llm(prompt, temperature=temperature)
    result = checker.check(
        prompt=prompt, response=response, context=context
    )
    print(f"attempt {attempt}: confidence={result.confidence:.3f} action={result.recommendation['action']}")
    if result.recommendation["action"] == "proceed":
        print("✓ Passed — using this response.")
        break
    # Lower the sampling temperature on retry, preferring the checker's
    # suggestion when one is provided.
    temperature = result.recommendation.get("suggested_temperature", 0.3)
    print(f" retrying with temperature={temperature}")
else:
    # for/else: runs only when the loop completed without a break.
    print("All retries exhausted — escalating to human review.")

from hallx import Hallx, HallxHighRiskError
# Example 5: strict mode — high-risk responses raise HallxHighRiskError
# instead of returning a result, so serving code sits in try/except.
checker = Hallx(strict=True, profile="strict")


def serve_response(result):
    """Placeholder — replace with your actual response-serving logic."""
    print("serving checked response")


def serve_safe_fallback():
    """Placeholder — replace with your actual fallback-serving logic."""
    print("serving safe fallback")


prompt = "What is the maximum safe dose of acetaminophen per day?"
context = [
    "The maximum recommended dose of acetaminophen for adults is 4,000 mg per day.",
    "Exceeding this dose can cause serious liver damage.",
]

try:
    result = checker.check(
        prompt=prompt,
        response="The maximum safe dose is 4 grams (4,000 mg) per day for adults.",
        context=context,
    )
    # Only reached when risk_level is NOT "high"
    print(f"✓ Low risk: confidence={result.confidence:.3f}")
    serve_response(result)
except HallxHighRiskError as e:
    # High-risk response blocked automatically
    print(f"✗ Blocked: {e}")
    serve_safe_fallback()

from hallx import Hallx
# Example 6: recording labeled outcomes into a feedback database and
# generating a calibration report from them.
checker = Hallx(
    profile="balanced",
    feedback_db_path="/var/lib/myapp/hallx.sqlite3",
)

# (prompt, response, human-assigned label) triples to record.
PAIRS = [
    ("What is 2+2?", "4", "correct"),
    ("Capital of UK?", "Paris", "hallucinated"),
    ("Speed of light?", "299,792,458 m/s", "correct"),
]

for prompt, response, label in PAIRS:
    result = checker.check(prompt=prompt, response=response)
    # Persist the check result together with the ground-truth label.
    checker.record_outcome(
        result=result,
        label=label,
        metadata={"reviewer": "qa-team"},
        prompt=prompt,
        response_excerpt=response,
    )
    print(f"recorded: {prompt!r} → {label}")

# Generate calibration report for the last 30 days
report = checker.calibration_report(window_days=30)
print("\n── Calibration Report ──")
print(f"hallucination_rate : {report['hallucination_rate']:.2%}")
print(f"suggested_threshold : {report['suggested_threshold']:.3f}")
print(f"threshold_metrics : {report['threshold_metrics']}")

import asyncio
# Example 7: batch-check a labeled dataset asynchronously, record each
# outcome, then derive a suggested confidence threshold.
from hallx import Hallx
from hallx.adapters import OpenAIAdapter

adapter = OpenAIAdapter(model="gpt-4o-mini")
checker = Hallx(
    profile="balanced",
    adapter=adapter,
    feedback_db_path="hallx_feedback.sqlite3",
)

# Labeled evaluation items: prompt, grounding context, expected label.
DATASET = [
    {
        "prompt": "What causes the Northern Lights?",
        "context": ["The aurora borealis is caused by solar particles interacting with Earth's magnetic field."],
        "label": "correct",
    },
    {
        "prompt": "Who invented the telephone?",
        "context": ["Alexander Graham Bell is credited with inventing the telephone in 1876."],
        "label": "correct",
    },
]


async def run_batch():
    """Check every dataset item, record outcomes, and print a threshold."""
    for item in DATASET:
        result = await checker.check_async(
            prompt=item["prompt"],
            context=item["context"],
        )
        checker.record_outcome(
            result=result,
            label=item["label"],
            prompt=item["prompt"],
        )
        print(f"{item['prompt']!r}: confidence={result.confidence:.3f} risk={result.risk_level}")
    # Recalibrate from the outcomes recorded over the last week.
    report = checker.calibration_report(window_days=7)
    print(f"\nsuggested_threshold: {report['suggested_threshold']:.3f}")


asyncio.run(run_batch())