Commit 6774dcff authored by Administrator's avatar Administrator

Update 3 files via Son of Anton

parent 3568bd0b
"""
Web Search Service — v4.1.1 — Robust multi-strategy search with fallbacks and proper region handling.
Primary: duckduckgo-search library (US-English region first; handles anti-bot internally)
Fallback 1: DDG HTML scraping with anti-detection
Fallback 2: DDG Lite scraping
Fallback 3: Brave Search HTML scraping
Fallback 4: Google HTML scraping (last resort)
"""
import re
......@@ -32,13 +33,21 @@ except ImportError:
# Small thread pool; _ddgs_library_search() uses it to run the blocking
# _ddgs_sync_search() call off the asyncio event loop.
_executor = ThreadPoolExecutor(max_workers=2)
# Desktop browser User-Agent strings (Chrome, Firefox and Safari on Windows,
# macOS and Linux).
# NOTE(review): presumably sampled by _get_headers() to vary the request
# fingerprint — its body is not visible in this view; confirm.
_USER_AGENTS = [
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:128.0) Gecko/20100101 Firefox/128.0",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.5 Safari/605.1.15",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:133.0) Gecko/20100101 Firefox/133.0",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.2 Safari/605.1.15",
]
# Domains that are NEVER useful in English search results.
# _is_valid_result() compares each entry against the URL's domain (exact match
# or ".<entry>" suffix), so an entry that contains a path ("google.com/search")
# can never match on its own.
# NOTE(review): consider a host-only entry instead of "google.com/search", or
# rely on the "/search?" URL check in _is_valid_result().
BLOCKED_DOMAINS = {
"baidu.com", "zhihu.com", "csdn.net", "bilibili.com", "weibo.com",
"sogou.com", "163.com", "qq.com", "taobao.com", "jd.com",
"douyin.com", "tiktok.com", "yandex.ru", "mail.ru",
"duckduckgo.com", "search.brave.com", "google.com/search",
}
def _get_headers():
return {
......@@ -57,6 +66,31 @@ def _get_headers():
}
def _is_valid_result(url: str, title: str) -> bool:
    """Filter out garbage results — blocked domains, search pages, empty junk.

    A result is rejected when:
      * the URL is empty, does not start with "http", cannot be parsed,
        or has no host;
      * the title is empty or shorter than 3 characters once stripped;
      * the host equals an entry of BLOCKED_DOMAINS or is a subdomain of one;
      * the URL contains "/search?" or "/search/" (a search-engine page).

    Args:
        url: Candidate result URL.
        title: Candidate result title.

    Returns:
        True if the result should be kept, False if it should be dropped.
    """
    if not url or not url.startswith("http"):
        return False
    if not title or len(title.strip()) < 3:
        return False

    try:
        # .hostname is lower-cased and excludes port/credentials, so
        # "Baidu.com:443" still compares equal to the "baidu.com" entry.
        host = urlparse(url).hostname
    except ValueError:
        # urlparse raises ValueError for malformed hosts (e.g. unbalanced "[").
        return False
    if not host:
        return False

    # Remove only a literal "www." prefix. str.lstrip("www.") strips any run of
    # leading 'w' and '.' characters, which turned "weibo.com" into "eibo.com"
    # and let blocked domains slip through.
    domain = host[4:] if host.startswith("www.") else host

    # Block known garbage domains (exact host or any subdomain of it)
    if any(
        domain == blocked or domain.endswith("." + blocked)
        for blocked in BLOCKED_DOMAINS
    ):
        return False

    # Block results that are clearly search engine pages, not actual results
    return "/search?" not in url and "/search/" not in url
# ═══════════════════════════════════════════════════
# Main entry point
# ═══════════════════════════════════════════════════
......@@ -90,13 +124,20 @@ async def search_web(query: str, num_results: int = 8, fetch_pages: int = 3) ->
if results:
logger.info(f"DDG Lite returned {len(results)} results")
# Strategy 4: Brave Search (no API key needed for basic)
# Strategy 4: Brave Search
if not results:
logger.info("DDG Lite failed, trying Brave Search...")
results = await _brave_html_search(query, num_results)
if results:
logger.info(f"Brave HTML search returned {len(results)} results")
# Strategy 5: Google scraping (last resort)
if not results:
logger.info("Brave failed, trying Google scraping...")
results = await _google_html_search(query, num_results)
if results:
logger.info(f"Google HTML search returned {len(results)} results")
if not results:
logger.warning(f"ALL search strategies failed for query: '{query[:80]}'")
return f"[Web search for '{query}' returned no results. All search strategies exhausted. Answer from your own knowledge.]"
......@@ -138,20 +179,56 @@ async def search_web(query: str, num_results: int = 8, fetch_pages: int = 3) ->
def _ddgs_sync_search(query: str, n: int) -> list[dict]:
    """Synchronous DDGS search — runs in thread executor.

    Tries several region/backend configurations in order and returns the hits
    of the first configuration that yields at least one valid result. Every
    hit is passed through ``_is_valid_result`` before it is kept.

    Args:
        query: Search query text.
        n: Maximum number of results to return.

    Returns:
        Up to ``n`` dicts with "title", "url" and "snippet" keys; an empty
        list when every configuration fails or yields nothing usable.
    """
    results = []

    # Try multiple configurations in order: US-English first, worldwide last.
    configs = [
        {"region": "us-en", "backend": "auto"},
        {"region": "us-en", "backend": "html"},
        {"region": "us-en", "backend": "lite"},
        {"region": "wt-wt", "backend": "auto"},
    ]

    for cfg in configs:
        if results:
            # A previous configuration failed part-way through but had already
            # produced valid hits; return those instead of querying again.
            break
        try:
            with DDGS() as ddgs:
                kwargs = {
                    "keywords": query,
                    "max_results": n + 5,  # Request extra, filter later
                    "region": cfg["region"],
                    "safesearch": "moderate",
                }
                try:
                    # backend parameter exists in newer versions
                    raw = list(ddgs.text(backend=cfg["backend"], **kwargs))
                except TypeError:
                    # Older version without backend parameter
                    raw = list(ddgs.text(**kwargs))

            for r in raw:
                # "or" chains tolerate keys that are present but None/empty.
                title = (r.get("title") or "").strip()
                url = (r.get("href") or r.get("link") or "").strip()
                snippet = (r.get("body") or r.get("snippet") or "").strip()

                if _is_valid_result(url, title):
                    results.append({"title": title, "url": url, "snippet": snippet})
                    if len(results) >= n:
                        break

            if results:
                logger.info(f"DDGS config {cfg} returned {len(results)} valid results")
                return results

        except Exception as e:
            # Best effort: any library/network failure moves on to the next config.
            logger.warning(f"DDGS config {cfg} failed: {type(e).__name__}: {e}")
            continue

    return results
async def _ddgs_library_search(query: str, n: int) -> list[dict]:
......@@ -160,7 +237,13 @@ async def _ddgs_library_search(query: str, n: int) -> list[dict]:
return []
try:
loop = asyncio.get_event_loop()
return await loop.run_in_executor(_executor, _ddgs_sync_search, query, n)
return await asyncio.wait_for(
loop.run_in_executor(_executor, _ddgs_sync_search, query, n),
timeout=30.0,
)
except asyncio.TimeoutError:
logger.error("DDGS library search timed out")
return []
except Exception as e:
logger.error(f"DDGS executor error: {e}")
return []
......@@ -188,13 +271,11 @@ async def _ddg_html_search(query: str, n: int) -> list[dict]:
except Exception:
pass
# Small random delay to appear more human
await asyncio.sleep(random.uniform(0.3, 1.0))
# Now do the actual search
resp = await client.post(
"https://html.duckduckgo.com/html/",
data={"q": query, "b": "", "kl": ""},
data={"q": query, "b": "", "kl": "us-en", "kp": "-1"},
headers=headers,
)
......@@ -204,7 +285,6 @@ async def _ddg_html_search(query: str, n: int) -> list[dict]:
html = resp.text
# Debug: check if we got a CAPTCHA or empty page
if len(html) < 500:
logger.warning(f"DDG HTML response suspiciously short: {len(html)} chars")
return []
......@@ -248,24 +328,24 @@ async def _ddg_html_search(query: str, n: int) -> list[dict]:
parsed = parse_qs(urlparse(href).query)
href = parsed.get("uddg", parsed.get("u", [href]))[0]
if title and href and href.startswith("http"):
if _is_valid_result(href, title):
results.append({"title": title, "url": href, "snippet": snippet})
if len(results) >= n:
break
if results:
break # Stop trying selectors if we found results
break
# Last resort: find ANY links that look like search results
if not results:
# Last resort: find ANY links that look like search results
for a_tag in soup.find_all("a", href=True):
href = a_tag.get("href", "")
text = a_tag.get_text(strip=True)
if "uddg=" in href:
parsed = parse_qs(urlparse(href).query)
real_url = parsed.get("uddg", [""])[0]
if real_url and real_url.startswith("http") and text and len(text) > 5:
if _is_valid_result(real_url, text):
results.append({"title": text, "url": real_url, "snippet": ""})
if len(results) >= n:
break
......@@ -290,7 +370,7 @@ async def _ddg_lite_search(query: str, n: int) -> list[dict]:
) as client:
resp = await client.post(
"https://lite.duckduckgo.com/lite/",
data={"q": query},
data={"q": query, "kl": "us-en"},
headers=_get_headers(),
)
if resp.status_code != 200:
......@@ -303,25 +383,24 @@ async def _ddg_lite_search(query: str, n: int) -> list[dict]:
soup = BeautifulSoup(html, "html.parser")
results = []
# DDG Lite uses a table-based layout
# Results are in <a class="result-link"> or just regular <a> tags in result rows
for a_tag in soup.select("a.result-link"):
href = a_tag.get("href", "")
title = a_tag.get_text(strip=True)
if href.startswith("http") and title and "duckduckgo" not in href.lower():
# Try to find snippet in next table row
snippet = ""
parent_tr = a_tag.find_parent("tr")
if parent_tr:
next_tr = parent_tr.find_next_sibling("tr")
if next_tr:
snippet_td = next_tr.find("td", class_="result-snippet")
if snippet_td:
snippet = snippet_td.get_text(strip=True)[:300]
elif next_tr:
snippet = next_tr.get_text(strip=True)[:300]
if not _is_valid_result(href, title):
continue
results.append({"title": title, "url": href, "snippet": snippet})
snippet = ""
parent_tr = a_tag.find_parent("tr")
if parent_tr:
next_tr = parent_tr.find_next_sibling("tr")
if next_tr:
snippet_td = next_tr.find("td", class_="result-snippet")
if snippet_td:
snippet = snippet_td.get_text(strip=True)[:300]
elif next_tr:
snippet = next_tr.get_text(strip=True)[:300]
results.append({"title": title, "url": href, "snippet": snippet})
if len(results) >= n:
break
......@@ -330,7 +409,7 @@ async def _ddg_lite_search(query: str, n: int) -> list[dict]:
for a_tag in soup.select("td a[href^='http']"):
href = a_tag.get("href", "")
title = a_tag.get_text(strip=True)
if title and "duckduckgo" not in href.lower() and len(title) > 5:
if _is_valid_result(href, title) and len(title) > 5:
results.append({"title": title, "url": href, "snippet": ""})
if len(results) >= n:
break
......@@ -347,7 +426,7 @@ async def _ddg_lite_search(query: str, n: int) -> list[dict]:
# ═══════════════════════════════════════════════════
async def _brave_html_search(query: str, n: int) -> list[dict]:
"""Scrape Brave Search as a last resort."""
"""Scrape Brave Search as a fallback."""
try:
headers = _get_headers()
headers["Referer"] = "https://search.brave.com/"
......@@ -371,7 +450,6 @@ async def _brave_html_search(query: str, n: int) -> list[dict]:
soup = BeautifulSoup(html, "html.parser")
results = []
# Brave uses various container classes
for item in soup.select(".snippet, [data-type='web']"):
title_el = item.select_one(".snippet-title, .title, a[href^='http']")
desc_el = item.select_one(".snippet-description, .snippet-content, .description")
......@@ -382,7 +460,6 @@ async def _brave_html_search(query: str, n: int) -> list[dict]:
title = title_el.get_text(strip=True)
href = title_el.get("href", "")
# Find the actual URL
if not href.startswith("http"):
link = item.select_one("a[href^='http']")
if link:
......@@ -390,7 +467,7 @@ async def _brave_html_search(query: str, n: int) -> list[dict]:
snippet = desc_el.get_text(strip=True) if desc_el else ""
if title and href and href.startswith("http") and "brave.com" not in href:
if _is_valid_result(href, title):
results.append({"title": title, "url": href, "snippet": snippet})
if len(results) >= n:
......@@ -404,6 +481,86 @@ async def _brave_html_search(query: str, n: int) -> list[dict]:
return []
# ═══════════════════════════════════════════════════
# Strategy 5: Google HTML scraping (last resort)
# ═══════════════════════════════════════════════════
async def _google_html_search(query: str, n: int) -> list[dict]:
    """Scrape Google search results as absolute last resort.

    Requests the English/US results page, extracts (title, url, snippet) from
    the known result containers and, if none match, falls back to any
    ``<a>`` that wraps an ``<h3>``. Every hit is filtered through
    ``_is_valid_result`` and each URL is returned at most once.

    Args:
        query: Search query text.
        n: Maximum number of results to return.

    Returns:
        Up to ``n`` dicts with "title", "url" and "snippet" keys; an empty
        list on a non-200 response or on any error (errors are logged).
    """
    try:
        headers = _get_headers()
        headers["Referer"] = "https://www.google.com/"

        async with httpx.AsyncClient(
            timeout=20.0,
            follow_redirects=True,
        ) as client:
            resp = await client.get(
                "https://www.google.com/search",
                params={"q": query, "hl": "en", "gl": "us", "num": str(n + 5)},
                headers=headers,
            )

        if resp.status_code != 200:
            logger.warning(f"Google returned status {resp.status_code}")
            return []

        from bs4 import BeautifulSoup
        soup = BeautifulSoup(resp.text, "html.parser")
        results = []
        seen_urls = set()  # the container selectors can overlap; keep each URL once

        # Google search results are in divs with class 'g' or similar
        for g_div in soup.select("div.g, div.tF2Cxc, div[data-sokoban-container]"):
            # Find the first link that resolves to an absolute URL. Relative
            # "/url?q=..." redirects are unwrapped first; selecting only
            # a[href^='http'] would skip them and make the unwrap unreachable.
            link_el = None
            href = ""
            for a_tag in g_div.select("a[href]"):
                candidate = _unwrap_google_href(a_tag.get("href", ""))
                if candidate.startswith("http"):
                    link_el, href = a_tag, candidate
                    break
            if link_el is None:
                continue

            # Find title
            title_el = g_div.select_one("h3")
            title = title_el.get_text(strip=True) if title_el else link_el.get_text(strip=True)

            # Find snippet
            snippet_el = g_div.select_one(".VwiC3b, .IsZvec, .s3v9rd, span.st")
            snippet = snippet_el.get_text(strip=True) if snippet_el else ""

            if href not in seen_urls and _is_valid_result(href, title):
                seen_urls.add(href)
                results.append({"title": title, "url": href, "snippet": snippet})
                if len(results) >= n:
                    break

        # Fallback: just find any h3 > a patterns
        if not results:
            for h3 in soup.find_all("h3"):
                parent_a = h3.find_parent("a")
                if not parent_a:
                    continue
                href = _unwrap_google_href(parent_a.get("href", ""))
                title = h3.get_text(strip=True)
                if href not in seen_urls and _is_valid_result(href, title):
                    seen_urls.add(href)
                    results.append({"title": title, "url": href, "snippet": ""})
                    if len(results) >= n:
                        break

        logger.info(f"Google HTML parsed {len(results)} results")
        return results

    except Exception as e:
        logger.error(f"Google HTML search error: {type(e).__name__}: {e}")
        return []


def _unwrap_google_href(href: str) -> str:
    """Return the target of a Google ``/url?...`` redirect link, else ``href`` unchanged."""
    if href.startswith("/url?"):
        params = parse_qs(urlparse(href).query)
        return params.get("q", params.get("url", [href]))[0]
    return href
# ═══════════════════════════════════════════════════
# Page content fetcher
# ═══════════════════════════════════════════════════
......
......@@ -7,9 +7,10 @@ import {
Edit3, Check, X, GitBranch,
} from "lucide-react";
export default function Sidebar({ activeChatId, onSelectChat, isOpen, onClose }) {
export default function Sidebar({ mobile, onClose }) {
const { state, dispatch } = useApp();
const nav = useNavigate();
const activeChatId = state.activeChatId;
const [editId, setEditId] = useState(null);
const [editTitle, setEditTitle] = useState("");
......@@ -22,11 +23,15 @@ export default function Sidebar({ activeChatId, onSelectChat, isOpen, onClose })
})();
}, [state.token, dispatch]);
// Mark the given chat as the active one in app state, then call onClose
// if the parent supplied it.
function handleSelectChat(chatId) {
dispatch({ type: "SET_ACTIVE_CHAT", chatId });
if (onClose) onClose();
}
// Create a chat on the server, add it to app state and make it the active chat.
async function handleNew() {
  try {
    const chat = await createChat(state.token);
    dispatch({ type: "ADD_CHAT", chat });
    // `onSelectChat` is not among the props this component destructures
    // ({ mobile, onClose }); select through handleSelectChat, which dispatches
    // SET_ACTIVE_CHAT and calls onClose when provided.
    handleSelectChat(chat.id);
  } catch (err) {
    // Keep the failure non-fatal for the UI, but do not hide it.
    console.error("Failed to create chat:", err);
  }
}
......@@ -36,7 +41,6 @@ export default function Sidebar({ activeChatId, onSelectChat, isOpen, onClose })
try {
await deleteChat(state.token, chatId);
dispatch({ type: "REMOVE_CHAT", chatId });
if (activeChatId === chatId) onSelectChat(null);
} catch { }
}
......@@ -52,73 +56,71 @@ export default function Sidebar({ activeChatId, onSelectChat, isOpen, onClose })
const isSuperadmin = state.user?.role === "superadmin";
return (
<>
{isOpen && <div className="fixed inset-0 bg-black/50 z-40 md:hidden" onClick={onClose} />}
<div className={`fixed md:static z-50 inset-y-0 left-0 w-72 bg-anton-surface border-r border-anton-border flex flex-col transition-transform duration-200 ${isOpen ? "translate-x-0" : "-translate-x-full md:translate-x-0"}`}>
{/* Header */}
<div className="p-3 border-b border-anton-border">
<div className="flex items-center gap-2 mb-3">
<div className="w-8 h-8 rounded-lg bg-gradient-to-br from-anton-accent to-red-600 flex items-center justify-center">
<Flame size={16} className="text-white" />
</div>
<div>
<h1 className="text-sm font-bold text-white">Son of Anton</h1>
<p className="text-[10px] text-anton-muted">v4.0.0 — The Architect</p>
</div>
<div className={`${mobile ? "h-full" : "h-dvh"} w-72 bg-anton-surface border-r border-anton-border flex flex-col`}>
{/* Header */}
<div className="p-3 border-b border-anton-border">
<div className="flex items-center gap-2 mb-3">
<div className="w-8 h-8 rounded-lg bg-gradient-to-br from-anton-accent to-red-600 flex items-center justify-center">
<Flame size={16} className="text-white" />
</div>
<div>
<h1 className="text-sm font-bold text-white">Son of Anton</h1>
<p className="text-[10px] text-anton-muted">v4.1.0 — The Architect</p>
</div>
<button onClick={handleNew} className="w-full flex items-center justify-center gap-1.5 bg-anton-accent text-white rounded-lg py-2 text-sm hover:opacity-80 transition">
<Plus size={16} /> New Chat
</button>
</div>
<button onClick={handleNew} className="w-full flex items-center justify-center gap-1.5 bg-anton-accent text-white rounded-lg py-2 text-sm hover:opacity-80 transition">
<Plus size={16} /> New Chat
</button>
</div>
{/* Chat list */}
<div className="flex-1 overflow-y-auto p-2 space-y-0.5">
{state.chats.map((c) => (
<div key={c.id} onClick={() => { onSelectChat(c.id); onClose?.(); }}
className={`group flex items-center gap-2 px-3 py-2 rounded-lg cursor-pointer transition text-sm ${activeChatId === c.id ? "bg-anton-accent/15 text-white" : "text-anton-muted hover:bg-anton-card hover:text-white"}`}>
<MessageSquare size={14} className="shrink-0" />
{editId === c.id ? (
<div className="flex-1 flex items-center gap-1">
<input value={editTitle} onChange={(e) => setEditTitle(e.target.value)} onKeyDown={(e) => e.key === "Enter" && handleRename(c.id)}
className="flex-1 bg-anton-bg border border-anton-border rounded px-1.5 py-0.5 text-xs text-white" autoFocus />
<button onClick={() => handleRename(c.id)} className="text-green-400"><Check size={12} /></button>
<button onClick={() => setEditId(null)} className="text-red-400"><X size={12} /></button>
{/* Chat list */}
<div className="flex-1 overflow-y-auto p-2 space-y-0.5">
{state.chats.map((c) => (
<div key={c.id} onClick={() => handleSelectChat(c.id)}
className={`group flex items-center gap-2 px-3 py-2 rounded-lg cursor-pointer transition text-sm ${activeChatId === c.id ? "bg-anton-accent/15 text-white" : "text-anton-muted hover:bg-anton-card hover:text-white"}`}>
<MessageSquare size={14} className="shrink-0" />
{editId === c.id ? (
<div className="flex-1 flex items-center gap-1">
<input value={editTitle} onChange={(e) => setEditTitle(e.target.value)} onKeyDown={(e) => e.key === "Enter" && handleRename(c.id)}
className="flex-1 bg-anton-bg border border-anton-border rounded px-1.5 py-0.5 text-xs text-white" autoFocus
onClick={(e) => e.stopPropagation()} />
<button onClick={(e) => { e.stopPropagation(); handleRename(c.id); }} className="text-green-400"><Check size={12} /></button>
<button onClick={(e) => { e.stopPropagation(); setEditId(null); }} className="text-red-400"><X size={12} /></button>
</div>
) : (
<>
<span className="flex-1 truncate text-xs">{c.title}</span>
<div className="flex gap-0.5 opacity-0 group-hover:opacity-100 transition-opacity">
{c.linked_repo_id && <GitBranch size={11} className="text-orange-400" />}
<button onClick={(e) => { e.stopPropagation(); setEditId(c.id); setEditTitle(c.title); }} className="p-0.5 hover:text-anton-accent"><Edit3 size={11} /></button>
<button onClick={(e) => handleDelete(e, c.id)} className="p-0.5 hover:text-red-400"><Trash2 size={11} /></button>
</div>
) : (
<>
<span className="flex-1 truncate text-xs">{c.title}</span>
<div className="flex gap-0.5 opacity-0 group-hover:opacity-100 transition-opacity">
{c.linked_repo_id && <GitBranch size={11} className="text-orange-400" />}
<button onClick={(e) => { e.stopPropagation(); setEditId(c.id); setEditTitle(c.title); }} className="p-0.5 hover:text-anton-accent"><Edit3 size={11} /></button>
<button onClick={(e) => handleDelete(e, c.id)} className="p-0.5 hover:text-red-400"><Trash2 size={11} /></button>
</div>
</>
)}
</div>
))}
</div>
</>
)}
</div>
))}
</div>
{/* Footer */}
<div className="p-2 border-t border-anton-border space-y-0.5">
{isSuperadmin && (
<>
<button onClick={() => nav("/gitlab")} className="w-full flex items-center gap-2 px-3 py-2 rounded-lg text-sm text-orange-400 hover:bg-anton-card transition">
<GitBranch size={14} /> GitLab Center
</button>
<button onClick={() => nav("/admin")} className="w-full flex items-center gap-2 px-3 py-2 rounded-lg text-sm text-anton-muted hover:bg-anton-card hover:text-white transition">
<Shield size={14} /> Admin
</button>
</>
)}
<button onClick={() => nav("/knowledge")} className="w-full flex items-center gap-2 px-3 py-2 rounded-lg text-sm text-anton-muted hover:bg-anton-card hover:text-white transition">
<BookOpen size={14} /> Knowledge
</button>
<button onClick={() => dispatch({ type: "LOGOUT" })} className="w-full flex items-center gap-2 px-3 py-2 rounded-lg text-sm text-anton-muted hover:bg-anton-card hover:text-red-400 transition">
<LogOut size={14} /> Logout
</button>
<div className="px-3 py-1 text-[10px] text-anton-muted">{state.user?.username}{state.user?.role}</div>
</div>
{/* Footer */}
<div className="p-2 border-t border-anton-border space-y-0.5">
{isSuperadmin && (
<>
<button onClick={() => nav("/gitlab")} className="w-full flex items-center gap-2 px-3 py-2 rounded-lg text-sm text-orange-400 hover:bg-anton-card transition">
<GitBranch size={14} /> GitLab Center
</button>
<button onClick={() => nav("/admin")} className="w-full flex items-center gap-2 px-3 py-2 rounded-lg text-sm text-anton-muted hover:bg-anton-card hover:text-white transition">
<Shield size={14} /> Admin
</button>
</>
)}
<button onClick={() => nav("/knowledge")} className="w-full flex items-center gap-2 px-3 py-2 rounded-lg text-sm text-anton-muted hover:bg-anton-card hover:text-white transition">
<BookOpen size={14} /> Knowledge
</button>
<button onClick={() => dispatch({ type: "LOGOUT" })} className="w-full flex items-center gap-2 px-3 py-2 rounded-lg text-sm text-anton-muted hover:bg-anton-card hover:text-red-400 transition">
<LogOut size={14} /> Logout
</button>
<div className="px-3 py-1 text-[10px] text-anton-muted">{state.user?.username}{state.user?.role}</div>
</div>
</>
</div>
);
}
\ No newline at end of file
......@@ -29,7 +29,7 @@ export default function ChatPage() {
return (
<div className="h-full h-dvh flex overflow-hidden bg-anton-bg">
{/* Desktop sidebar */}
{/* Desktop sidebar — always visible */}
<div className="hidden sm:flex">
<Sidebar />
</div>
......@@ -75,7 +75,7 @@ export default function ChatPage() {
{/* Chat or empty state */}
{state.activeChatId ? (
<ChatView chatId={state.activeChatId} />
<ChatView key={state.activeChatId} chatId={state.activeChatId} />
) : (
<div className="flex-1 flex items-center justify-center p-6">
<div className="text-center max-w-sm">
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment