Function bodies 35 total
normalize_url function · python · L43-L48 (6 LOC)scripts/generate-bloom.py
def normalize_url(raw: str) -> str | None:
    """Normalize a URL using the same rules as the extension.

    Delegates to ``_normalize_url_inner`` and turns any ``Exception`` it
    raises into a ``None`` result, so callers only ever see a normalized
    string or ``None``.
    """
    try:
        normalized = _normalize_url_inner(raw)
    except Exception:
        # Best-effort: a URL that cannot be processed is reported as "no result".
        normalized = None
    return normalized
# _normalize_url_inner function · python · L51-L92 (42 LOC)scripts/generate-bloom.py
def _normalize_url_inner(raw: str) -> str | None:
parsed = urlparse(raw)
if parsed.scheme not in ("http", "https"):
return None
scheme = "https"
hostname = parsed.hostname
if not hostname:
return None
hostname = hostname.lower()
if hostname.startswith("www."):
hostname = hostname[4:]
# YouTube special handling
if hostname in ("youtube.com", "m.youtube.com"):
if parsed.path == "/watch":
from urllib.parse import parse_qs
params = parse_qs(parsed.query)
video_id = params.get("v", [None])[0]
if video_id:
return f"https://youtube.com/watch?v={video_id}"
if parsed.path.startswith("/embed/"):
parts = parsed.path.split("/")
video_id = parts[2] if len(parts) > 2 else None
if video_id:
return f"https://youtube.com/watch?v={video_id}"
elif hostname == "youtu.be":
video_id = parsed.path.lstrifnv1a function · python · L95-L101 (7 LOC)scripts/generate-bloom.py
def fnv1a(data: bytes, seed: int) -> int:
    """FNV-1a hash matching the extension's JS implementation.

    The starting state is 2166136261 XOR ``seed``. Each byte of ``data`` is
    XOR-ed into the state, which is then multiplied by 16777619 and truncated
    to 32 bits. The final state is returned.
    """
    state = 2166136261 ^ seed
    for octet in data:
        # XOR first, then multiply; the mask keeps the state within 32 bits.
        state = ((state ^ octet) * 16777619) & 0xFFFFFFFF
    return state
# bloom_add function · python · L104-L108 (5 LOC)scripts/generate-bloom.py
def bloom_add(bits: bitarray, value: str, num_hashes: int, num_bits: int):
    """Mark ``value`` as present in the Bloom filter ``bits``.

    ``value`` is UTF-8 encoded and hashed ``num_hashes`` times with
    ``fnv1a``, using seeds ``0 .. num_hashes - 1``. For each hash the bit at
    index ``hash % num_bits`` is set to 1. ``bits`` is modified in place.
    """
    payload = value.encode("utf-8")
    for seed in range(num_hashes):
        position = fnv1a(payload, seed) % num_bits
        bits[position] = 1
# main function · python · L111-L151 (41 LOC)scripts/generate-bloom.py
def main():
parser = argparse.ArgumentParser(description="Generate Bloom filter for HN URLs")
parser.add_argument("--output", default="bloom.bin", help="Output file path")
args = parser.parse_args()
print("Querying BigQuery for HN URLs...")
client = bigquery.Client()
query_job = client.query(BIGQUERY_QUERY)
rows = list(query_job.result())
print(f" Fetched {len(rows)} URLs")
print("Normalizing URLs...")
normalized = set()
skipped = 0
for row in rows:
result = normalize_url(row.url)
if result:
normalized.add(result)
else:
skipped += 1
print(f" {len(normalized)} unique normalized URLs ({skipped} skipped)")
print(f"Building Bloom filter (bits={NUM_BITS}, hashes={NUM_HASHES})...")
bits = bitarray(NUM_BITS)
bits.setall(0)
for url in normalized:
bloom_add(bits, url, NUM_HASHES, NUM_BITS)
fill_rate = bits.count() / NUM_BITS
theoretical_fpr = fill_rate**NUM_HupdateBadge function · typescript · L5-L14 (10 LOC)src/entrypoints/background.ts
/**
 * Show the number of discovered discussions on the toolbar badge of a tab.
 *
 * With no discussions the badge text is cleared. Otherwise the count is shown
 * and the badge background is set to `#6366f1`.
 *
 * @param tabId - Tab whose badge is updated.
 * @param discussions - Discussions found for the tab's page.
 */
async function updateBadge(tabId: number, discussions: Discussion[]): Promise<void> {
  const total = discussions.length;

  if (total === 0) {
    // Nothing found: clear the badge and leave its colour untouched.
    await browser.action.setBadgeText({ tabId, text: '' });
    return;
  }

  await browser.action.setBadgeText({ tabId, text: String(total) });
  await browser.action.setBadgeBackgroundColor({ tabId, color: '#6366f1' });
}
// onTabUpdated function · typescript · L16-L24 (9 LOC)src/entrypoints/background.ts
/**
 * Run discussion discovery for a tab's URL and reflect the result on its badge.
 *
 * If discovery or the badge update throws, the error is logged and the badge
 * text is cleared.
 *
 * @param tabId - Tab that was updated.
 * @param url - URL currently loaded in that tab.
 */
async function onTabUpdated(tabId: number, url: string): Promise<void> {
  // Failure path: log the cause, then blank the badge for this tab.
  const clearBadgeAfterFailure = async (cause: unknown): Promise<void> => {
    console.error('[discussed] discovery failed:', cause);
    await browser.action.setBadgeText({ tabId, text: '' });
  };

  try {
    const found = await discoverDiscussions(url);
    await updateBadge(tabId, found);
  } catch (cause) {
    await clearBadgeAfterFailure(cause);
  }
}
// Open data scored by Repobility · https://repobility.com
fnv1a function · typescript · L21-L28 (8 LOC)src/lib/bloom.ts
/**
 * Seeded 32-bit FNV-1a hash.
 *
 * The starting state is 2166136261 XOR `seed`. Each byte is XOR-ed into the
 * state, which is then multiplied by 16777619 using 32-bit integer
 * multiplication (`Math.imul`).
 *
 * @param data - Bytes to hash.
 * @param seed - Value mixed into the starting state.
 * @returns The hash as an unsigned 32-bit integer.
 */
function fnv1a(data: Uint8Array, seed: number): number {
  const mixed = data.reduce(
    (state, octet) => Math.imul(state ^ octet, 16777619),
    2166136261 ^ seed,
  );
  // `>>> 0` reinterprets the signed 32-bit state as unsigned.
  return mixed >>> 0;
}
// bloomCheck function · typescript · L30-L42 (13 LOC)src/lib/bloom.ts
/**
 * Test whether `value` may be present in a Bloom filter.
 *
 * The UTF-8 bytes of `value` are hashed `filter.numHashes` times with `fnv1a`
 * (seeds 0, 1, ...). Each hash modulo `filter.numBits` selects a bit, read
 * from `filter.buffer` as byte `index >>> 3`, bit `index & 7`.
 *
 * @returns `false` as soon as one selected bit is clear; `true` if all are set.
 */
function bloomCheck(filter: StoredBloom, value: string): boolean {
  const bytes = new TextEncoder().encode(value);

  for (let seed = 0; seed < filter.numHashes; seed += 1) {
    const position = fnv1a(bytes, seed) % filter.numBits;
    const mask = 1 << (position & 7);
    const bitIsSet = (filter.buffer[position >>> 3] & mask) !== 0;
    if (!bitIsSet) return false;
  }

  return true;
}
// parseBloomFilter function · typescript · L44-L50 (7 LOC)src/lib/bloom.ts
/**
 * Decode a serialized Bloom filter.
 *
 * Layout as read here: bytes 0-3 hold the hash count and bytes 4-7 the bit
 * count, both little-endian unsigned 32-bit integers. Every byte from offset 8
 * onward is the bit array, copied into a plain `number[]`.
 *
 * @param data - Raw bytes of the serialized filter.
 * @returns The parsed hash count, bit count and bit-array bytes.
 */
function parseBloomFilter(data: ArrayBuffer): StoredBloom {
  const header = new DataView(data);
  const littleEndian = true;
  const numHashes = header.getUint32(0, littleEndian);
  const numBits = header.getUint32(4, littleEndian);

  const payload = new Uint8Array(data, 8);
  return { buffer: Array.from(payload), numHashes, numBits };
}
// loadFromStorage function · typescript · L52-L55 (4 LOC)src/lib/bloom.ts
/**
 * Read the Bloom filter stored under `BLOOM_STORAGE_KEY` in extension local
 * storage.
 *
 * @returns The stored value, or `null` when it is `null` or `undefined`.
 */
async function loadFromStorage(): Promise<StoredBloom | null> {
  const { [BLOOM_STORAGE_KEY]: stored } = await browser.storage.local.get(BLOOM_STORAGE_KEY);
  return (stored as StoredBloom) ?? null;
}
// saveToStorage function · typescript · L57-L62 (6 LOC)src/lib/bloom.ts
/**
 * Persist a Bloom filter together with its version string.
 *
 * Both values are written in one `storage.local.set` call, under
 * `BLOOM_STORAGE_KEY` and `BLOOM_VERSION_KEY` respectively.
 *
 * @param filter - Parsed filter to store.
 * @param version - Version identifier stored alongside it.
 */
async function saveToStorage(filter: StoredBloom, version: string): Promise<void> {
  const payload = {
    [BLOOM_STORAGE_KEY]: filter,
    [BLOOM_VERSION_KEY]: version,
  };
  await browser.storage.local.set(payload);
}
// getBloomFilter function · typescript · L64-L68 (5 LOC)src/lib/bloom.ts
/**
 * Return the Bloom filter, using the module-level `cachedFilter` when set.
 *
 * If nothing is cached, the filter is loaded from storage and the result
 * (possibly `null`) is stored in `cachedFilter` before being returned.
 */
export async function getBloomFilter(): Promise<StoredBloom | null> {
  if (!cachedFilter) {
    cachedFilter = await loadFromStorage();
  }
  return cachedFilter;
}
// checkBloomFilter function · typescript · L70-L72 (3 LOC)src/lib/bloom.ts
/**
 * Exported wrapper around `bloomCheck`.
 *
 * @param filter - Filter to query.
 * @param url - Value to look up.
 * @returns Whatever `bloomCheck(filter, url)` returns.
 */
export function checkBloomFilter(filter: StoredBloom, url: string): boolean {
  const maybePresent = bloomCheck(filter, url);
  return maybePresent;
}
// updateBloomFilter function · typescript · L74-L106 (33 LOC)src/lib/bloom.ts
export async function updateBloomFilter(): Promise<void> {
try {
// Check latest release
const releaseResp = await fetch(`https://api.github.com/repos/${BLOOM_REPO}/releases/latest`);
if (!releaseResp.ok) return;
const release: {
tag_name: string;
assets: Array<{ name: string; browser_download_url: string }>;
} = await releaseResp.json();
// Check if we already have this version
const stored = await browser.storage.local.get(BLOOM_VERSION_KEY);
const currentVersion = stored[BLOOM_VERSION_KEY] as string | undefined;
if (currentVersion === release.tag_name) return;
// Find bloom filter asset
const asset = release.assets.find((a) => a.name === 'bloom.bin');
if (!asset) return;
// Download
const filterResp = await fetch(asset.browser_download_url);
if (!filterResp.ok) return;
const data = await filterResp.arrayBuffer();
const filter = parseBloomFilter(data);
await saveToStorage(filter, release.tag_name);
cachedFilter = filter;
} catch (erProvenance: Repobility (https://repobility.com) — every score reproducible from /scan/
cacheGet function · typescript · L8-L20 (13 LOC)src/lib/cache.ts
/**
 * Look up a cached value in extension local storage.
 *
 * The entry is stored under `PREFIX + key`. An entry whose `expiresAt` lies in
 * the past is removed from storage and treated as missing.
 *
 * @param key - Cache key without the prefix.
 * @returns The cached data, or `null` when absent or expired.
 */
export async function cacheGet<T>(key: string): Promise<T | null> {
  const storageKey = `${PREFIX}${key}`;
  const stored = await browser.storage.local.get(storageKey);
  const entry = stored[storageKey] as CacheEntry<T> | undefined;
  if (!entry) return null;

  const expired = Date.now() > entry.expiresAt;
  if (!expired) return entry.data;

  await browser.storage.local.remove(storageKey);
  return null;
}
// cacheSet function · typescript · L22-L29 (8 LOC)src/lib/cache.ts
/**
 * Store a value in extension local storage with an expiry time.
 *
 * The entry is written under `PREFIX + key` and expires `ttlMs` milliseconds
 * after the moment of the call.
 *
 * @param key - Cache key without the prefix.
 * @param data - Value to cache.
 * @param ttlMs - Lifetime of the entry in milliseconds.
 */
export async function cacheSet<T>(key: string, data: T, ttlMs: number): Promise<void> {
  const expiresAt = Date.now() + ttlMs;
  const entry: CacheEntry<T> = { data, expiresAt };
  await browser.storage.local.set({ [`${PREFIX}${key}`]: entry });
}
// flattenHnComments function · typescript · L20-L36 (17 LOC)src/lib/comments.ts
/**
 * Flatten an HN item tree into a list of comments, parents before children.
 *
 * An item contributes a comment only when it has both an author and text. The
 * text is passed through `stripHtml` and a missing `points` becomes a score
 * of 0. Children are visited in order with `depth + 1`, whether or not the
 * parent contributed.
 *
 * @param item - Root of the (sub)tree to flatten.
 * @param depth - Depth assigned to `item` itself.
 */
function flattenHnComments(item: HnItem, depth = 0): Comment[] {
  const own: Comment[] =
    item.author && item.text
      ? [
          {
            id: String(item.id),
            author: item.author,
            text: stripHtml(item.text),
            score: item.points ?? 0,
            depth,
            platform: 'hn',
          },
        ]
      : [];

  const nested = (item.children ?? []).flatMap((child) => flattenHnComments(child, depth + 1));
  return [...own, ...nested];
}
// fetchHnComments function · typescript · L38-L47 (10 LOC)src/lib/comments.ts
/**
 * Fetch the comment tree of an HN story from the Algolia items endpoint and
 * flatten it.
 *
 * @param storyId - Story identifier inserted into the request URL.
 * @returns The flattened comments, or an empty list on a non-OK response or
 *   any thrown error.
 */
export async function fetchHnComments(storyId: string): Promise<Comment[]> {
  const endpoint = `https://hn.algolia.com/api/v1/items/${storyId}`;
  try {
    const response = await fetch(endpoint);
    if (response.ok) {
      const root: HnItem = await response.json();
      return flattenHnComments(root);
    }
  } catch {
    // Best-effort: failures fall through to the empty result below.
  }
  return [];
}
// flattenRedditComments function · typescript · L60-L82 (23 LOC)src/lib/comments.ts
/**
 * Flatten a Reddit comment listing into a list of comments, parents before
 * their replies.
 *
 * Only children of kind `t1` are considered. A child contributes a comment
 * when it has an author and a body and the author is neither `[deleted]` nor
 * `AutoModerator`. Missing `id`, `score` and `depth` default to `''`, 0 and 0.
 * Replies are followed whenever `replies` is an object carrying
 * `data.children`, even if the parent itself was filtered out.
 */
function flattenRedditComments(
  children: Array<{ kind: string; data: RedditCommentData }>,
): Comment[] {
  return children
    .filter((child) => child.kind === 't1')
    .flatMap(({ data: d }) => {
      const collected: Comment[] = [];

      const keep =
        d.author && d.body && d.author !== '[deleted]' && d.author !== 'AutoModerator';
      if (keep) {
        collected.push({
          id: d.id ?? '',
          author: d.author,
          text: d.body,
          score: d.score ?? 0,
          depth: d.depth ?? 0,
          platform: 'reddit',
        });
      }

      // `replies` is followed only when it is an object with nested children.
      if (d.replies && typeof d.replies === 'object' && d.replies.data?.children) {
        collected.push(...flattenRedditComments(d.replies.data.children));
      }

      return collected;
    });
}
// fetchRedditComments function · typescript · L84-L98 (15 LOC)src/lib/comments.ts
/**
 * Fetch and flatten the comments of a Reddit post.
 *
 * Requests `https://www.reddit.com{permalink}.json?limit=100` and flattens the
 * children of the second element of the returned array.
 *
 * @param permalink - Path of the post, appended directly after the host.
 * @returns The flattened comments, or an empty list on a non-OK response, a
 *   response with fewer than two elements, or any thrown error.
 */
export async function fetchRedditComments(permalink: string): Promise<Comment[]> {
  try {
    const response = await fetch(`https://www.reddit.com${permalink}.json?limit=100`, {
      headers: { 'User-Agent': 'discussed/0.1' },
    });
    if (!response.ok) return [];

    const listings: Array<{
      data: { children: Array<{ kind: string; data: RedditCommentData }> };
    }> = await response.json();

    // The comment listing is read from index 1 of the response array.
    return listings.length >= 2 ? flattenRedditComments(listings[1].data.children) : [];
  } catch {
    return [];
  }
}
// stripHtml function · typescript · L102-L112 (11 LOC)src/lib/comments.ts
/**
 * Convert an HTML fragment to plain text.
 *
 * Steps, in order:
 * 1. Every tag is replaced by a single space.
 * 2. Character references are decoded in one pass: the named entities
 *    amp, lt, gt, quot, apos and nbsp, plus decimal and hexadecimal numeric
 *    references. Unknown or out-of-range references are left untouched.
 * 3. Runs of whitespace collapse to one space and the result is trimmed.
 *
 * Tags are stripped before decoding so that an escaped angle bracket in the
 * text is not mistaken for markup. Decoding happens in a single pass so that
 * an escaped ampersand followed by an entity name is decoded exactly once.
 *
 * @param html - HTML fragment, e.g. a comment body.
 * @returns The plain-text rendering of `html`.
 */
function stripHtml(html: string): string {
  // A Map (not an object literal) so names like "constructor" never match.
  const namedEntities = new Map<string, string>([
    ['amp', '&'],
    ['lt', '<'],
    ['gt', '>'],
    ['quot', '"'],
    ['apos', "'"],
    ['nbsp', ' '],
  ]);

  // Decode one reference; `body` is the text between the ampersand and the semicolon.
  const decodeEntity = (match: string, body: string): string => {
    if (body.startsWith('#')) {
      const isHex = body.charAt(1).toLowerCase() === 'x';
      const codePoint = Number.parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10);
      const valid = Number.isInteger(codePoint) && codePoint > 0 && codePoint <= 0x10ffff;
      return valid ? String.fromCodePoint(codePoint) : match;
    }
    return namedEntities.get(body.toLowerCase()) ?? match;
  };

  return html
    .replace(/<[^>]+>/g, ' ')
    .replace(/&(#x[0-9a-f]+|#\d+|[a-z]+);/gi, decodeEntity)
    .replace(/\s+/g, ' ')
    .trim();
}
// discoverDiscussions function · typescript · L13-L42 (30 LOC)src/lib/discovery.ts
/**
 * Find discussions of a page on the platforms enabled in the user settings.
 *
 * The URL is normalized first; the query string is kept unless the
 * `ignoreQueryString` setting is on. Results are cached under
 * `discussions:<normalized url>` for `cacheDurationMinutes`. Searches run
 * concurrently and a search that rejects contributes no results.
 *
 * @param rawUrl - URL of the page as seen by the browser.
 * @param options - `force: true` bypasses the cache lookup; the fresh result
 *   is still written to the cache.
 * @returns Discussions from all enabled searches that fulfilled.
 */
export async function discoverDiscussions(
  rawUrl: string,
  options: DiscoverOptions = {},
): Promise<Discussion[]> {
  const userSettings = await settings.getValue();
  const url = normalizeUrl(rawUrl, { keepQueryString: !userSettings.ignoreQueryString });
  const cacheKey = `discussions:${url}`;

  if (!options.force) {
    const cached = await cacheGet<Discussion[]>(cacheKey);
    if (cached) return cached;
  }

  // One row per platform; only the enabled ones are started, in this order.
  const providers = [
    { enabled: userSettings.enableHn, search: searchHn },
    { enabled: userSettings.enableReddit, search: searchReddit },
    { enabled: userSettings.enableLobsters, search: searchLobsters },
  ];
  const pending = providers
    .filter((provider) => provider.enabled)
    .map((provider) => provider.search(url));

  const discussions: Discussion[] = [];
  for (const outcome of await Promise.allSettled(pending)) {
    if (outcome.status === 'fulfilled') discussions.push(...outcome.value);
  }

  const cacheTtlMs = userSettings.cacheDurationMinutes * 60 * 1000;
  await cacheSet(cacheKey, discussions, cacheTtlMs);
  return discussions;
}
// Generated by Repobility's multi-pass static-analysis pipeline (https://repobility.com)
queryAlgolia function · typescript · L20-L41 (22 LOC)src/lib/hn.ts
/**
 * Search the Algolia HN endpoint (`ALGOLIA_SEARCH`) for stories matching a URL.
 *
 * The search is restricted to the `url` attribute and asks for 20 hits per
 * page. Errors from `fetch` or JSON parsing are not caught here.
 *
 * @param url - URL to search for.
 * @returns One discussion per hit, or an empty list on a non-OK response.
 */
async function queryAlgolia(url: string): Promise<Discussion[]> {
  const query = new URLSearchParams({
    query: url,
    restrictSearchableAttributes: 'url',
    hitsPerPage: '20',
  });

  const response = await fetch(`${ALGOLIA_SEARCH}?${query}`);
  if (!response.ok) return [];

  const payload: AlgoliaResponse = await response.json();
  return payload.hits.map((hit) => {
    const storyId = hit.objectID;
    return {
      platform: 'hn' as const,
      title: hit.title,
      url: `https://news.ycombinator.com/item?id=${storyId}`,
      points: hit.points,
      commentCount: hit.num_comments,
      createdAt: new Date(hit.created_at).toISOString(),
      externalId: storyId,
    };
  });
}
// searchHn function · typescript · L43-L58 (16 LOC)src/lib/hn.ts
/**
 * Find HN discussions of a URL, consulting the Bloom filter first.
 *
 * When a filter is available and reports the URL as absent, Algolia is not
 * queried. With no filter, or a "maybe present" answer, the Algolia search
 * runs.
 *
 * @param url - URL to look up.
 * @returns Matching discussions, or an empty list when the filter rules the
 *   URL out or any step throws.
 */
export async function searchHn(url: string): Promise<Discussion[]> {
  try {
    const filter = await getBloomFilter();
    // A missing filter can rule nothing out.
    const ruledOut = filter ? !checkBloomFilter(filter, url) : false;
    return ruledOut ? [] : await queryAlgolia(url);
  } catch {
    return [];
  }
}
// summarize function · typescript · L24-L59 (36 LOC)src/lib/llm.ts
export async function summarize(
commentsText: string,
options: SummarizeOptions,
): Promise<SummarizeResult> {
const userMessage = options.pageTitle
? `Page: ${options.pageTitle}\nURL: ${options.pageUrl}\n\nComments:\n${commentsText}`
: `URL: ${options.pageUrl}\n\nComments:\n${commentsText}`;
const response = await fetch(ANTHROPIC_API, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
'x-api-key': options.apiKey,
'anthropic-version': '2023-06-01',
'anthropic-dangerous-direct-browser-access': 'true',
},
body: JSON.stringify({
model: options.model,
max_tokens: 1024,
system: SYSTEM_PROMPT,
messages: [{ role: 'user', content: userMessage }],
}),
});
if (!response.ok) {
const error = await response.text();
throw new Error(`Anthropic API error (${response.status}): ${error}`);
}
const data: { content: Array<{ type: string; text: string }> } = await response.json();
const text = data.content.find((c) => c.type === 'text')?.searchLobsters function · typescript · L14-L34 (21 LOC)src/lib/lobsters.ts
/**
 * Find Lobsters stories for the domain of a URL.
 *
 * Requests `https://lobste.rs/domains/<hostname>.json` and maps every
 * returned story to a discussion.
 *
 * NOTE(review): the lookup uses only the hostname and the stories are not
 * filtered against `url` here, so this presumably returns stories for other
 * pages on the same domain too. Confirm whether callers expect that.
 *
 * @param url - Page URL; only its hostname is used.
 * @returns One discussion per story, or an empty list on a non-OK response or
 *   any thrown error (including an unparseable `url`).
 */
export async function searchLobsters(url: string): Promise<Discussion[]> {
  try {
    const domain = new URL(url).hostname;
    const response = await fetch(`https://lobste.rs/domains/${domain}.json`);
    if (!response.ok) return [];

    const stories: LobstersStory[] = await response.json();
    return stories.map((story) => {
      const postedAt = new Date(story.created_at).toISOString();
      return {
        platform: 'lobsters' as const,
        title: story.title,
        url: story.comments_url,
        points: story.score,
        commentCount: story.comment_count,
        createdAt: postedAt,
        externalId: story.short_id,
      };
    });
  } catch {
    return [];
  }
}
// preprocessComments function · typescript · L9-L28 (20 LOC)src/lib/preprocess.ts
/**
 * Select and trim comments.
 *
 * Comments with 10 or fewer characters of text are dropped. The rest are
 * sorted by score, highest first, and the first `options.maxComments` are
 * kept. Text longer than `MAX_COMMENT_LENGTH` is cut to that length and
 * suffixed with `...`. The input array is not modified.
 *
 * @param comments - Comments to process.
 * @param options - `maxComments` caps the result size; defaults to 40.
 * @returns New comment objects in descending score order.
 */
export function preprocessComments(
  comments: Comment[],
  options: PreprocessOptions = { maxComments: 40 },
): Comment[] {
  // `filter` yields a fresh array, so sorting it in place leaves `comments` alone.
  const substantive = comments.filter((comment) => comment.text.length > 10);
  substantive.sort((left, right) => right.score - left.score);

  const top = substantive.slice(0, options.maxComments);
  return top.map((comment) => {
    const tooLong = comment.text.length > MAX_COMMENT_LENGTH;
    const text = tooLong ? `${comment.text.slice(0, MAX_COMMENT_LENGTH)}...` : comment.text;
    return { ...comment, text };
  });
}
// formatCommentsForPrompt function · typescript · L30-L37 (8 LOC)src/lib/preprocess.ts
/**
 * Render comments as one text block.
 *
 * Each comment becomes `[<source>, <score> pts] <author>: <text>`, where the
 * source label is `Reddit` for platform `reddit` and `HN` for every other
 * platform. Comments are separated by a blank line.
 *
 * @param comments - Comments to render, in output order.
 * @returns The joined text; an empty string for an empty list.
 */
export function formatCommentsForPrompt(comments: Comment[]): string {
  const rendered: string[] = [];
  for (const comment of comments) {
    const label =
      comment.platform === 'reddit'
        ? `[Reddit, ${comment.score} pts]`
        : `[HN, ${comment.score} pts]`;
    rendered.push(`${label} ${comment.author}: ${comment.text}`);
  }
  return rendered.join('\n\n');
}
// searchReddit function · typescript · L22-L50 (29 LOC)src/lib/reddit.ts
/**
 * Search Reddit (`REDDIT_SEARCH`) for posts linking to a URL.
 *
 * The query is `url:<url>`, sorted by `top`, limited to 25 results.
 *
 * @param url - URL to search for.
 * @returns One discussion per post, or an empty list on a non-OK response or
 *   any thrown error.
 */
export async function searchReddit(url: string): Promise<Discussion[]> {
  try {
    const query = new URLSearchParams({
      q: `url:${url}`,
      sort: 'top',
      limit: '25',
    });
    const requestInit = { headers: { 'User-Agent': 'discussed/0.1' } };

    const response = await fetch(`${REDDIT_SEARCH}?${query}`, requestInit);
    if (!response.ok) return [];

    const listing: RedditListing = await response.json();
    return listing.data.children.map((child) => {
      const post = child.data;
      return {
        platform: 'reddit' as const,
        title: post.title,
        url: `https://www.reddit.com${post.permalink}`,
        points: post.score,
        commentCount: post.num_comments,
        // `created_utc` is multiplied by 1000 before being passed to `Date`.
        createdAt: new Date(post.created_utc * 1000).toISOString(),
        externalId: post.name,
        subreddit: post.subreddit,
      };
    });
  } catch {
    return [];
  }
}
// extractPermalink function · typescript · L16-L23 (8 LOC)src/lib/summarize.ts
/**
 * Extract the path component of a URL.
 *
 * @param redditUrl - Absolute URL of a Reddit post.
 * @returns The URL's pathname, or an empty string when the URL cannot be
 *   parsed.
 */
function extractPermalink(redditUrl: string): string {
  let pathname = '';
  try {
    ({ pathname } = new URL(redditUrl));
  } catch {
    // Unparseable input keeps the empty default.
  }
  return pathname;
}
// Repobility · open methodology · https://repobility.com/research/
fetchAllComments function · typescript · L25-L34 (10 LOC)src/lib/summarize.ts
/**
 * Fetch the comments of every discussion concurrently and merge them.
 *
 * HN discussions are fetched by `externalId`, Reddit discussions by the path
 * of their URL. Any other platform contributes no comments, and a fetch that
 * rejects is skipped.
 *
 * @param discussions - Discussions whose comments are wanted.
 * @returns All fetched comments, grouped in the order of `discussions`.
 */
async function fetchAllComments(discussions: Discussion[]): Promise<Comment[]> {
  const pending = discussions.map((discussion): Promise<Comment[]> => {
    switch (discussion.platform) {
      case 'hn':
        return fetchHnComments(discussion.externalId);
      case 'reddit':
        return fetchRedditComments(extractPermalink(discussion.url));
      default:
        return Promise.resolve([]);
    }
  });

  const merged: Comment[] = [];
  for (const outcome of await Promise.allSettled(pending)) {
    if (outcome.status === 'fulfilled') merged.push(...outcome.value);
  }
  return merged;
}
// summarizeDiscussions function · typescript · L36-L80 (45 LOC)src/lib/summarize.ts
export async function summarizeDiscussions(
pageUrl: string,
discussions: Discussion[],
options: { force?: boolean } = {},
): Promise<SummaryResult> {
const cacheKey = `summary:${pageUrl}`;
if (!options.force) {
const cached = await cacheGet<SummaryResult>(cacheKey);
if (cached) return cached;
}
const userSettings = await settings.getValue();
if (!userSettings.apiKey) {
throw new Error('No API key configured. Add one in extension settings.');
}
const allComments = await fetchAllComments(discussions);
if (allComments.length === 0) {
throw new Error('No comments found to summarize.');
}
const processed = preprocessComments(allComments, {
maxComments: userSettings.maxCommentsForSummary,
});
const commentsText = formatCommentsForPrompt(processed);
const result = await summarize(commentsText, {
apiKey: userSettings.apiKey,
model: userSettings.model,
pageUrl,
});
const summaryResult: SummaryResult = {
summary: result.summary,
model: result.model,extractYouTubeVideoId function · typescript · L7-L26 (20 LOC)src/lib/url.ts
/**
 * Extract a YouTube video id from a URL, if it has one.
 *
 * Recognized forms:
 * - host `youtu.be` (a leading `www.` is ignored): the path after the first
 *   slash;
 * - a host contained in `YOUTUBE_HOSTS` with path `/watch`: the `v` query
 *   parameter;
 * - a host contained in `YOUTUBE_HOSTS` with path `/embed/<id>`: the `<id>`
 *   segment.
 *
 * @param url - Parsed URL to inspect.
 * @returns The video id, or `null` when no form matches. A `/watch` URL
 *   returns whatever `searchParams.get('v')` yields.
 */
function extractYouTubeVideoId(url: URL): string | null {
  const bareHost = url.hostname.replace(/^www\./, '');
  if (bareHost === 'youtu.be') {
    return url.pathname.slice(1) || null;
  }

  // Note: membership is tested with the hostname as given, not `bareHost`.
  if (!YOUTUBE_HOSTS.has(url.hostname)) return null;

  if (url.pathname === '/watch') {
    return url.searchParams.get('v');
  }

  const [, embedId] = url.pathname.match(/^\/embed\/([^/]+)/) ?? [];
  return embedId ?? null;
}
// normalizeUrl function · typescript · L28-L57 (30 LOC)src/lib/url.ts
/**
 * Reduce a URL to a canonical form.
 *
 * Steps: switch the protocol to `https:`, drop a leading `www.` from the
 * hostname, and return `https://youtube.com/watch?v=<id>` when a YouTube
 * video id is found. Otherwise strip the fragment, strip the query string
 * unless `options.keepQueryString` is set, and remove one trailing slash from
 * any path other than the root.
 *
 * @param raw - URL to normalize.
 * @param options - `keepQueryString: true` preserves the query string.
 * @returns The normalized URL as a string.
 * @throws When `raw` cannot be parsed by the `URL` constructor; the error is
 *   not caught here.
 */
export function normalizeUrl(raw: string, options: NormalizeUrlOptions = {}): string {
  const parsed = new URL(raw);

  // Scheme and host are canonicalized before the YouTube check.
  parsed.protocol = 'https:';
  if (parsed.hostname.startsWith('www.')) {
    parsed.hostname = parsed.hostname.slice('www.'.length);
  }

  // Every recognized YouTube video URL variant maps to one canonical form.
  const videoId = extractYouTubeVideoId(parsed);
  if (videoId) {
    return `https://youtube.com/watch?v=${videoId}`;
  }

  parsed.hash = '';
  if (!options.keepQueryString) {
    parsed.search = '';
  }

  // Drop a trailing slash, but leave the root path "/" alone.
  const path = parsed.pathname;
  if (path.length > 1 && path.endsWith('/')) {
    parsed.pathname = path.slice(0, -1);
  }

  return parsed.toString();
}