← back to discussed-dev__extension

Function bodies 35 total

All specs Real LLM only Function bodies
normalize_url function · python · L43-L48 (6 LOC)
scripts/generate-bloom.py
def normalize_url(raw: str) -> str | None:
    """Return the normalized form of ``raw``, or ``None`` when it cannot be normalized.

    The actual work is delegated to ``_normalize_url_inner`` (intended to
    follow the same rules as the extension). Any exception raised by that
    helper is converted into a ``None`` result instead of propagating.
    """
    normalized: str | None
    try:
        normalized = _normalize_url_inner(raw)
    except Exception:
        normalized = None
    return normalized
_normalize_url_inner function · python · L51-L92 (42 LOC)
scripts/generate-bloom.py
def _normalize_url_inner(raw: str) -> str | None:
    parsed = urlparse(raw)

    if parsed.scheme not in ("http", "https"):
        return None

    scheme = "https"

    hostname = parsed.hostname
    if not hostname:
        return None
    hostname = hostname.lower()
    if hostname.startswith("www."):
        hostname = hostname[4:]

    # YouTube special handling
    if hostname in ("youtube.com", "m.youtube.com"):
        if parsed.path == "/watch":
            from urllib.parse import parse_qs

            params = parse_qs(parsed.query)
            video_id = params.get("v", [None])[0]
            if video_id:
                return f"https://youtube.com/watch?v={video_id}"
        if parsed.path.startswith("/embed/"):
            parts = parsed.path.split("/")
            video_id = parts[2] if len(parts) > 2 else None
            if video_id:
                return f"https://youtube.com/watch?v={video_id}"
    elif hostname == "youtu.be":
        video_id = parsed.path.lstri
fnv1a function · python · L95-L101 (7 LOC)
scripts/generate-bloom.py
def fnv1a(data: bytes, seed: int) -> int:
    """Compute a seeded 32-bit FNV-1a hash of ``data``.

    The standard offset basis is XORed with ``seed`` before any input is
    consumed; each byte is then XORed into the state, which is multiplied by
    the FNV prime and truncated to 32 bits. Meant to match the extension's
    JavaScript implementation.
    """
    fnv_prime = 16777619
    mask_32 = 0xFFFFFFFF
    state = 2166136261 ^ seed
    for octet in data:
        state = ((state ^ octet) * fnv_prime) & mask_32
    return state
bloom_add function · python · L104-L108 (5 LOC)
scripts/generate-bloom.py
def bloom_add(bits: bitarray, value: str, num_hashes: int, num_bits: int):
    """Insert ``value`` into the filter by setting one bit per hash seed.

    ``value`` is UTF-8 encoded once, then hashed with ``fnv1a`` for every seed
    in ``range(num_hashes)``; each hash modulo ``num_bits`` selects the index
    in ``bits`` that is set to 1.
    """
    payload = value.encode("utf-8")
    for seed in range(num_hashes):
        position = fnv1a(payload, seed) % num_bits
        bits[position] = 1
main function · python · L111-L151 (41 LOC)
scripts/generate-bloom.py
def main():
    parser = argparse.ArgumentParser(description="Generate Bloom filter for HN URLs")
    parser.add_argument("--output", default="bloom.bin", help="Output file path")
    args = parser.parse_args()

    print("Querying BigQuery for HN URLs...")
    client = bigquery.Client()
    query_job = client.query(BIGQUERY_QUERY)
    rows = list(query_job.result())
    print(f"  Fetched {len(rows)} URLs")

    print("Normalizing URLs...")
    normalized = set()
    skipped = 0
    for row in rows:
        result = normalize_url(row.url)
        if result:
            normalized.add(result)
        else:
            skipped += 1
    print(f"  {len(normalized)} unique normalized URLs ({skipped} skipped)")

    print(f"Building Bloom filter (bits={NUM_BITS}, hashes={NUM_HASHES})...")
    bits = bitarray(NUM_BITS)
    bits.setall(0)
    for url in normalized:
        bloom_add(bits, url, NUM_HASHES, NUM_BITS)

    fill_rate = bits.count() / NUM_BITS
    theoretical_fpr = fill_rate**NUM_H
updateBadge function · typescript · L5-L14 (10 LOC)
src/entrypoints/background.ts
/**
 * Show the number of discussions on the toolbar badge of a tab.
 *
 * With no discussions the badge text is set to an empty string and the
 * background colour is left untouched.
 */
async function updateBadge(tabId: number, discussions: Discussion[]): Promise<void> {
	const hasDiscussions = discussions.length > 0;
	const text = hasDiscussions ? String(discussions.length) : '';

	await browser.action.setBadgeText({ tabId, text });
	if (!hasDiscussions) return;

	await browser.action.setBadgeBackgroundColor({ tabId, color: '#6366f1' });
}
onTabUpdated function · typescript · L16-L24 (9 LOC)
src/entrypoints/background.ts
/**
 * Look up discussions for `url` and reflect the result on the tab's badge.
 *
 * If discovery or the badge update throws, the error is logged and the badge
 * text is reset to an empty string.
 */
async function onTabUpdated(tabId: number, url: string): Promise<void> {
	try {
		const found = await discoverDiscussions(url);
		await updateBadge(tabId, found);
	} catch (err) {
		console.error('[discussed] discovery failed:', err);
		const clearedBadge = { tabId, text: '' };
		await browser.action.setBadgeText(clearedBadge);
	}
}
Open data scored by Repobility · https://repobility.com
fnv1a function · typescript · L21-L28 (8 LOC)
src/lib/bloom.ts
/**
 * Seeded 32-bit FNV-1a hash.
 *
 * The offset basis is XORed with `seed`; every input byte is XORed into the
 * state, which is then multiplied by the FNV prime using 32-bit integer
 * multiplication. The result is returned as an unsigned 32-bit number.
 */
function fnv1a(data: Uint8Array, seed: number): number {
	const folded = data.reduce(
		(state, octet) => Math.imul(state ^ octet, 16777619),
		2166136261 ^ seed,
	);
	// `>>> 0` reinterprets the signed 32-bit state as unsigned.
	return folded >>> 0;
}
bloomCheck function · typescript · L30-L42 (13 LOC)
src/lib/bloom.ts
/**
 * Test whether `value` may be present in the Bloom filter.
 *
 * For every seed in `[0, numHashes)` the UTF-8 bytes of `value` are hashed
 * with `fnv1a`; the hash modulo `numBits` is a bit position, addressed as
 * byte `position >>> 3`, bit `position & 7` (least-significant bit first).
 *
 * @returns `false` as soon as one probed bit is unset, `true` otherwise.
 */
function bloomCheck(filter: StoredBloom, value: string): boolean {
	const bytes = new TextEncoder().encode(value);
	for (let seed = 0; seed < filter.numHashes; seed += 1) {
		const position = fnv1a(bytes, seed) % filter.numBits;
		const mask = 1 << (position & 7);
		const bitIsSet = (filter.buffer[position >>> 3] & mask) !== 0;
		if (!bitIsSet) {
			return false;
		}
	}
	return true;
}
parseBloomFilter function · typescript · L44-L50 (7 LOC)
src/lib/bloom.ts
/**
 * Decode a serialized Bloom filter.
 *
 * Layout read here: bytes 0-3 hold `numHashes` and bytes 4-7 hold `numBits`,
 * both as little-endian unsigned 32-bit integers; every byte after the 8-byte
 * header is filter payload.
 *
 * The input is validated so that a truncated or corrupt download is rejected
 * instead of being accepted: a payload shorter than `numBits` requires would
 * make lookups read missing bytes as "bit not set" and report URLs as absent.
 *
 * @param data - Raw filter bytes, header included.
 * @returns The parsed filter; `buffer` is a plain array of byte values
 *   (presumably so it can be persisted via extension storage — confirm with
 *   the storage helpers).
 * @throws RangeError if the header is incomplete, if `numHashes` or `numBits`
 *   is zero, or if the payload holds fewer than `ceil(numBits / 8)` bytes.
 */
function parseBloomFilter(data: ArrayBuffer): StoredBloom {
	const headerBytes = 8;
	if (data.byteLength < headerBytes) {
		throw new RangeError(
			`Bloom filter data too short: ${data.byteLength} bytes, need at least ${headerBytes}`,
		);
	}

	const view = new DataView(data);
	const numHashes = view.getUint32(0, true);
	const numBits = view.getUint32(4, true);
	if (numHashes === 0 || numBits === 0) {
		throw new RangeError(
			`Bloom filter header invalid: numHashes=${numHashes}, numBits=${numBits}`,
		);
	}

	const payload = new Uint8Array(data, headerBytes);
	const requiredBytes = Math.ceil(numBits / 8);
	if (payload.length < requiredBytes) {
		throw new RangeError(
			`Bloom filter payload truncated: ${payload.length} bytes, need ${requiredBytes}`,
		);
	}

	const buffer = Array.from(payload);
	return { buffer, numHashes, numBits };
}
loadFromStorage function · typescript · L52-L55 (4 LOC)
src/lib/bloom.ts
/**
 * Read the persisted Bloom filter from `browser.storage.local`.
 *
 * @returns The value stored under `BLOOM_STORAGE_KEY`, or `null` when that
 *   entry is missing.
 */
async function loadFromStorage(): Promise<StoredBloom | null> {
	const items = await browser.storage.local.get(BLOOM_STORAGE_KEY);
	const stored = items[BLOOM_STORAGE_KEY] as StoredBloom | undefined;
	return stored ?? null;
}
saveToStorage function · typescript · L57-L62 (6 LOC)
src/lib/bloom.ts
/**
 * Persist a Bloom filter together with its version string.
 *
 * Both values are written in a single `browser.storage.local.set` call, under
 * `BLOOM_STORAGE_KEY` and `BLOOM_VERSION_KEY` respectively.
 */
async function saveToStorage(filter: StoredBloom, version: string): Promise<void> {
	const items = {
		[BLOOM_STORAGE_KEY]: filter,
		[BLOOM_VERSION_KEY]: version,
	};
	await browser.storage.local.set(items);
}
getBloomFilter function · typescript · L64-L68 (5 LOC)
src/lib/bloom.ts
/**
 * Return the Bloom filter, reading it from storage on first use.
 *
 * The result is memoized in the module-level `cachedFilter`; while that is
 * still empty, each call goes back to storage.
 */
export async function getBloomFilter(): Promise<StoredBloom | null> {
	if (!cachedFilter) {
		cachedFilter = await loadFromStorage();
	}
	return cachedFilter;
}
checkBloomFilter function · typescript · L70-L72 (3 LOC)
src/lib/bloom.ts
/**
 * Public membership test: reports whether `url` may be present in `filter`.
 *
 * Thin exported wrapper around `bloomCheck`.
 */
export function checkBloomFilter(filter: StoredBloom, url: string): boolean {
	const maybePresent = bloomCheck(filter, url);
	return maybePresent;
}
updateBloomFilter function · typescript · L74-L106 (33 LOC)
src/lib/bloom.ts
export async function updateBloomFilter(): Promise<void> {
	try {
		// Check latest release
		const releaseResp = await fetch(`https://api.github.com/repos/${BLOOM_REPO}/releases/latest`);
		if (!releaseResp.ok) return;

		const release: {
			tag_name: string;
			assets: Array<{ name: string; browser_download_url: string }>;
		} = await releaseResp.json();

		// Check if we already have this version
		const stored = await browser.storage.local.get(BLOOM_VERSION_KEY);
		const currentVersion = stored[BLOOM_VERSION_KEY] as string | undefined;
		if (currentVersion === release.tag_name) return;

		// Find bloom filter asset
		const asset = release.assets.find((a) => a.name === 'bloom.bin');
		if (!asset) return;

		// Download
		const filterResp = await fetch(asset.browser_download_url);
		if (!filterResp.ok) return;

		const data = await filterResp.arrayBuffer();
		const filter = parseBloomFilter(data);

		await saveToStorage(filter, release.tag_name);
		cachedFilter = filter;
	} catch (er
Provenance: Repobility (https://repobility.com) — every score reproducible from /scan/
cacheGet function · typescript · L8-L20 (13 LOC)
src/lib/cache.ts
/**
 * Fetch a cached value from `browser.storage.local`.
 *
 * The entry lives under `PREFIX + key`. An entry whose `expiresAt` is in the
 * past is removed from storage before `null` is returned.
 *
 * @returns The cached data, or `null` when the entry is missing or expired.
 */
export async function cacheGet<T>(key: string): Promise<T | null> {
	const storageKey = `${PREFIX}${key}`;
	const stored = await browser.storage.local.get(storageKey);
	const entry = stored[storageKey] as CacheEntry<T> | undefined;
	if (!entry) return null;

	const expired = Date.now() > entry.expiresAt;
	if (!expired) return entry.data;

	await browser.storage.local.remove(storageKey);
	return null;
}
cacheSet function · typescript · L22-L29 (8 LOC)
src/lib/cache.ts
/**
 * Store `data` in `browser.storage.local` under `PREFIX + key`.
 *
 * @param ttlMs - Lifetime in milliseconds; the entry's `expiresAt` is the
 *   current time plus this value.
 */
export async function cacheSet<T>(key: string, data: T, ttlMs: number): Promise<void> {
	const storageKey = `${PREFIX}${key}`;
	const expiresAt = Date.now() + ttlMs;
	const entry: CacheEntry<T> = { data, expiresAt };
	await browser.storage.local.set({ [storageKey]: entry });
}
flattenHnComments function · typescript · L20-L36 (17 LOC)
src/lib/comments.ts
/**
 * Flatten an HN item tree into a list of comments, depth-first.
 *
 * An item contributes a comment only when it has both an author and text;
 * its children are always visited, one level deeper than the item itself.
 *
 * @param depth - Nesting level recorded on the comment produced for `item`.
 */
function flattenHnComments(item: HnItem, depth = 0): Comment[] {
	const own: Comment[] =
		item.author && item.text
			? [
					{
						id: String(item.id),
						author: item.author,
						text: stripHtml(item.text),
						score: item.points ?? 0,
						depth,
						platform: 'hn',
					},
				]
			: [];
	const nested = (item.children ?? []).flatMap((child) => flattenHnComments(child, depth + 1));
	return [...own, ...nested];
}
fetchHnComments function · typescript · L38-L47 (10 LOC)
src/lib/comments.ts
/**
 * Download an HN story from the Algolia items endpoint and flatten its
 * comment tree.
 *
 * @returns The flattened comments, or an empty array when the response is
 *   not OK or anything throws.
 */
export async function fetchHnComments(storyId: string): Promise<Comment[]> {
	const endpoint = `https://hn.algolia.com/api/v1/items/${storyId}`;
	try {
		const response = await fetch(endpoint);
		if (response.ok) {
			const root: HnItem = await response.json();
			return flattenHnComments(root);
		}
		return [];
	} catch {
		return [];
	}
}
flattenRedditComments function · typescript · L60-L82 (23 LOC)
src/lib/comments.ts
/**
 * Flatten a Reddit comment listing into a list of comments, depth-first.
 *
 * Only children of kind `t1` are considered. A comment is emitted when it has
 * an author and a body and the author is neither `[deleted]` nor
 * `AutoModerator`; replies are visited even when the parent is skipped.
 */
function flattenRedditComments(
	children: Array<{ kind: string; data: RedditCommentData }>,
): Comment[] {
	return children
		.filter((child) => child.kind === 't1')
		.flatMap(({ data }) => {
			const collected: Comment[] = [];

			const isExcludedAuthor = data.author === '[deleted]' || data.author === 'AutoModerator';
			if (data.author && data.body && !isExcludedAuthor) {
				collected.push({
					id: data.id ?? '',
					author: data.author,
					text: data.body,
					score: data.score ?? 0,
					depth: data.depth ?? 0,
					platform: 'reddit',
				});
			}

			// `replies` is only descended into when it is an object with children.
			const replies = data.replies;
			if (replies && typeof replies === 'object' && replies.data?.children) {
				collected.push(...flattenRedditComments(replies.data.children));
			}

			return collected;
		});
}
fetchRedditComments function · typescript · L84-L98 (15 LOC)
src/lib/comments.ts
/**
 * Download the JSON view of a Reddit thread and flatten its comments.
 *
 * The response is read as an array of listings; comments are taken from the
 * second one.
 *
 * @param permalink - Path of the thread, appended to `https://www.reddit.com`.
 * @returns The flattened comments, or an empty array when the response is not
 *   OK, has fewer than two listings, or anything throws.
 */
export async function fetchRedditComments(permalink: string): Promise<Comment[]> {
	type Listing = { data: { children: Array<{ kind: string; data: RedditCommentData }> } };

	try {
		const response = await fetch(`https://www.reddit.com${permalink}.json?limit=100`, {
			headers: { 'User-Agent': 'discussed/0.1' },
		});
		if (!response.ok) return [];

		const listings: Array<Listing> = await response.json();
		if (listings.length < 2) return [];

		const commentListing = listings[1];
		return flattenRedditComments(commentListing.data.children);
	} catch {
		return [];
	}
}
stripHtml function · typescript · L102-L112 (11 LOC)
src/lib/comments.ts
/**
 * Convert an HTML fragment to plain text.
 *
 * Steps, in order:
 * 1. every tag is replaced by a space;
 * 2. character references are decoded in a single pass — the named
 *    references `&amp;`, `&lt;`, `&gt;`, `&quot;`, `&apos;` and any numeric
 *    reference (`&#39;`, `&#x27;`, `&#x2F;`, …);
 * 3. runs of whitespace are collapsed to one space and the result is trimmed.
 *
 * Decoding in one pass matters: output produced by decoding is never scanned
 * again, so `&amp;lt;` yields the literal text `&lt;` rather than `<`.
 * Unknown named references and numeric references that do not denote a valid
 * code point are left untouched.
 *
 * @param html - HTML fragment to strip.
 * @returns The plain-text content.
 */
function stripHtml(html: string): string {
	const namedEntities = new Map<string, string>([
		['amp', '&'],
		['lt', '<'],
		['gt', '>'],
		['quot', '"'],
		['apos', "'"],
	]);

	const decodeReference = (match: string, hex?: string, dec?: string, name?: string): string => {
		if (name !== undefined) {
			return namedEntities.get(name) ?? match;
		}
		const codePoint = hex !== undefined ? parseInt(hex, 16) : parseInt(dec ?? '', 10);
		const isSurrogate = codePoint >= 0xd800 && codePoint <= 0xdfff;
		const isValid = codePoint > 0 && codePoint <= 0x10ffff && !isSurrogate;
		return isValid ? String.fromCodePoint(codePoint) : match;
	};

	return html
		.replace(/<[^>]+>/g, ' ')
		.replace(/&(?:#[xX]([0-9a-fA-F]+)|#([0-9]+)|(amp|lt|gt|quot|apos));/g, decodeReference)
		.replace(/\s+/g, ' ')
		.trim();
}
discoverDiscussions function · typescript · L13-L42 (30 LOC)
src/lib/discovery.ts
/**
 * Find discussions of a page on every platform enabled in the user settings.
 *
 * The URL is normalized first (keeping the query string unless the
 * `ignoreQueryString` setting is on) and the result is cached under
 * `discussions:<normalized url>` for `cacheDurationMinutes`.
 *
 * @param options - `force: true` bypasses the cache lookup; the fresh result
 *   is still written to the cache.
 * @returns Discussions from all searches that fulfilled; rejected searches
 *   contribute nothing.
 */
export async function discoverDiscussions(
	rawUrl: string,
	options: DiscoverOptions = {},
): Promise<Discussion[]> {
	const prefs = await settings.getValue();
	const url = normalizeUrl(rawUrl, { keepQueryString: !prefs.ignoreQueryString });
	const cacheKey = `discussions:${url}`;

	if (!options.force) {
		const hit = await cacheGet<Discussion[]>(cacheKey);
		if (hit) return hit;
	}

	// Start every enabled search before awaiting any of them.
	const pending: Array<Promise<Discussion[]>> = [
		...(prefs.enableHn ? [searchHn(url)] : []),
		...(prefs.enableReddit ? [searchReddit(url)] : []),
		...(prefs.enableLobsters ? [searchLobsters(url)] : []),
	];
	const settled = await Promise.allSettled(pending);

	const discussions: Discussion[] = [];
	for (const outcome of settled) {
		if (outcome.status === 'fulfilled') {
			discussions.push(...outcome.value);
		}
	}

	const ttlMs = prefs.cacheDurationMinutes * 60 * 1000;
	await cacheSet(cacheKey, discussions, ttlMs);

	return discussions;
}
Generated by Repobility's multi-pass static-analysis pipeline (https://repobility.com)
queryAlgolia function · typescript · L20-L41 (22 LOC)
src/lib/hn.ts
/**
 * Search the Algolia HN index for stories whose `url` attribute matches.
 *
 * At most 20 hits are requested.
 *
 * @returns One discussion per hit, or an empty array when the response is
 *   not OK.
 */
async function queryAlgolia(url: string): Promise<Discussion[]> {
	const params = new URLSearchParams();
	params.set('query', url);
	params.set('restrictSearchableAttributes', 'url');
	params.set('hitsPerPage', '20');

	const response = await fetch(`${ALGOLIA_SEARCH}?${params}`);
	if (!response.ok) return [];

	const { hits }: AlgoliaResponse = await response.json();

	return hits.map(({ objectID, title, points, num_comments, created_at }) => ({
		platform: 'hn' as const,
		title,
		url: `https://news.ycombinator.com/item?id=${objectID}`,
		points,
		commentCount: num_comments,
		createdAt: new Date(created_at).toISOString(),
		externalId: objectID,
	}));
}
searchHn function · typescript · L43-L58 (16 LOC)
src/lib/hn.ts
/**
 * Search Hacker News for discussions of `url`.
 *
 * When a Bloom filter is available it is consulted first, and a negative
 * answer skips the Algolia query entirely. Without a filter, or when the
 * filter answers "maybe", Algolia is queried.
 *
 * @returns The discussions found, or an empty array on any error.
 */
export async function searchHn(url: string): Promise<Discussion[]> {
	try {
		const filter = await getBloomFilter();

		// A negative Bloom answer means the URL is definitely not on HN.
		if (filter && !checkBloomFilter(filter, url)) {
			return [];
		}

		return await queryAlgolia(url);
	} catch {
		return [];
	}
}
summarize function · typescript · L24-L59 (36 LOC)
src/lib/llm.ts
export async function summarize(
	commentsText: string,
	options: SummarizeOptions,
): Promise<SummarizeResult> {
	const userMessage = options.pageTitle
		? `Page: ${options.pageTitle}\nURL: ${options.pageUrl}\n\nComments:\n${commentsText}`
		: `URL: ${options.pageUrl}\n\nComments:\n${commentsText}`;

	const response = await fetch(ANTHROPIC_API, {
		method: 'POST',
		headers: {
			'Content-Type': 'application/json',
			'x-api-key': options.apiKey,
			'anthropic-version': '2023-06-01',
			'anthropic-dangerous-direct-browser-access': 'true',
		},
		body: JSON.stringify({
			model: options.model,
			max_tokens: 1024,
			system: SYSTEM_PROMPT,
			messages: [{ role: 'user', content: userMessage }],
		}),
	});

	if (!response.ok) {
		const error = await response.text();
		throw new Error(`Anthropic API error (${response.status}): ${error}`);
	}

	const data: { content: Array<{ type: string; text: string }> } = await response.json();
	const text = data.content.find((c) => c.type === 'text')?.
searchLobsters function · typescript · L14-L34 (21 LOC)
src/lib/lobsters.ts
/**
 * Fetch the Lobsters stories listed for the hostname of `url`.
 *
 * The lookup uses the per-domain JSON endpoint, so it is keyed on the
 * hostname only, not on the full URL.
 *
 * @returns One discussion per story, or an empty array when the response is
 *   not OK or anything throws (including an unparsable `url`).
 */
export async function searchLobsters(url: string): Promise<Discussion[]> {
	try {
		const domain = new URL(url).hostname;
		const response = await fetch(`https://lobste.rs/domains/${domain}.json`);
		if (!response.ok) return [];

		const stories: LobstersStory[] = await response.json();

		const toDiscussion = (story: LobstersStory) => ({
			platform: 'lobsters' as const,
			title: story.title,
			url: story.comments_url,
			points: story.score,
			commentCount: story.comment_count,
			createdAt: new Date(story.created_at).toISOString(),
			externalId: story.short_id,
		});
		return stories.map((story) => toDiscussion(story));
	} catch {
		return [];
	}
}
preprocessComments function · typescript · L9-L28 (20 LOC)
src/lib/preprocess.ts
/**
 * Select and trim comments before they are placed in a prompt.
 *
 * Comments of 10 characters or fewer are dropped; the remainder is ordered by
 * score, highest first, and cut to `options.maxComments`. Text longer than
 * `MAX_COMMENT_LENGTH` is truncated to that length and suffixed with `...`.
 * The input array is not modified.
 */
export function preprocessComments(
	comments: Comment[],
	options: PreprocessOptions = { maxComments: 40 },
): Comment[] {
	// `filter` yields a fresh array, so the in-place sort leaves `comments` intact.
	const substantive = comments.filter((comment) => comment.text.length > 10);
	substantive.sort((left, right) => right.score - left.score);

	const top = substantive.slice(0, options.maxComments);

	return top.map((comment) => {
		const text =
			comment.text.length > MAX_COMMENT_LENGTH
				? `${comment.text.slice(0, MAX_COMMENT_LENGTH)}...`
				: comment.text;
		return { ...comment, text };
	});
}
formatCommentsForPrompt function · typescript · L30-L37 (8 LOC)
src/lib/preprocess.ts
/**
 * Render comments as prompt text.
 *
 * Each comment becomes `[<source>, <score> pts] <author>: <text>`, where the
 * source label is `Reddit` for the `reddit` platform and `HN` for anything
 * else. Entries are separated by a blank line.
 */
export function formatCommentsForPrompt(comments: Comment[]): string {
	const entries: string[] = [];
	for (const comment of comments) {
		const source = comment.platform === 'reddit' ? 'Reddit' : 'HN';
		entries.push(`[${source}, ${comment.score} pts] ${comment.author}: ${comment.text}`);
	}
	return entries.join('\n\n');
}
searchReddit function · typescript · L22-L50 (29 LOC)
src/lib/reddit.ts
/**
 * Search Reddit for posts linking to `url`.
 *
 * Uses a `url:<url>` query sorted by `top`, limited to 25 results.
 *
 * @returns One discussion per post, or an empty array when the response is
 *   not OK or anything throws.
 */
export async function searchReddit(url: string): Promise<Discussion[]> {
	try {
		const params = new URLSearchParams({ q: `url:${url}`, sort: 'top', limit: '25' });
		const requestInit = { headers: { 'User-Agent': 'discussed/0.1' } };

		const response = await fetch(`${REDDIT_SEARCH}?${params}`, requestInit);
		if (!response.ok) return [];

		const listing: RedditListing = await response.json();
		const posts = listing.data.children;

		return posts.map((child) => {
			const post = child.data;
			return {
				platform: 'reddit' as const,
				title: post.title,
				url: `https://www.reddit.com${post.permalink}`,
				points: post.score,
				commentCount: post.num_comments,
				// `created_utc` is multiplied by 1000 before being handed to `Date`.
				createdAt: new Date(post.created_utc * 1000).toISOString(),
				externalId: post.name,
				subreddit: post.subreddit,
			};
		});
	} catch {
		return [];
	}
}
extractPermalink function · typescript · L16-L23 (8 LOC)
src/lib/summarize.ts
/**
 * Return the path component of a Reddit URL.
 *
 * @returns The URL's `pathname`, or an empty string when `redditUrl` cannot
 *   be parsed as a URL.
 */
function extractPermalink(redditUrl: string): string {
	let permalink: string;
	try {
		permalink = new URL(redditUrl).pathname;
	} catch {
		permalink = '';
	}
	return permalink;
}
Repobility · open methodology · https://repobility.com/research/
fetchAllComments function · typescript · L25-L34 (10 LOC)
src/lib/summarize.ts
/**
 * Fetch the comments of every discussion concurrently and merge them.
 *
 * HN discussions are fetched by `externalId`, Reddit discussions by the path
 * of their URL; any other platform contributes no comments. Fetches that
 * reject are ignored.
 */
async function fetchAllComments(discussions: Discussion[]): Promise<Comment[]> {
	const pending: Array<Promise<Comment[]>> = discussions.map((discussion) => {
		switch (discussion.platform) {
			case 'hn':
				return fetchHnComments(discussion.externalId);
			case 'reddit':
				return fetchRedditComments(extractPermalink(discussion.url));
			default:
				return Promise.resolve([]);
		}
	});

	const settled = await Promise.allSettled(pending);

	const merged: Comment[] = [];
	for (const outcome of settled) {
		if (outcome.status === 'fulfilled') {
			merged.push(...outcome.value);
		}
	}
	return merged;
}
summarizeDiscussions function · typescript · L36-L80 (45 LOC)
src/lib/summarize.ts
export async function summarizeDiscussions(
	pageUrl: string,
	discussions: Discussion[],
	options: { force?: boolean } = {},
): Promise<SummaryResult> {
	const cacheKey = `summary:${pageUrl}`;

	if (!options.force) {
		const cached = await cacheGet<SummaryResult>(cacheKey);
		if (cached) return cached;
	}

	const userSettings = await settings.getValue();

	if (!userSettings.apiKey) {
		throw new Error('No API key configured. Add one in extension settings.');
	}

	const allComments = await fetchAllComments(discussions);

	if (allComments.length === 0) {
		throw new Error('No comments found to summarize.');
	}

	const processed = preprocessComments(allComments, {
		maxComments: userSettings.maxCommentsForSummary,
	});
	const commentsText = formatCommentsForPrompt(processed);

	const result = await summarize(commentsText, {
		apiKey: userSettings.apiKey,
		model: userSettings.model,
		pageUrl,
	});

	const summaryResult: SummaryResult = {
		summary: result.summary,
		model: result.model,
extractYouTubeVideoId function · typescript · L7-L26 (20 LOC)
src/lib/url.ts
/**
 * Extract the video id from a YouTube URL.
 *
 * Recognized forms:
 * - `youtu.be/<id>` (hostname compared after dropping a leading `www.`);
 * - `/watch?v=<id>` on a hostname contained in `YOUTUBE_HOSTS`;
 * - `/embed/<id>` on a hostname contained in `YOUTUBE_HOSTS`.
 *
 * Membership in `YOUTUBE_HOSTS` is tested with the hostname exactly as given.
 *
 * @returns The id, or `null` when the URL matches none of the forms.
 */
function extractYouTubeVideoId(url: URL): string | null {
	const bareHost = url.hostname.replace(/^www\./, '');
	if (bareHost === 'youtu.be') {
		return url.pathname.slice(1) || null;
	}

	if (!YOUTUBE_HOSTS.has(url.hostname)) {
		return null;
	}

	if (url.pathname === '/watch') {
		return url.searchParams.get('v');
	}

	const embedded = url.pathname.match(/^\/embed\/([^/]+)/);
	return embedded ? embedded[1] : null;
}
normalizeUrl function · typescript · L28-L57 (30 LOC)
src/lib/url.ts
/**
 * Reduce a URL to the canonical form used for discussion lookups.
 *
 * Applied in order: protocol set to `https:`, leading `www.` removed, YouTube
 * video URLs rewritten to `https://youtube.com/watch?v=<id>`, fragment
 * dropped, query string dropped unless `options.keepQueryString` is set, and
 * a trailing slash removed from any path other than the root.
 *
 * @throws TypeError if `raw` is not a parsable URL (raised by the `URL`
 *   constructor).
 */
export function normalizeUrl(raw: string, options: NormalizeUrlOptions = {}): string {
	const parsed = new URL(raw);
	parsed.protocol = 'https:';
	parsed.hostname = parsed.hostname.replace(/^www\./, '');

	// Every YouTube video URL variant collapses to one canonical watch URL.
	const videoId = extractYouTubeVideoId(parsed);
	if (videoId) return `https://youtube.com/watch?v=${videoId}`;

	parsed.hash = '';
	if (!options.keepQueryString) parsed.search = '';

	// The root path "/" keeps its slash.
	const { pathname } = parsed;
	const hasTrailingSlash = pathname.length > 1 && pathname.endsWith('/');
	if (hasTrailingSlash) parsed.pathname = pathname.slice(0, -1);

	return parsed.toString();
}