Function bodies 406 total
parseBusinesses function · javascript · L357-L384 (28 LOC)ksd/local-outreach/orchestrator/modules/google-maps-scraper.js
function parseBusinesses(businesses, businessTypes = []) {
// Use first search keyword as category fallback if API doesn't provide one
const fallbackCategory = businessTypes && businessTypes.length > 0
? businessTypes[0]
: "unknown";
return businesses.map(b => {
const address = b.address || b.formattedAddress || "";
const postcode = extractPostcodeFromAddress(address);
return {
name: b.name || b.businessName || b.title || (b.address ? b.address.split(',')[0].trim() : "Unknown Business"),
address: address,
postcode: postcode,
phone: b.phone || b.phoneNumber,
website: b.website || b.url,
rating: b.rating || b.averageRating,
reviewCount: b.reviewCount || b.userRatingsTotal || 0,
category: b.category || (b.types && b.types[0]) || fallbackCategory,
location: {
lat: (b.location && b.location.lat) || (b.geometry && b.geometry.location && b.geometry.location.lat),
lng: (b.location && b.location.lngetBackoffDelay function · javascript · L23-L26 (4 LOC)ksd/local-outreach/orchestrator/modules/google-maps-scraper-outscraper.js
/**
 * Compute how long to wait before a given poll attempt.
 *
 * The base delay doubles with every attempt, but the exponent is capped at 3
 * (at most 8x the initial delay) and the result never exceeds
 * MAX_POLL_DELAY_MS.
 *
 * @param {number} attempt - 1-based poll attempt number.
 * @returns {number} Delay in milliseconds.
 */
function getBackoffDelay(attempt) {
  const exponent = Math.min(attempt - 1, 3);
  const uncappedDelay = INITIAL_POLL_DELAY_MS * 2 ** exponent;
  return Math.min(uncappedDelay, MAX_POLL_DELAY_MS);
}
// scrapeGoogleMapsOutscraper function · javascript · L31-L68 (38 LOC)ksd/local-outreach/orchestrator/modules/google-maps-scraper-outscraper.js
async function scrapeGoogleMapsOutscraper(location, postcode, businessTypes = [], extractEmails = true) {
const apiKey = getCredential("outscraper", "apiKey");
// Build location query (separate from business type)
const locationQuery = postcode ? `${location}, ${postcode}` : location;
logger.info('google-maps-scraper-outscraper', 'Starting Outscraper scrape', {
location,
postcode,
businessTypes,
locationQuery
});
try {
// Query each business type with location
const results = await Promise.all(
businessTypes.map(type => scrapeQuery(locationQuery, type, apiKey, extractEmails))
);
// Flatten results and deduplicate by place_id
const allBusinesses = results.flat();
const uniqueBusinesses = deduplicateByPlaceId(allBusinesses);
logger.info('google-maps-scraper-outscraper', 'Outscraper scrape complete', {
totalResults: allBusinesses.length,
uniqueResults: uniqueBusinesses.length
});
return uniqueBusinessescrapeQuery function · javascript · L73-L90 (18 LOC)ksd/local-outreach/orchestrator/modules/google-maps-scraper-outscraper.js
/**
 * Run a single Outscraper search end-to-end: submit the job, poll until its
 * results are available, then map every raw result through
 * transformOutscraperBusiness.
 *
 * @param {string} locationQuery - Location part of the search.
 * @param {string} businessType - Business type to search for.
 * @param {string} apiKey - Outscraper API key.
 * @param {boolean} extractEmails - Forwarded unchanged to submitOutscraperJob.
 * @returns {Promise<Array<object>>} Transformed businesses for this query.
 */
async function scrapeQuery(locationQuery, businessType, apiKey, extractEmails) {
  // Submit first; the returned job id is what gets polled.
  const jobId = await submitOutscraperJob(locationQuery, businessType, apiKey, extractEmails);
  const rawBusinesses = await pollOutscraperJob(jobId, apiKey);

  // Convert the provider's records into our own format.
  const transformed = rawBusinesses.map(transformOutscraperBusiness);

  const summary = {
    locationQuery,
    businessType,
    count: transformed.length
  };
  logger.info('google-maps-scraper-outscraper', 'Query completed', summary);

  return transformed;
}
// submitOutscraperJob function · javascript · L99-L196 (98 LOC)ksd/local-outreach/orchestrator/modules/google-maps-scraper-outscraper.js
function submitOutscraperJob(locationQuery, businessType, apiKey, extractEmails) {
return new Promise((resolve, reject) => {
// Build query with business type embedded (NOT as separate categories parameter)
// Outscraper API expects: "hairdressers Bramhall, sk7" (lowercase postcode)
// IMPORTANT: Use minimal parameters - extra params (language, region, extractEmails) can cause 0 results
const fullQuery = businessType
? `${businessType} ${locationQuery.toLowerCase()}` // lowercase for consistency
: locationQuery.toLowerCase();
const params = new URLSearchParams({
query: fullQuery,
limit: '500'
// Do NOT include language, region, extractEmails - causes 0 results bug
});
const options = {
hostname: OUTSCRAPER_BASE_URL,
path: `/maps/search-v3?${params.toString()}`,
method: "GET",
headers: {
"X-API-KEY": apiKey
}
};
const req = https.request(options, (res) => {
// Use Buffer ppollOutscraperJob function · javascript · L201-L313 (113 LOC)ksd/local-outreach/orchestrator/modules/google-maps-scraper-outscraper.js
function pollOutscraperJob(jobId, apiKey) {
return new Promise((resolve, reject) => {
let attempts = 0;
const MAX_POLL_TIME_MS = 5 * 60 * 1000; // 5 minutes absolute maximum
const startTime = Date.now();
const poll = () => {
attempts++;
// Check absolute timeout (wall-clock time)
const elapsedTime = Date.now() - startTime;
if (elapsedTime > MAX_POLL_TIME_MS) {
reject(new Error(`Outscraper job polling absolute timeout after ${Math.round(elapsedTime / 1000)}s (max ${MAX_POLL_TIME_MS / 1000}s)`));
return;
}
// Check attempt-based timeout
if (attempts > MAX_POLL_ATTEMPTS) {
reject(new Error(`Outscraper job polling timeout after ${MAX_POLL_ATTEMPTS} attempts`));
return;
}
const options = {
hostname: OUTSCRAPER_RESULTS_URL,
path: `/requests/${jobId}`,
method: "GET",
headers: {
"X-API-KEY": apiKey
}
};
const req = https.requtransformOutscraperBusiness function · javascript · L318-L346 (29 LOC)ksd/local-outreach/orchestrator/modules/google-maps-scraper-outscraper.js
function transformOutscraperBusiness(business) {
return {
name: business.name || "Unknown Business",
category: business.type || business.category || "General",
address: business.full_address || business.address || "",
city: business.city || extractCityFromAddress(business.full_address),
postcode: business.postal_code || business.postcode || "",
phone: business.phone || null,
website: business.site || business.website || null,
email: business.emails?.[0] || null,
rating: business.rating || null,
reviewCount: business.reviews || business.reviews_count || 0,
latitude: business.latitude || null,
longitude: business.longitude || null,
placeId: business.place_id || business.google_id || null,
// Additional fields
openingHours: business.working_hours || null,
description: business.description || null,
// Social media
instagramUrl: extractSocialMedia(business, 'instagram'),
facebookUrl: extractSocialMedia(business,If a scraper extracted this row, it came from Repobility (https://repobility.com)
extractCityFromAddress function · javascript · L351-L360 (10 LOC)ksd/local-outreach/orchestrator/modules/google-maps-scraper-outscraper.js
/**
 * Guess the city from a comma-separated address by taking the
 * second-to-last segment.
 *
 * @param {string} address - Full address string.
 * @returns {string} The trimmed second-to-last segment, or "" when the
 *   address is empty or has fewer than two segments.
 */
function extractCityFromAddress(address) {
  if (!address) {
    return "";
  }

  const segments = address.split(",");
  if (segments.length < 2) {
    return "";
  }

  return segments[segments.length - 2].trim();
}
// extractSocialMedia function · javascript · L365-L378 (14 LOC)ksd/local-outreach/orchestrator/modules/google-maps-scraper-outscraper.js
/**
 * Find a business's profile URL for a social platform.
 *
 * Looks first at `business.social_media[platform]`, then falls back to the
 * first entry of `business.links` whose text contains the platform name
 * (case-insensitive).
 *
 * The fallback is attempted even when `social_media` is missing entirely;
 * previously a record with only `links` always yielded null. Non-string
 * entries in `links` are ignored instead of throwing.
 *
 * @param {object} business - Raw business record.
 * @param {string} platform - Platform name, e.g. 'instagram'.
 * @returns {string|null} The matching URL, or null when none is found.
 */
function extractSocialMedia(business, platform) {
  if (!business) return null;

  const direct = business.social_media ? business.social_media[platform] : null;
  if (direct) {
    return direct;
  }

  if (!Array.isArray(business.links)) {
    return null;
  }

  const needle = String(platform).toLowerCase();
  const match = business.links.find(
    (link) => typeof link === 'string' && link.toLowerCase().includes(needle)
  );
  return match || null;
}
// deduplicateByPlaceId function · javascript · L383-L395 (13 LOC)ksd/local-outreach/orchestrator/modules/google-maps-scraper-outscraper.js
/**
 * Drop businesses whose placeId has already been seen, keeping the first
 * occurrence of each id and preserving input order.
 *
 * Entries with a falsy placeId cannot be compared, so they are always kept.
 *
 * @param {Array<object>} businesses - Transformed business records.
 * @returns {Array<object>} New array without repeated placeIds.
 */
function deduplicateByPlaceId(businesses) {
  const seenIds = new Set();

  return businesses.reduce((kept, business) => {
    const id = business.placeId;
    const isRepeat = Boolean(id) && seenIds.has(id);

    if (!isRepeat) {
      if (id) {
        seenIds.add(id);
      }
      kept.push(business);
    }
    return kept;
  }, []);
}
// getBackoffDelay function · javascript · L21-L24 (4 LOC)ksd/local-outreach/orchestrator/modules/google-maps-scraper-scrapula.js
/**
 * Capped exponential backoff for polling.
 *
 * Starts at INITIAL_POLL_DELAY_MS and doubles per attempt for at most three
 * doublings; the result is clamped to MAX_POLL_DELAY_MS.
 *
 * @param {number} attempt - 1-based poll attempt number.
 * @returns {number} Delay in milliseconds before this attempt.
 */
function getBackoffDelay(attempt) {
  const doublings = Math.min(attempt - 1, 3);
  return Math.min(INITIAL_POLL_DELAY_MS * 2 ** doublings, MAX_POLL_DELAY_MS);
}
// scrapeGoogleMapsScrapula function · javascript · L29-L72 (44 LOC)ksd/local-outreach/orchestrator/modules/google-maps-scraper-scrapula.js
async function scrapeGoogleMapsScrapula(location, postcode, businessTypes = [], extractEmails = true) {
const apiKey = getCredential("scrapula", "apiKey");
// Build queries for each business type
const queries = businessTypes.length > 0
? businessTypes.map(type => postcode ? `${type}, ${location}, ${postcode}` : `${type}, ${location}`)
: [postcode ? `${location}, ${postcode}` : location];
logger.info('google-maps-scraper-scrapula', 'Starting Scrapula scrape', {
location,
postcode,
businessTypes,
queries
});
try {
// Create a task for Google Maps scraping
const taskId = await createScrapulaTask(queries, apiKey, extractEmails);
// Poll for task completion
const results = await pollScrapulaTask(taskId, apiKey);
// Transform results
const transformed = results.map(transformScrapulaBusiness);
// Deduplicate by place_id
const uniqueBusinesses = deduplicateByPlaceId(transformed);
logger.info('google-macreateScrapulaTask function · javascript · L77-L154 (78 LOC)ksd/local-outreach/orchestrator/modules/google-maps-scraper-scrapula.js
function createScrapulaTask(queries, apiKey, extractEmails) {
return new Promise((resolve, reject) => {
const postData = JSON.stringify({
service_name: "google_maps_service_v2",
queries: queries,
language: "en",
region: "GB",
limit: 500,
dropDuplicates: true,
enrichments: extractEmails ? ["domains_service"] : []
});
const options = {
hostname: SCRAPULA_BASE_URL,
path: "/tasks",
method: "POST",
headers: {
"X-API-KEY": apiKey,
"Content-Type": "application/json",
"Content-Length": Buffer.byteLength(postData)
}
};
const req = https.request(options, (res) => {
let data = "";
res.on("data", (chunk) => {
data += chunk;
});
res.on("end", () => {
try {
if (res.statusCode >= 400) {
logger.error('google-maps-scraper-scrapula', 'Task creation failed', {
statusCode: res.statusCode,
responsepollScrapulaTask function · javascript · L159-L276 (118 LOC)ksd/local-outreach/orchestrator/modules/google-maps-scraper-scrapula.js
function pollScrapulaTask(taskId, apiKey) {
return new Promise((resolve, reject) => {
let attempts = 0;
const poll = () => {
attempts++;
if (attempts > MAX_POLL_ATTEMPTS) {
reject(new Error(`Scrapula task polling timeout after ${MAX_POLL_ATTEMPTS} attempts`));
return;
}
const options = {
hostname: SCRAPULA_BASE_URL,
path: `/tasks/${taskId}`,
method: "GET",
headers: {
"X-API-KEY": apiKey
}
};
const req = https.request(options, (res) => {
let data = "";
res.on("data", (chunk) => {
data += chunk;
});
res.on("end", () => {
try {
const result = JSON.parse(data);
logger.info('google-maps-scraper-scrapula', 'Poll response', {
taskId,
status: result.status,
attempt: attempts
});
if (result.status === "SUCCESS" || result.status === "succfetchResultsFromUrl function · javascript · L281-L352 (72 LOC)ksd/local-outreach/orchestrator/modules/google-maps-scraper-scrapula.js
function fetchResultsFromUrl(fileUrl, apiKey) {
return new Promise((resolve, reject) => {
const url = new URL(fileUrl);
const options = {
hostname: url.hostname,
path: url.pathname + url.search,
method: "GET",
headers: {
"X-API-KEY": apiKey
}
};
const req = https.request(options, (res) => {
let data = "";
res.on("data", (chunk) => {
data += chunk;
});
res.on("end", () => {
try {
// Results might be JSON or CSV
const contentType = res.headers['content-type'] || '';
if (contentType.includes('json')) {
const results = JSON.parse(data);
resolve(Array.isArray(results) ? results : []);
} else if (contentType.includes('csv')) {
// Parse CSV (simplified - you might want a proper CSV parser)
const lines = data.split('\n').filter(l => l.trim());
const headers = lines[0].split(',').mapRepobility analyzer · published findings · https://repobility.com
transformScrapulaBusiness function · javascript · L357-L387 (31 LOC)ksd/local-outreach/orchestrator/modules/google-maps-scraper-scrapula.js
function transformScrapulaBusiness(business) {
return {
name: business.name || business.business_name || "Unknown Business",
category: business.category || business.type || "General",
address: business.address || business.full_address || "",
city: business.city || "",
postcode: business.postal_code || business.postcode || "",
phone: business.phone || business.phone_number || null,
website: business.website || business.site || null,
email: business.email || (business.emails && business.emails[0]) || null,
emailsFromWebsite: business.emails || [],
rating: business.rating || business.stars || null,
reviewCount: business.reviews_count || business.review_count || business.reviews || 0,
latitude: business.latitude || business.lat || null,
longitude: business.longitude || business.lng || business.lon || null,
placeId: business.place_id || business.placeId || business.google_id || null,
// Additional fields
openingHours: budeduplicateByPlaceId function · javascript · L392-L404 (13 LOC)ksd/local-outreach/orchestrator/modules/google-maps-scraper-scrapula.js
/**
 * Remove repeated businesses, identified by placeId.
 *
 * The first record carrying a given placeId wins; later ones are dropped.
 * Records with a falsy placeId are never treated as duplicates.
 *
 * @param {Array<object>} businesses - Transformed business records.
 * @returns {Array<object>} Filtered copy in the original order.
 */
function deduplicateByPlaceId(businesses) {
  const knownIds = new Set();

  return businesses.filter(({ placeId }) => {
    if (!placeId) {
      return true;
    }
    const firstSighting = !knownIds.has(placeId);
    knownIds.add(placeId);
    return firstSighting;
  });
}
// estimateRevenue function · javascript · L15-L141 (127 LOC)ksd/local-outreach/orchestrator/modules/revenue-estimator.js
async function estimateRevenue(business) {
const {
name,
category,
address,
employeeCount,
reviewCount,
locationCount = 1,
website,
linkedInEmployees
} = business;
const apiKey = getCredential("anthropic", "apiKey");
const systemPrompt = "You are an expert business analyst specializing in UK SME revenue estimation. Provide accurate, conservative estimates based on industry benchmarks and business signals. Respond ONLY with valid JSON - no explanations, no markdown.";
const userPrompt = `Estimate UK company revenue based on these signals:
Company: ${name}
Industry: ${category}
Location: ${address}
Employees (Companies House): ${employeeCount || "Unknown"}
Google Reviews: ${reviewCount || 0}
LinkedIn Employees: ${linkedInEmployees || "Unknown"}
Number of locations: ${locationCount}
Website: ${website || "Unknown"}
Provide:
1. Annual revenue (in GBP)
2. Revenue band (£0-100k, £100k-500k, £500k-1M, £1M-5M, £5M+)
3. Confidence level (1-10)
4. KloadTierConfig function · javascript · L14-L21 (8 LOC)ksd/local-outreach/orchestrator/modules/tier-assigner.js
/**
 * Read and parse the tier configuration JSON at TIER_CONFIG_PATH.
 *
 * @returns {object} The parsed configuration.
 * @throws {Error} "Failed to load tier config: ..." when the file cannot be
 *   read or is not valid JSON. The underlying error is attached as `cause`
 *   so its code and stack are not lost.
 */
function loadTierConfig() {
  try {
    const raw = fs.readFileSync(TIER_CONFIG_PATH, "utf8");
    return JSON.parse(raw);
  } catch (error) {
    throw new Error(`Failed to load tier config: ${error.message}`, { cause: error });
  }
}
// assignTier function · javascript · L26-L57 (32 LOC)ksd/local-outreach/orchestrator/modules/tier-assigner.js
/**
 * Pick the offer tier whose revenue range contains the given revenue.
 *
 * Ranges are half-open: a tier matches when `min <= revenue < max`. Tiers
 * are tested in the key order of the loaded config and the first match wins.
 * When nothing matches (including a NaN revenue) the result falls back to
 * `tier1`.
 *
 * The fallback now has the same shape as a matched tier: it previously left
 * out `personalBrand` and `businessGrowth`.
 *
 * @param {number} revenue - Estimated annual revenue.
 * @returns {{tierId: string, tierName: *, setupFee: *, monthlyPrice: *,
 *   ghlOffer: *, leadMagnet: *, personalBrand: *, businessGrowth: *}}
 *   Tier details copied from the config.
 * @throws {Error} Whatever loadTierConfig throws when the config is unavailable.
 */
function assignTier(revenue) {
  const { tiers } = loadTierConfig();

  // Single place that defines the returned shape, shared by match and fallback.
  const describeTier = (tierId, tier) => ({
    tierId,
    tierName: tier.name,
    setupFee: tier.setupFee,
    monthlyPrice: tier.monthlyPrice,
    ghlOffer: tier.ghlOffer,
    leadMagnet: tier.leadMagnet,
    personalBrand: tier.personalBrand || false,
    businessGrowth: tier.businessGrowth || false
  });

  const match = Object.entries(tiers).find(([, tier]) => {
    const [min, max] = tier.revenueRange;
    return revenue >= min && revenue < max;
  });

  if (match) {
    const [tierId, tier] = match;
    return describeTier(tierId, tier);
  }

  // Default to Tier 1 if no range matched.
  return describeTier("tier1", tiers.tier1);
}
// prepareData function · javascript · L32-L49 (18 LOC)ksd/local-outreach/orchestrator/utils/export-businesses.js
/**
 * Flatten a stored business record into a single-level export row.
 *
 * Record-level fields (id, location, postcode, status) are combined with
 * selected fields of the nested `business` object. The name falls back to
 * `businessName` when `name` is falsy.
 *
 * @param {object} record - Record with a nested `business` object.
 * @returns {object} Flat row; key order is part of the output.
 */
function prepareData(record) {
  const { id, location, postcode, status, business } = record;
  const {
    address,
    website,
    phone,
    category,
    ownerEmail,
    linkedInUrl,
    estimatedRevenue,
    assignedOfferTier
  } = business;

  return {
    id,
    name: business.name || business.businessName,
    location,
    postcode,
    address,
    website,
    phone,
    category,
    ownerEmail,
    linkedInUrl,
    estimatedRevenue,
    assignedOfferTier,
    status
  };
}
// exportBusiness function · javascript · L26-L363 (338 LOC)ksd/local-outreach/orchestrator/utils/export-business.js
async function exportBusiness(businessNameOrId, options = {}) {
const {
skipLinkedIn = false,
campaignId = process.env.LEMLIST_CAMPAIGN_ID || getCredential('lemlist', 'campaignId'),
contentProvider = process.env.CONTENT_PROVIDER || 'claude',
dryRun = false
} = options;
console.log(`\n=== EXPORT BUSINESS TO LEMLIST ===\n`);
console.log(`Business: ${businessNameOrId}`);
console.log(`Campaign: ${campaignId || 'NOT SET'}`);
console.log(`LinkedIn Enrichment: ${skipLinkedIn ? 'SKIPPED' : 'ENABLED'}`);
console.log(`Content Provider: ${contentProvider}`);
console.log(`Dry Run: ${dryRun ? 'YES' : 'NO'}\n`);
// Validate environment
if (!campaignId && !dryRun) {
throw new Error('LEMLIST_CAMPAIGN_ID environment variable not set');
}
// Find business in database
let businessRecord = null;
// Try as ID first
businessRecord = getBusiness(businessNameOrId);
// If not found, search by name
if (!businessRecord) {
const allBusinesses = loadBuformatForLemlist function · javascript · L18-L49 (32 LOC)ksd/local-outreach/orchestrator/utils/export-email-sequence.js
function formatForLemlist(emailContent, business) {
let formatted = emailContent;
// Replace first name with merge variable
if (business.ownerFirstName) {
const firstNameRegex = new RegExp(business.ownerFirstName, 'gi');
formatted = formatted.replace(firstNameRegex, '{{firstName}}');
}
// Replace company name with merge variable (use humanized version)
const companyNames = [
business.businessName,
business.name,
business.businessName?.replace(/\s+(Limited|Ltd|Ltd\.|plc|PLC|Inc|Inc\.|LLC|Corp|Corporation|Company|Co\.|Co)$/gi, ''),
].filter(Boolean);
companyNames.forEach(name => {
if (name) {
const nameRegex = new RegExp(name.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'), 'gi');
formatted = formatted.replace(nameRegex, '{{companyName}}');
}
});
// Replace location references
if (business.location || business.city) {
const location = business.location || business.city;
const locationRegex = new RegExp(location, 'gi');
Repobility — same analyzer, your code, free for public repos · /scan/
exportEmailSequence function · javascript · L54-L161 (108 LOC)ksd/local-outreach/orchestrator/utils/export-email-sequence.js
async function exportEmailSequence(businessNameOrId) {
console.log(`\n=== EMAIL SEQUENCE EXPORT FOR LEMLIST ===\n`);
// Find business
let businessRecord = getBusiness(businessNameOrId);
if (!businessRecord) {
const { loadBusinesses } = require("../modules/database");
const allBusinesses = loadBusinesses({ limit: 1000 });
businessRecord = allBusinesses.find(b =>
b.business.name?.toLowerCase().includes(businessNameOrId.toLowerCase()) ||
b.business.businessName?.toLowerCase().includes(businessNameOrId.toLowerCase())
);
}
if (!businessRecord) {
throw new Error(`Business not found: ${businessNameOrId}`);
}
const business = businessRecord.business;
console.log(`Business: ${business.name || business.businessName}`);
console.log(`Owner: ${business.ownerFirstName} ${business.ownerLastName}`);
console.log(`Category: ${business.category}\n`);
// Generate email sequence
console.log(`Generating email sequence...\n`);
const emailContexportToProsp function · javascript · L25-L287 (263 LOC)ksd/local-outreach/orchestrator/utils/export-to-prosp.js
async function exportToProsp(businessNameOrId, options = {}) {
const {
skipLinkedIn = false,
sendImmediately = false,
campaignId = process.env.PROSP_CAMPAIGN_ID || getCredential("prosp", "campaignId"),
listId = process.env.PROSP_LIST_ID || getCredential("prosp", "listId"),
senderUrl = process.env.PROSP_SENDER_URL || getCredential("prosp", "senderUrl"),
dryRun = false,
} = options;
console.log(`\n=== EXPORT BUSINESS TO PROSP LINKEDIN CAMPAIGN ===\n`);
console.log(`Business: ${businessNameOrId}`);
console.log(`Campaign: ${campaignId || "NOT SET"}`);
console.log(`List: ${listId || "NOT SET"}`);
console.log(`LinkedIn Enrichment: ${skipLinkedIn ? "SKIPPED" : "ENABLED"}`);
console.log(`Send Immediately: ${sendImmediately ? "YES" : "NO (campaign auto-send)"}`);
console.log(`Dry Run: ${dryRun ? "YES" : "NO"}\n`);
// Validate environment
if (!campaignId && !dryRun) {
throw new Error("PROSP_CAMPAIGN_ID environment variable not set");
}
if migrate function · javascript · L13-L88 (76 LOC)ksd/local-outreach/orchestrator/utils/migrate-to-db.js
async function migrate() {
console.log("🚀 Starting migration from JSON to Database...");
// Initialize database
initDatabase();
// Ensure directories exist
if (!fs.existsSync(JSON_DIR)) {
console.log(`✅ JSON directory does not exist: ${JSON_DIR}`);
console.log("No migration needed - starting fresh with database.");
return;
}
if (!fs.existsSync(ARCHIVE_DIR)) {
fs.mkdirSync(ARCHIVE_DIR, { recursive: true });
}
// Read all JSON files
const files = fs.readdirSync(JSON_DIR).filter(f => f.endsWith(".json") && f !== "index.json");
if (files.length === 0) {
console.log("✅ No JSON files to migrate");
return;
}
console.log(`Found ${files.length} business files to migrate`);
let migrated = 0;
let skipped = 0;
let errors = 0;
for (const file of files) {
try {
const filePath = path.join(JSON_DIR, file);
const data = JSON.parse(fs.readFileSync(filePath, "utf8"));
// Extract business and metprompt function · javascript · L32-L38 (7 LOC)ksd/local-outreach/orchestrator/utils/resume-approval.js
/**
 * Ask a question through the `rl` interface and wait for the reply.
 *
 * @param {string} question - Text passed to `rl.question`.
 * @returns {Promise<string>} Resolves with the answer handed to the callback.
 */
function prompt(question) {
  return new Promise((resolve) => {
    rl.question(question, resolve);
  });
}
// displayExportSummary function · javascript · L44-L60 (17 LOC)ksd/local-outreach/orchestrator/utils/resume-approval.js
/**
 * Print a preview of the businesses about to be exported.
 *
 * Logs a banner, the total count, up to the first ten businesses (name,
 * category, owner email or "no email"), a "... and N more" line when the
 * list is longer than ten, and the target campaign read from the
 * LEMLIST_CAMPAIGN_ID environment variable.
 *
 * @param {Array<object>} records - Records with a nested `business` object.
 * @returns {void}
 */
function displayExportSummary(records) {
  const PREVIEW_LIMIT = 10;
  const rule = '─'.repeat(68);
  const total = records.length;

  cli.log(`${rule}`);
  cli.log(`EXPORT SUMMARY`);
  cli.log(`${rule}\n`);
  cli.log(`Will export ${total} businesses:\n`);

  const preview = records.slice(0, PREVIEW_LIMIT);
  preview.forEach(({ business }, index) => {
    const contact = business.ownerEmail || 'no email';
    cli.log(` ${index + 1}. ${business.name} (${business.category}) - ${contact}`);
  });

  if (total > PREVIEW_LIMIT) {
    cli.log(` ... and ${total - PREVIEW_LIMIT} more`);
  }

  const campaign = process.env.LEMLIST_CAMPAIGN_ID || 'NOT SET';
  cli.log(`\nExport to Lemlist campaign: ${campaign}`);
}
// resumeApproval function · javascript · L67-L179 (113 LOC)ksd/local-outreach/orchestrator/utils/resume-approval.js
async function resumeApproval(location, postcode) {
cli.log(`\n=== RESUME APPROVAL EXPORT ===\n`);
cli.log(`Location: ${location} (${postcode})`);
// Load approved templates
const approvedTemplates = loadApprovedTemplates();
const approvedCategories = Object.keys(approvedTemplates);
if (approvedCategories.length === 0) {
cli.log(`\n✗ No approved categories found. Run approve-cli first.\n`);
rl.close();
process.exit(1);
}
cli.log(`Approved categories: ${approvedCategories.join(', ')}\n`);
// Load businesses with status="enriched" (not yet exported)
const records = loadBusinesses({
status: "enriched",
location: location,
postcode: postcode
});
cli.log(`Found ${records.length} enriched businesses\n`);
// Filter to approved categories only
const toExport = records.filter(record => {
const category = (record.business.category || "unknown").toLowerCase();
return approvedCategories.includes(category);
});
if (toExport.lengprintUsage function · javascript · L184-L191 (8 LOC)ksd/local-outreach/orchestrator/utils/resume-approval.js
/**
 * Print command-line usage for resume-approval.js: the invocation form, an
 * example, and the environment variables it lists as required.
 *
 * @returns {void}
 */
function printUsage() {
  const usageLines = [
    `\nUsage: node resume-approval.js <location> <postcode>`,
    `Example: node resume-approval.js Bramhall SK7\n`,
    `Environment variables required:`,
    ` LEMLIST_CAMPAIGN_ID - Lemlist campaign ID (e.g., cam_9NsHPnykWESTncCW8)`,
    ` LEMLIST_API_KEY - Lemlist API key`,
    ` CONTENT_PROVIDER - claude or openai (optional, defaults to claude)\n`
  ];

  // One cli.log call per line, in order.
  for (const line of usageLines) {
    cli.log(line);
  }
}
// htmlToText function · javascript · L50-L81 (32 LOC)llm-extract-comparison.js
function htmlToText(html) {
if (!html) return '';
return html
// Remove script/style blocks
.replace(/<script[^>]*>[\s\S]*?<\/script>/gi, '')
.replace(/<style[^>]*>[\s\S]*?<\/style>/gi, '')
.replace(/<noscript[^>]*>[\s\S]*?<\/noscript>/gi, '')
// Remove HTML comments
.replace(/<!--[\s\S]*?-->/g, '')
// Convert common elements to text
.replace(/<br\s*\/?>/gi, '\n')
.replace(/<\/p>/gi, '\n\n')
.replace(/<\/div>/gi, '\n')
.replace(/<\/h[1-6]>/gi, '\n\n')
.replace(/<\/li>/gi, '\n')
// Remove all remaining HTML tags
.replace(/<[^>]+>/g, ' ')
// Decode common HTML entities
.replace(/&/g, '&')
.replace(/</g, '<')
.replace(/>/g, '>')
.replace(/"/g, '"')
.replace(/'/g, "'")
.replace(/ /g, ' ')
.replace(/&#\d+;/g, '')
// Clean up whitespace
.replace(/[ \t]+/g, ' ')
.replace(/\n\s*\n/g, '\n\n')
.trim()
// Cap at ~4000 chars to keep costs low
.substring(0, 400Repobility · code-quality intelligence platform · https://repobility.com
fetchWebsiteText function · javascript · L86-L123 (38 LOC)llm-extract-comparison.js
async function fetchWebsiteText(url) {
const texts = [];
try {
// Fetch homepage
let html = await fetchWebsite(url, 8000);
if (needsBrowserRendering(html)) {
const rendered = await fetchWithBrowser(url, 12000);
if (rendered) html = rendered;
}
texts.push(htmlToText(html));
// Try about/team pages
const parsedUrl = new URL(url);
const aboutPaths = ['/about', '/about-us', '/team', '/meet-the-team', '/about-me'];
for (const path of aboutPaths) {
try {
const pageUrl = `${parsedUrl.protocol}//${parsedUrl.hostname}${path}`;
const pageHtml = await fetchWebsite(pageUrl, 5000);
const pageText = htmlToText(pageHtml);
// Only include if it has substantial content and isn't a 404
if (pageText.length > 200 && !pageText.toLowerCase().includes('page not found') &&
!pageText.toLowerCase().includes('404')) {
texts.push(`--- ${path} page ---\n${pageText}`);
break; // One abllmExtractOwners function · javascript · L142-L182 (41 LOC)llm-extract-comparison.js
async function llmExtractOwners(businessName, websiteText) {
const prompt = PROMPT.replace('{BUSINESS}', businessName).replace('{TEXT}', websiteText);
let text, inputTokens, outputTokens;
if (PROVIDER === 'anthropic') {
const response = await client.messages.create({
model: MODEL,
max_tokens: 300,
messages: [{ role: 'user', content: prompt }]
});
text = response.content[0].text.trim();
inputTokens = response.usage.input_tokens;
outputTokens = response.usage.output_tokens;
} else {
const response = await client.chat.completions.create({
model: MODEL,
max_tokens: 300,
messages: [{ role: 'user', content: prompt }]
});
text = response.choices[0].message.content.trim();
inputTokens = response.usage.prompt_tokens;
outputTokens = response.usage.completion_tokens;
}
try {
const jsonMatch = text.match(/\{[\s\S]*\}/);
if (jsonMatch) {
const parsed = JSON.parse(jsonMatch[0]);
return {
generateReport function · javascript · L187-L298 (112 LOC)llm-extract-comparison.js
function generateReport(db) {
console.log('\n╔════════════════════════════════════════════════════════════════════╗');
console.log('║ REGEX vs LLM EXTRACTION COMPARISON ║');
console.log('╚════════════════════════════════════════════════════════════════════╝\n');
const allBiz = db.prepare(`
SELECT id, name, website, owner_first_name, owner_last_name, business_data
FROM businesses
WHERE website IS NOT NULL AND length(website) > 0
ORDER BY name
`).all();
let regexOnly = 0, llmOnly = 0, both = 0, neither = 0;
let regexTotal = 0, llmTotal = 0;
let totalInputTokens = 0, totalOutputTokens = 0;
const llmWins = []; // Cases where LLM found names that regex missed
const regexWins = []; // Cases where regex found names that LLM missed
const disagreements = []; // Both found names but different ones
for (const biz of allBiz) {
let data = {};
try { data = biz.business_data ? JSON.parse(biz.business_data) : {}; } catch main function · javascript · L300-L416 (117 LOC)llm-extract-comparison.js
async function main() {
const db = new Database(DB_PATH);
if (reportOnly) {
generateReport(db);
db.close();
return;
}
console.log('\n╔════════════════════════════════════════════════════════════════════╗');
console.log(`║ LLM OWNER EXTRACTION (${MODEL.substring(0, 20).padEnd(20)}) ║`);
console.log('╚════════════════════════════════════════════════════════════════════╝\n');
// Get businesses to process
const whereClause = processAll
? 'WHERE website IS NOT NULL AND length(website) > 0'
: 'WHERE (owner_first_name IS NULL OR length(owner_first_name) = 0) AND website IS NOT NULL AND length(website) > 0';
let query = `
SELECT id, name, website, owner_first_name, owner_last_name, business_data
FROM businesses
${whereClause}
ORDER BY name
`;
if (limit) query += ` LIMIT ${parseInt(limit)}`;
const businesses = db.prepare(query).all();
// Filter out those already LLM-processed (unless --all)
const toProcess = busiexportFromDatabase function · javascript · L12-L76 (65 LOC)quick-export-from-db.js
async function exportFromDatabase() {
console.log('\n╔════════════════════════════════════════════════════════════════════╗');
console.log('║ QUICK EXPORT FROM DATABASE TO LEMLIST ║');
console.log('╚════════════════════════════════════════════════════════════════════╝\n');
// Load all businesses with emails from database
const businesses = loadBusinesses({ hasEmail: true });
console.log(`📊 Found ${businesses.length} businesses with emails in database\n`);
let exported = 0;
let skipped = 0;
let errors = 0;
for (const record of businesses) {
const business = record.business;
if (!business.ownerEmail) {
console.log(`⏭️ Skipping ${business.name} (no email)`);
skipped++;
continue;
}
// Generate merge variables
const mergeVariables = getAllMergeVariables(business);
// Prepare lead data for Lemlist
const leadData = {
email: business.ownerEmail,
firstName: mergeVariables.fircleanBusinessNameForEmail function · javascript · L37-L49 (13 LOC)reexport-clean-leads.js
/**
 * Tidy a business name for use in an outgoing email.
 *
 * - Removes parenthetical annotations such as "(duplicate)". Each one is
 *   replaced by a single space so the words on either side stay separated
 *   ("Acme (HQ) Ltd" becomes "Acme Ltd", not "AcmeLtd").
 * - Title-cases names that are entirely upper case AND contain whitespace.
 *   Single-word all-caps names such as "EMS-IT" are left untouched.
 * - Keeps a possessive "s" lower case when title-casing ("JOE'S CAFE"
 *   becomes "Joe's Cafe"), while "O'BRIEN" still becomes "O'Brien".
 *
 * @param {string} name - Raw business name.
 * @returns {string} Cleaned name; falsy input is returned unchanged.
 */
function cleanBusinessNameForEmail(name) {
  if (!name) return name;

  // Strip parenthetical internal annotations, leaving one space behind so
  // neighbouring words do not get glued together.
  let cleaned = name
    .replace(/\s*\([^)]*\)\s*/g, ' ')
    .replace(/\s{2,}/g, ' ')
    .trim();

  // Title-case ALL CAPS multi-word names (but leave single-word acronyms alone).
  const isAllCaps = cleaned === cleaned.toUpperCase();
  if (isAllCaps && /\s/.test(cleaned)) {
    cleaned = cleaned
      .toLowerCase()
      .replace(/\b\w/g, (c) => c.toUpperCase())
      // \b also fires after an apostrophe, which upper-cases a possessive "s".
      .replace(/(\w)(['’])S\b/g, '$1$2s');
  }

  return cleaned;
}
// resolveFirstName function · javascript · L55-L87 (33 LOC)reexport-clean-leads.js
function resolveFirstName(lead) {
const dbName = lead.owner_first_name;
// 1. Valid DB name
if (dbName && dbName !== 'there' && isValidPersonName(dbName)) {
// Also check name pair validation
if (isValidNamePair(dbName, lead.owner_last_name || '')) {
return { firstName: dbName, lastName: lead.owner_last_name || '', usedFallback: false };
}
}
// 2. Team names (e.g., "CRO Info Team") - always valid
if (dbName && dbName.endsWith(' Team')) {
return { firstName: dbName, lastName: '', usedFallback: false };
}
// 3. Extract from email
const email = lead.owner_email;
if (email) {
const extracted = extractNameFromEmail(email);
if (extracted) {
const parts = extracted.split(' ');
const first = parts[0];
const last = parts.slice(1).join(' ');
if (isValidPersonName(first) && isValidNamePair(first, last)) {
return { firstName: first, lastName: last, usedFallback: false };
}
}
}
// 4. Fallback to "thereexport function · javascript · L89-L300 (212 LOC)reexport-clean-leads.js
async function reexport() {
console.log('\n╔════════════════════════════════════════════════════════════════════╗');
console.log('║ RE-EXPORT CLEAN LEADS TO LEMLIST ║');
console.log('╚════════════════════════════════════════════════════════════════════╝\n');
const db = new Database(DB_PATH);
// Build query with category filters
let query = `SELECT * FROM businesses
WHERE owner_email IS NOT NULL AND length(owner_email) > 0
AND (email_verified IS NULL OR email_verified != 0)`;
const params = [];
if (EXCLUDE_TRADES) {
query += ` AND category NOT IN (${TRADE_CATEGORIES.map(() => '?').join(',')})`;
params.push(...TRADE_CATEGORIES);
console.log(`Excluding trades: ${TRADE_CATEGORIES.join(', ')}\n`);
} else if (ONLY_TRADES) {
query += ` AND category IN (${TRADE_CATEGORIES.map(() => '?').join(',')})`;
params.push(...TRADE_CATEGORIES);
console.log(`Only trades: ${TRADE_CATEGORIES.join(', ')}\n`);
} else if (SPIf a scraper extracted this row, it came from Repobility (https://repobility.com)
rescrapeAll function · javascript · L19-L159 (141 LOC)rescrape-all-websites.js
async function rescrapeAll() {
console.log('\n╔════════════════════════════════════════════════════════════════════╗');
console.log('║ RE-SCRAPE ALL BUSINESSES (Playwright-enabled pipeline) ║');
console.log('╚════════════════════════════════════════════════════════════════════╝\n');
const db = new Database(DB_PATH);
const businesses = db.prepare(`
SELECT id, name, website, owner_first_name, owner_last_name, owner_email
FROM businesses
WHERE website IS NOT NULL AND length(website) > 0
ORDER BY name
`).all();
console.log(`Found ${businesses.length} businesses with websites\n`);
let scraped = 0;
let improved = 0;
let updated = 0;
let playwrightUsed = 0;
let errors = 0;
const improvements = [];
for (const biz of businesses) {
const label = `${biz.name.substring(0, 40).padEnd(40)}`;
// Skip social media URLs
if (biz.website.includes('facebook.com') || biz.website.includes('instagram.com')) {
console.log(` SKIP $needsApproval function · javascript · L16-L19 (4 LOC)shared/outreach-core/approval-system/approval-manager.js
/**
 * Decide whether an email for this business still needs template approval.
 *
 * The business category is lowercased (falling back to "unknown" when it is
 * missing) and looked up in the approved-template map.
 *
 * @param {object} business - Business record; only `category` is read.
 * @param {object|null|undefined} approvedTemplates - Approved templates keyed
 *   by lowercased category.
 * @returns {boolean} `true` when no usable approved template exists for the
 *   category, `false` when one does.
 */
function needsApproval(business, approvedTemplates) {
  // String() keeps a non-string category (e.g. a number) from throwing.
  const category = String(business.category || "unknown").toLowerCase();

  // Without a template map nothing can have been approved yet.
  if (approvedTemplates == null) {
    return true;
  }

  // Only own keys count: a plain lookup would find inherited members such as
  // `constructor` or `__proto__` and wrongly report those categories approved.
  const hasOwnTemplate = Object.prototype.hasOwnProperty.call(approvedTemplates, category);
  return !(hasOwnTemplate && approvedTemplates[category]);
}

// addToApprovalQueue function · javascript · L24-L52 (29 LOC)shared/outreach-core/approval-system/approval-manager.js
/**
 * Queue a sample email for approval under the business's category.
 *
 * The queue holds at most one entry per lowercased category. When an entry
 * already exists (whatever its status) it is returned unchanged and nothing is
 * written; otherwise a new "pending" entry is created and the queue is saved.
 *
 * @param {object} business - Business record. Reads `category`,
 *   `businessName`/`name`, `location`/`address`, `ownerFirstName`, `owners`.
 * @param {{subject: string, body: string}} emailContent - Email to approve.
 * @returns {object} The queue entry for the category (existing or new).
 */
function addToApprovalQueue(business, emailContent) {
  const queue = loadApprovalQueue();
  // String() keeps a non-string category (e.g. a number) from throwing.
  const category = String(business.category || "unknown").toLowerCase();

  // Only own keys count as queued. A plain `queue[category]` lookup would hit
  // inherited members (e.g. `constructor`), skip queuing and return a built-in.
  const alreadyQueued =
    Object.prototype.hasOwnProperty.call(queue, category) && Boolean(queue[category]);

  if (!alreadyQueued) {
    queue[category] = {
      business: {
        name: business.businessName || business.name,
        category: business.category,
        location: business.location || business.address,
        ownerFirstName: business.ownerFirstName,
        owners: business.owners || [] // Multi-owner support for Lemlist export
      },
      email: {
        subject: emailContent.subject,
        body: emailContent.body
      },
      status: "pending",
      createdAt: new Date().toISOString(),
      approvedAt: null,
      approvedBy: null
    };
    saveApprovalQueue(queue);
  }

  return queue[category];
}

// approveTemplate function · javascript · L57-L80 (24 LOC)shared/outreach-core/approval-system/approval-manager.js
/**
 * Approve the pending queue entry for a category and publish its email as the
 * approved template for that category.
 *
 * The category is matched exactly first; if the queue has no such key, its
 * lowercased form is tried, because queue entries are stored under lowercased
 * categories. The template is stored under whichever key matched.
 *
 * @param {string} category - Category whose pending entry should be approved.
 * @param {string} [approvedBy="system"] - Identifier recorded as the approver.
 * @returns {boolean} `true` when a pending entry was approved and both the
 *   queue and the templates were saved; `false` when there is no pending entry.
 */
function approveTemplate(category, approvedBy = "system") {
  const queue = loadApprovalQueue();
  const templates = loadApprovedTemplates();

  // Own-key checks keep inherited members (e.g. `constructor`) from being
  // mistaken for queue entries.
  const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);
  const requested = String(category);
  const key = hasOwn(queue, requested) ? requested : requested.toLowerCase();
  const entry = hasOwn(queue, key) ? queue[key] : undefined;

  if (!entry || entry.status !== "pending") {
    return false;
  }

  entry.status = "approved";
  entry.approvedAt = new Date().toISOString();
  entry.approvedBy = approvedBy;

  templates[key] = {
    subject: entry.email.subject,
    body: entry.email.body,
    approvedAt: entry.approvedAt,
    approvedBy: approvedBy
  };

  saveApprovalQueue(queue);
  saveApprovedTemplates(templates);
  return true;
}

// rejectTemplate function · javascript · L85-L98 (14 LOC)shared/outreach-core/approval-system/approval-manager.js
/**
 * Mark the queue entry for a category as rejected and record why.
 *
 * The category is matched exactly first; if the queue has no such key, its
 * lowercased form is tried, because queue entries are stored under lowercased
 * categories. The entry's current status is not checked.
 *
 * @param {string} category - Category whose queue entry should be rejected.
 * @param {string} reason - Stored on the entry as `rejectionReason`.
 * @returns {boolean} `true` when an entry was found, updated and saved;
 *   `false` when the queue has no entry for the category.
 */
function rejectTemplate(category, reason) {
  const queue = loadApprovalQueue();

  // Own-key checks only: a plain lookup of e.g. "constructor" would return the
  // global Object function and the writes below would land on it.
  const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);
  const requested = String(category);
  const key = hasOwn(queue, requested) ? requested : requested.toLowerCase();
  const entry = hasOwn(queue, key) ? queue[key] : undefined;

  if (!entry) {
    return false;
  }

  entry.status = "rejected";
  entry.rejectedAt = new Date().toISOString();
  entry.rejectionReason = reason;
  saveApprovalQueue(queue);
  return true;
}

// loadApprovalQueue function · javascript · L103-L115 (13 LOC)shared/outreach-core/approval-system/approval-manager.js
/**
 * Read the approval queue from APPROVAL_QUEUE_PATH.
 *
 * Loading is best-effort: a missing file yields an empty queue silently, and
 * any read/parse failure is logged and also yields an empty queue so the
 * system can continue.
 *
 * @returns {object} The parsed queue object, or `{}` when the file is missing,
 *   unreadable, not valid JSON, or does not contain a JSON object.
 */
function loadApprovalQueue() {
  try {
    if (!fs.existsSync(APPROVAL_QUEUE_PATH)) {
      return {};
    }

    const parsed = JSON.parse(fs.readFileSync(APPROVAL_QUEUE_PATH, "utf8"));

    // JSON such as `null`, `[]` or `42` parses fine but cannot be used as a
    // keyed queue; route it through the same logged fallback.
    if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
      throw new TypeError("approval queue file does not contain a JSON object");
    }

    return parsed;
  } catch (error) {
    // Log error for debugging but return empty queue to allow system to continue
    console.error('[approval-manager] Failed to load approval queue:', error.message);
    return {};
  }
}

// saveApprovalQueue function · javascript · L120-L126 (7 LOC)shared/outreach-core/approval-system/approval-manager.js
/**
 * Write the approval queue to APPROVAL_QUEUE_PATH as 2-space-indented JSON,
 * creating the parent directory first when it does not exist yet.
 *
 * @param {object} queue - Queue object to serialize.
 * @returns {void}
 */
function saveApprovalQueue(queue) {
  const targetDir = path.dirname(APPROVAL_QUEUE_PATH);
  const dirMissing = !fs.existsSync(targetDir);

  if (dirMissing) {
    fs.mkdirSync(targetDir, { recursive: true });
  }

  const payload = JSON.stringify(queue, null, 2);
  fs.writeFileSync(APPROVAL_QUEUE_PATH, payload);
}

// loadApprovedTemplates function · javascript · L131-L143 (13 LOC)shared/outreach-core/approval-system/approval-manager.js
/**
 * Read the approved templates from APPROVED_TEMPLATES_PATH.
 *
 * Loading is best-effort: a missing file yields an empty map silently, and any
 * read/parse failure is logged and also yields an empty map so the system can
 * continue.
 *
 * @returns {object} The parsed templates object, or `{}` when the file is
 *   missing, unreadable, not valid JSON, or does not contain a JSON object.
 */
function loadApprovedTemplates() {
  try {
    if (!fs.existsSync(APPROVED_TEMPLATES_PATH)) {
      return {};
    }

    const parsed = JSON.parse(fs.readFileSync(APPROVED_TEMPLATES_PATH, "utf8"));

    // JSON such as `null`, `[]` or `42` parses fine but cannot be used as a
    // keyed template map; route it through the same logged fallback.
    if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
      throw new TypeError("approved templates file does not contain a JSON object");
    }

    return parsed;
  } catch (error) {
    // Log error for debugging but return empty templates to allow system to continue
    console.error('[approval-manager] Failed to load approved templates:', error.message);
    return {};
  }
}
saveApprovedTemplates function · javascript · L148-L154 (7 LOC)shared/outreach-core/approval-system/approval-manager.js
/**
 * Write the approved templates to APPROVED_TEMPLATES_PATH as 2-space-indented
 * JSON, creating the parent directory first when it does not exist yet.
 *
 * @param {object} templates - Template map to serialize.
 * @returns {void}
 */
function saveApprovedTemplates(templates) {
  const targetDir = path.dirname(APPROVED_TEMPLATES_PATH);
  const dirMissing = !fs.existsSync(targetDir);

  if (dirMissing) {
    fs.mkdirSync(targetDir, { recursive: true });
  }

  const payload = JSON.stringify(templates, null, 2);
  fs.writeFileSync(APPROVED_TEMPLATES_PATH, payload);
}

// getPendingApprovals function · javascript · L159-L164 (6 LOC)shared/outreach-core/approval-system/approval-manager.js
/**
 * List every queue entry whose status is "pending".
 *
 * @returns {object[]} One object per pending entry, in queue key order: the
 *   queue key as `category`, followed by the entry's own fields.
 */
function getPendingApprovals() {
  const pending = [];

  for (const [category, item] of Object.entries(loadApprovalQueue())) {
    if (item.status === "pending") {
      pending.push({ category, ...item });
    }
  }

  return pending;
}

// editAndApproveTemplate function · javascript · L174-L203 (30 LOC)shared/outreach-core/approval-system/approval-manager.js
/**
 * Replace the email of a pending queue entry with edited content, approve it,
 * and publish the edited email as the approved template for the category.
 *
 * The category is matched exactly first; if the queue has no such key, its
 * lowercased form is tried, because queue entries are stored under lowercased
 * categories. The template is stored under whichever key matched.
 *
 * @param {string} category - Category whose pending entry should be approved.
 * @param {string} newSubject - Edited subject line.
 * @param {string} newBody - Edited email body.
 * @param {string} [approvedBy="system"] - Identifier recorded as the approver.
 * @returns {boolean} `true` when a pending entry was updated and both the
 *   queue and the templates were saved; `false` when there is no pending entry.
 */
function editAndApproveTemplate(category, newSubject, newBody, approvedBy = "system") {
  const queue = loadApprovalQueue();
  const templates = loadApprovedTemplates();

  // Own-key checks keep inherited members (e.g. `constructor`) from being
  // mistaken for queue entries.
  const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);
  const requested = String(category);
  const key = hasOwn(queue, requested) ? requested : requested.toLowerCase();
  const entry = hasOwn(queue, key) ? queue[key] : undefined;

  if (!entry || entry.status !== "pending") {
    return false;
  }

  // Update email content in queue
  entry.email.subject = newSubject;
  entry.email.body = newBody;
  entry.status = "approved";
  entry.approvedAt = new Date().toISOString();
  entry.approvedBy = approvedBy;
  entry.edited = true;

  // Save to approved templates
  templates[key] = {
    subject: newSubject,
    body: newBody,
    approvedAt: entry.approvedAt,
    approvedBy: approvedBy,
    edited: true
  };

  saveApprovalQueue(queue);
  saveApprovedTemplates(templates);
  return true;
}