Function bodies 451 total
findSpeciesGroup function · typescript · L215-L257 (43 LOC)scripts/import-cares-iucn-data.ts
/**
 * Resolves a genus/species pair to a `group_id` from `species_name_group`.
 *
 * Two lookups run in order: an exact match on the canonical genus and
 * species columns, then a match of "<genus> <species>" against
 * `species_scientific_name.scientific_name`.
 *
 * @param db - Database handle used for both lookups.
 * @param genus - Genus to look up.
 * @param species - Species name to look up.
 * @param verbose - When true, logs which lookup produced the match.
 * @returns The matching `group_id`, or `null` when neither lookup finds a row.
 */
async function findSpeciesGroup(
  db: Database,
  genus: string,
  species: string,
  verbose: boolean
): Promise<number | null> {
  // Only emits output when the caller asked for verbose mode.
  const report = (message: string): void => {
    if (verbose) {
      console.log(message);
    }
  };

  // First choice: the canonical name columns.
  const canonicalRow = await db.get(
    `
SELECT group_id
FROM species_name_group
WHERE canonical_genus = ? AND canonical_species_name = ?
`,
    [genus, species]
  );
  if (canonicalRow) {
    report(` ✓ Found group_id ${canonicalRow.group_id} for ${genus} ${species}`);
    return canonicalRow.group_id;
  }

  // Fallback: any recorded scientific-name variant for the full binomial.
  const binomial = `${genus} ${species}`;
  const variantRow = await db.get(
    `
SELECT sng.group_id
FROM species_name_group sng
INNER JOIN species_scientific_name ssn ON sng.group_id = ssn.group_id
WHERE ssn.scientific_name = ?
`,
    [binomial]
  );
  if (!variantRow) {
    return null;
  }
  report(` ✓ Found group_id ${variantRow.group_id} via scientific name variant`);
  return variantRow.group_id;
}
// updateSpeciesGroup function · typescript · L260-L291 (32 LOC)scripts/import-cares-iucn-data.ts
/**
 * Stores an IUCN category on a species group and records the change in
 * `iucn_sync_log`.
 *
 * In dry-run mode nothing is written; the intended change is only logged.
 * Otherwise the group row and the log row share one ISO-8601 timestamp,
 * and the log row is written with status "csv_import" and no error message.
 *
 * @param db - Database handle to write through.
 * @param groupId - `group_id` of the species group to update.
 * @param iucnCategory - Category value to store.
 * @param dryRun - When true, log the intended update and return.
 */
async function updateSpeciesGroup(
  db: Database,
  groupId: number,
  iucnCategory: string,
  dryRun: boolean
): Promise<void> {
  if (!dryRun) {
    const updateSql = `
UPDATE species_name_group
SET iucn_redlist_category = ?,
iucn_last_updated = ?
WHERE group_id = ?
`;
    const logSql = `
INSERT INTO iucn_sync_log (group_id, sync_date, status, category_found, error_message)
VALUES (?, ?, ?, ?, ?)
`;
    // One timestamp for both rows so the update and its log entry line up.
    const timestamp = new Date().toISOString();

    await db.run(updateSql, [iucnCategory, timestamp, groupId]);
    // Log to sync table
    await db.run(logSql, [groupId, timestamp, "csv_import", iucnCategory, null]);
    return;
  }

  console.log(` [DRY RUN] Would update group_id ${groupId} with IUCN category: ${iucnCategory}`);
}
// importIUCNData function · typescript · L294-L405 (112 LOC)scripts/import-cares-iucn-data.ts
async function importIUCNData() {
const { csvFile, dryRun, verbose } = parseArgs();
console.log("\n=== IUCN Data Import from CARES CSV ===\n");
console.log(`CSV File: ${csvFile}`);
console.log(`Dry Run: ${dryRun ? "YES (no changes will be made)" : "NO"}`);
console.log(`Verbose: ${verbose ? "YES" : "NO"}`);
console.log("");
// Check if CSV file exists
try {
await fs.access(csvFile);
} catch (error) {
console.error(`Error: CSV file not found: ${csvFile}`);
console.error("\nYou can:");
console.error(" 1. Download CARES species data from https://caresforfish.org/");
console.error(" 2. Specify a different CSV file with --csv-file <path>");
console.error(" 3. Run with --help to see usage information");
process.exit(1);
}
// Parse CSV
const species = await parseCSV(csvFile);
if (species.length === 0) {
console.log("\nNo valid species found in CSV. Exiting.");
process.exit(0);
}
// Connect to database
console.log("\nCogetMemberInfo function · typescript · L26-L61 (36 LOC)scripts/merge-members.ts
async function getMemberInfo(memberId: number): Promise<MemberInfo | null> {
const members = await query<{ id: number; contact_email: string; display_name: string }>(
"SELECT id, contact_email, display_name FROM members WHERE id = ?",
[memberId]
);
if (members.length === 0) {
return null;
}
const member = members[0];
const submissions = await query<{ count: number }>(
"SELECT COUNT(*) as count FROM submissions WHERE member_id = ?",
[memberId]
);
const passwords = await query<{ member_id: number }>(
"SELECT member_id FROM password_account WHERE member_id = ?",
[memberId]
);
const google = await query<{ member_id: number }>(
"SELECT member_id FROM google_account WHERE member_id = ?",
[memberId]
);
return {
id: member.id,
contact_email: member.contact_email,
display_name: member.display_name,
submission_count: submissions[0].count,
has_password: passwords.length > 0,
has_google: google.length > 0,
mergeMembers function · typescript · L63-L184 (122 LOC)scripts/merge-members.ts
async function mergeMembers(fromId: number, toId: number) {
try {
console.log("Member Account Merge Tool");
console.log("=========================\n");
// Change to parent directory
process.chdir(path.join(__dirname, ".."));
// Initialize database
console.log("Initializing database connection...");
await init();
// Get info about both members
console.log(`\nFetching member information...`);
const fromMember = await getMemberInfo(fromId);
const toMember = await getMemberInfo(toId);
if (!fromMember) {
console.error(`Error: Member ${fromId} not found`);
process.exit(1);
}
if (!toMember) {
console.error(`Error: Member ${toId} not found`);
process.exit(1);
}
console.log(`\nFrom (will be deleted):`);
console.log(` ID: ${fromMember.id}`);
console.log(` Email: ${fromMember.contact_email}`);
console.log(` Name: ${fromMember.display_name}`);
console.log(` Submissions: ${fromMember.parseNANFAChecklist function · typescript · L22-L85 (64 LOC)scripts/parse-nanfa-usa-species.ts
function parseNANFAChecklist(htmlPath: string): FishSpecies[] {
const html = fs.readFileSync(htmlPath, 'utf-8');
const species: FishSpecies[] = [];
// Split by lines for easier processing
const lines = html.split('\n');
let currentFamily = '';
for (let i = 0; i < lines.length; i++) {
const line = lines[i].trim();
// Extract family name
const familyMatch = line.match(/<b>Family ([^<]+)<\/b>/);
if (familyMatch) {
currentFamily = familyMatch[1].replace(/\(.*?\)/g, '').trim();
continue;
}
// Extract species entries - look for lines with <i> tags containing scientific names
const speciesMatch = line.match(/<i>([^<]+)<\/i>\s*([^;]+);?\s*([^<]*)/);
if (speciesMatch && currentFamily) {
const scientificName = speciesMatch[1].trim();
const remainder = speciesMatch[2] + ' ' + speciesMatch[3];
// Skip if marked as EXOTIC
if (remainder.includes('EXOTIC')) {
continue;
}
// Extract common name (main function · typescript · L87-L109 (23 LOC)scripts/parse-nanfa-usa-species.ts
/**
 * Parses the NANFA checklist HTML and prints the species as CSV.
 *
 * The checklist is read from the parent of this script's directory. CSV rows
 * go to stdout; progress and error messages go to stderr so they do not
 * pollute redirected CSV output. Exits with status 1 when the checklist file
 * is missing.
 */
function main() {
  const htmlPath = path.join(__dirname, '..', 'Checklist of Freshwater Fishes Native to North America.html');
  if (!fs.existsSync(htmlPath)) {
    console.error(`ERROR: Could not find NANFA checklist at ${htmlPath}`);
    process.exit(1);
  }
  console.error('Parsing NANFA checklist...');
  const species = parseNANFAChecklist(htmlPath);
  console.error(`Found ${species.length} USA-native fish species\n`);

  // Quote a single CSV field, doubling embedded double quotes so a name that
  // contains `"` cannot terminate the field early. String() mirrors what
  // template interpolation did in the unescaped version.
  const csvField = (value: unknown): string => `"${String(value).replace(/"/g, '""')}"`;

  // Output CSV format
  console.log('Scientific Name,Common Name,Family,Conservation Status');
  for (const sp of species) {
    const status = sp.conservationStatus || '';
    const row = [sp.scientificName, sp.commonName, sp.family, status].map(csvField).join(',');
    console.log(row);
  }
  console.error(`\n✓ Exported ${species.length} species to CSV format`);
}
// Repobility · severity-and-effort ranking · https://repobility.com
setupE2EDatabase function · typescript · L12-L48 (37 LOC)scripts/setup-e2e-db.ts
/**
 * Creates (or opens) the E2E test database, applies migrations and seeds the
 * species the E2E tests rely on.
 *
 * The database file lives at `../db/database.db` relative to this script; its
 * directory is created when missing. The connection is always closed, even
 * when migrating or seeding fails, and the failure is then propagated to the
 * caller.
 */
async function setupE2EDatabase() {
  const dbPath = path.join(__dirname, "../db/database.db");
  console.log("Setting up E2E test database...");
  console.log(`Database path: ${dbPath}`);

  // Ensure db directory exists
  const dbDir = path.dirname(dbPath);
  if (!fs.existsSync(dbDir)) {
    console.log(`Creating db directory: ${dbDir}`);
    fs.mkdirSync(dbDir, { recursive: true });
  }

  // Open/create database
  const db = await open({
    filename: dbPath,
    driver: sqlite3.Database,
    mode: sqlite3.OPEN_CREATE | sqlite3.OPEN_READWRITE,
  });

  try {
    // Run migrations
    console.log("Running migrations...");
    await db.migrate({
      migrationsPath: path.join(__dirname, "../db/migrations"),
    });
    console.log("Migrations complete");

    // Seed test species data
    console.log("Seeding test species...");
    await seedTestSpecies(db);
  } finally {
    // Release the handle on both the success and the failure path.
    await db.close();
  }
  console.log("E2E database setup complete ✓");
}
// seedTestSpecies function · typescript · L50-L153 (104 LOC)scripts/setup-e2e-db.ts
async function seedTestSpecies(db: any) {
// Species needed by E2E tests
const testSpecies = [
{
group: { canonical_genus: "Poecilia", canonical_species_name: "reticulata", program_class: "Livebearers", species_type: "Fish" },
commonNames: ["Guppy", "Fancy Guppy"],
scientificNames: ["Poecilia reticulata"]
},
{
group: { canonical_genus: "Xiphophorus", canonical_species_name: "hellerii", program_class: "Livebearers", species_type: "Fish" },
commonNames: ["Swordtail", "Green Swordtail"],
scientificNames: ["Xiphophorus hellerii"]
},
{
group: { canonical_genus: "Xiphophorus", canonical_species_name: "maculatus", program_class: "Livebearers", species_type: "Fish" },
commonNames: ["Platy", "Southern Platyfish"],
scientificNames: ["Xiphophorus maculatus"]
},
// Plant species for E2E testing
{
group: { canonical_genus: "Cryptocoryne", canonical_species_name: "wendtii", program_class: "Cryptocoryne", species_type: "Plant" },
commonNames: ["WendttestImageProcessing function · typescript · L58-L109 (52 LOC)scripts/stress-test-uploads.ts
async function testImageProcessing(
buffer: Buffer,
testName: string,
width: number,
height: number,
fileSizeMB: number
): Promise<TestResult> {
console.log(`\nTesting: ${testName}`);
const startMemory = process.memoryUsage().heapUsed / (1024 * 1024);
const startTime = Date.now();
try {
// Import the image processor
const { processImage } = await import("../src/utils/image-processor");
// Process the image
await processImage(buffer);
const endTime = Date.now();
const endMemory = process.memoryUsage().heapUsed / (1024 * 1024);
const processingTimeMs = endTime - startTime;
const peakMemoryMB = endMemory - startMemory;
console.log(` ✅ Success!`);
console.log(` Processing time: ${processingTimeMs}ms`);
console.log(` Memory used: ${peakMemoryMB.toFixed(2)}MB`);
return {
name: testName,
width,
height,
fileSizeMB,
processingTimeMs,
peakMemoryMB,
success: true,
};
} catch testConcurrentUploads function · typescript · L114-L147 (34 LOC)scripts/stress-test-uploads.ts
async function testConcurrentUploads(count: number): Promise<void> {
console.log(`\n${'='.repeat(60)}`);
console.log(`Testing ${count} concurrent uploads...`);
console.log('='.repeat(60));
const testImages = await Promise.all(
Array.from({ length: count }, (_, i) =>
createTestImage(2000, 1500, `concurrent-${i + 1}`)
)
);
const startTime = Date.now();
const startMemory = process.memoryUsage().heapUsed / (1024 * 1024);
const uploadPromises = testImages.map((img, i) =>
testImageProcessing(img.buffer, `Concurrent Upload ${i + 1}`, 2000, 1500, img.fileSizeMB)
);
const concurrentResults = await Promise.all(uploadPromises);
const endTime = Date.now();
const endMemory = process.memoryUsage().heapUsed / (1024 * 1024);
const totalTime = endTime - startTime;
const totalMemory = endMemory - startMemory;
const successCount = concurrentResults.filter(r => r.success).length;
console.log(`\nConcurrent Upload Summary:`);
console.log(` Total tirunStressTests function · typescript · L152-L231 (80 LOC)scripts/stress-test-uploads.ts
async function runStressTests() {
console.log('='.repeat(60));
console.log('IMAGE UPLOAD STRESS TEST');
console.log('='.repeat(60));
console.log(`Node version: ${process.version}`);
console.log(`Platform: ${os.platform()}`);
console.log(`Total memory: ${(os.totalmem() / (1024 ** 3)).toFixed(2)}GB`);
console.log(`Free memory: ${(os.freemem() / (1024 ** 3)).toFixed(2)}GB`);
console.log('='.repeat(60));
// Test 1: Standard iPhone photos (4032x3024, ~12MP)
console.log(`\n${'='.repeat(60)}`);
console.log('Test 1: Standard iPhone Photo (4032x3024)');
console.log('='.repeat(60));
const iphone12MP = await createTestImage(4032, 3024, "iPhone-12MP");
results.push(
await testImageProcessing(iphone12MP.buffer, "iPhone 12MP (4032x3024)", 4032, 3024, iphone12MP.fileSizeMB)
);
// Test 2: iPhone Pro 48MP (8064x6048)
console.log(`\n${'='.repeat(60)}`);
console.log('Test 2: iPhone Pro 48MP Photo (8064x6048)');
console.log('='.repeat(60));
const iphone48MP = printSummary function · typescript · L236-L314 (79 LOC)scripts/stress-test-uploads.ts
function printSummary() {
console.log(`\n${'='.repeat(60)}`);
console.log('STRESS TEST SUMMARY');
console.log('='.repeat(60));
const successful = results.filter(r => r.success);
const failed = results.filter(r => !r.success);
console.log(`\nTotal tests: ${results.length}`);
console.log(`✅ Passed: ${successful.length}`);
console.log(`❌ Failed: ${failed.length}`);
if (successful.length > 0) {
console.log(`\nPerformance Metrics (Successful Tests):`);
const avgProcessingTime = successful.reduce((sum, r) => sum + r.processingTimeMs, 0) / successful.length;
const maxProcessingTime = Math.max(...successful.map(r => r.processingTimeMs));
const minProcessingTime = Math.min(...successful.map(r => r.processingTimeMs));
const avgMemory = successful.reduce((sum, r) => sum + r.peakMemoryMB, 0) / successful.length;
const maxMemory = Math.max(...successful.map(r => r.peakMemoryMB));
const avgFileSize = successful.reduce((sum, r) => sum + r.fileSizeMBparseStats function · typescript · L57-L96 (40 LOC)scripts/sync-all-species-full-database.ts
function parseStats(output: string): SyncStats {
const stats: SyncStats = {
totalSpecies: 0,
successCount: 0,
notFoundCount: 0,
errorCount: 0,
linksAdded: 0,
imagesAdded: 0,
};
const totalMatch = output.match(/Total processed:\s+(\d+)/);
if (totalMatch) stats.totalSpecies = parseInt(totalMatch[1]);
const successMatch = output.match(/✅ Success:\s+(\d+)/);
if (successMatch) stats.successCount = parseInt(successMatch[1]);
const notFoundMatch = output.match(/❌ Not found:\s+(\d+)/);
if (notFoundMatch) stats.notFoundCount = parseInt(notFoundMatch[1]);
const errorMatch = output.match(/⚠️\s+Errors?:\s+(\d+)/);
if (errorMatch) stats.errorCount = parseInt(errorMatch[1]);
const linksMatch = output.match(/Total new links:\s+(\d+)/);
if (linksMatch) stats.linksAdded = parseInt(linksMatch[1]);
const imagesMatch = output.match(/Total new images:\s+(\d+)/);
if (imagesMatch) stats.imagesAdded = parseInt(imagesMatch[1]);
// Also try FishBase frunSyncScript function · typescript · L98-L167 (70 LOC)scripts/sync-all-species-full-database.ts
async function runSyncScript(
scriptPath: string,
args: string[],
source: string
): Promise<SyncResult> {
const startTime = Date.now();
return new Promise((resolve) => {
console.log(`\n${"=".repeat(80)}`);
console.log(`🔄 Starting ${source} sync...`);
console.log(` Script: ${scriptPath}`);
console.log(` Args: ${args.join(" ")}`);
console.log(` Time: ${new Date().toISOString()}`);
console.log(`${"=".repeat(80)}\n`);
let stdout = "";
let stderr = "";
const child = spawn("npm", ["run", "script", scriptPath, "--", ...args], {
cwd: join(__dirname, ".."),
stdio: ["ignore", "pipe", "pipe"],
});
child.stdout.on("data", (data) => {
const str = data.toString();
stdout += str;
process.stdout.write(str);
});
child.stderr.on("data", (data) => {
const str = data.toString();
stderr += str;
process.stderr.write(str);
});
child.on("close", (code) => {
const duration = (All rows above produced by Repobility · https://repobility.com
main function · typescript · L169-L341 (173 LOC)scripts/sync-all-species-full-database.ts
async function main() {
const args = process.argv.slice(2);
const execute = args.includes("--execute");
const downloadImages = args.includes("--download-images");
const batchSizeArg = args.find((arg) => arg.startsWith("--batch-size="));
const batchSize = batchSizeArg ? batchSizeArg.split("=")[1] : undefined;
const speciesTypeArg = args.find((arg) => arg.startsWith("--species-type="));
const speciesType = speciesTypeArg ? speciesTypeArg.split("=")[1] : undefined;
const skipWikipedia = args.includes("--skip-wikipedia");
const skipGbif = args.includes("--skip-gbif");
const skipFishbase = args.includes("--skip-fishbase");
const overallStartTime = Date.now();
console.log("\n" + "=".repeat(80));
console.log("🌐 Full Database External Data Sync Orchestrator");
console.log("=".repeat(80));
console.log(`Mode: ${execute ? "🔴 EXECUTE" : "🟡 DRY-RUN"}`);
console.log(`Download images to R2: ${downloadImages ? "YES" : "NO (store external URLs)"}`);
console.log(`StargetS3Client function · typescript · L86-L98 (13 LOC)scripts/sync-fishbase-all-species.ts
/**
 * Returns the shared `s3Client`, building it from `config.storage` the first
 * time it is requested and reusing that instance afterwards.
 *
 * @returns The cached client instance.
 */
function getS3Client(): S3Client {
  if (s3Client) {
    return s3Client;
  }

  const { s3Url, s3AccessKeyId, s3Secret } = config.storage;
  s3Client = new S3Client({
    region: 'auto',
    endpoint: s3Url,
    credentials: {
      accessKeyId: s3AccessKeyId,
      secretAccessKey: s3Secret,
    },
  });
  return s3Client;
}
// checkR2Exists function · typescript · L112-L124 (13 LOC)scripts/sync-fishbase-all-species.ts
/**
 * Reports whether an object with the given key can be HEAD-ed in the
 * configured bucket.
 *
 * Any failure of the request (not only a missing object) yields `false`.
 *
 * @param r2Key - Object key to probe.
 * @returns `true` when the HEAD request succeeds, otherwise `false`.
 */
async function checkR2Exists(r2Key: string): Promise<boolean> {
  let found = false;
  try {
    const client = getS3Client();
    const headRequest = new HeadObjectCommand({
      Bucket: config.storage.s3Bucket,
      Key: r2Key,
    });
    await client.send(headRequest);
    found = true;
  } catch {
    // Every error is treated as "object not present".
  }
  return found;
}
// getExistingReferences function · typescript · L195-L201 (7 LOC)scripts/sync-fishbase-all-species.ts
/**
 * Loads the reference URLs already stored for a species group.
 *
 * @param sqlite - Database handle to query.
 * @param groupId - `group_id` whose references are wanted.
 * @returns The distinct `reference_url` values found for the group.
 */
async function getExistingReferences(sqlite: Database, groupId: number): Promise<Set<string>> {
  const rows = await sqlite.all<ExistingReference[]>(
    'SELECT reference_url FROM species_external_references WHERE group_id = ?',
    [groupId]
  );

  const knownUrls = new Set<string>();
  for (const row of rows) {
    knownUrls.add(row.reference_url);
  }
  return knownUrls;
}
// getExistingImages function · typescript · L203-L209 (7 LOC)scripts/sync-fishbase-all-species.ts
/**
 * Loads the image URLs already stored for a species group.
 *
 * @param sqlite - Database handle to query.
 * @param groupId - `group_id` whose images are wanted.
 * @returns The distinct `image_url` values found for the group.
 */
async function getExistingImages(sqlite: Database, groupId: number): Promise<Set<string>> {
  const rows = await sqlite.all<ExistingImage[]>(
    'SELECT image_url FROM species_images WHERE group_id = ?',
    [groupId]
  );

  const knownUrls = new Set<string>();
  for (const row of rows) {
    knownUrls.add(row.image_url);
  }
  return knownUrls;
}
// syncSpecies function · typescript · L211-L304 (94 LOC)scripts/sync-fishbase-all-species.ts
async function syncSpecies(
sqlite: Database,
duckConn: DuckDBConnection,
species: OurSpecies,
force: boolean
): Promise<SyncResult> {
const scientificName = `${species.canonical_genus} ${species.canonical_species_name}`;
try {
// Query FishBase for this species
const fishbaseSpecies = await duckConn.all<FishBaseSpecies>(`
SELECT
SpecCode, Genus, Species,
PicPreferredName, PicPreferredNameM, PicPreferredNameF, PicPreferredNameJ,
Pic, PictureFemale, LarvaPic, EggPic
FROM fb_species
WHERE LOWER(Genus) = LOWER(?)
AND LOWER(Species) = LOWER(?)
LIMIT 1
`, species.canonical_genus, species.canonical_species_name);
if (fishbaseSpecies.length === 0) {
return {
group_id: species.group_id,
scientific_name: scientificName,
spec_code: 0,
fishbase_url: '',
image_urls: [],
new_links: 0,
new_images: 0,
status: 'not_found',
};
}
constapplySync function · typescript · L306-L390 (85 LOC)scripts/sync-fishbase-all-species.ts
async function applySync(
sqlite: Database,
result: SyncResult,
downloadImages: boolean
): Promise<void> {
await sqlite.exec('BEGIN TRANSACTION');
try {
const now = new Date().toISOString();
// Add FishBase URL to external references if new
if (result.new_links > 0) {
// Get current max display_order
const maxOrder = await sqlite.get<{ max_order: number | null }>(
'SELECT MAX(display_order) as max_order FROM species_external_references WHERE group_id = ?',
[result.group_id]
);
const nextOrder = (maxOrder?.max_order ?? -1) + 1;
await sqlite.run(
`INSERT INTO species_external_references (group_id, reference_url, display_order)
VALUES (?, ?, ?)
ON CONFLICT (group_id, reference_url) DO NOTHING`,
[result.group_id, result.fishbase_url, nextOrder]
);
}
// Add new images
if (result.new_images > 0) {
// Get current max display_order
const maxOrder = await sqlitmain function · typescript · L392-L589 (198 LOC)scripts/sync-fishbase-all-species.ts
async function main() {
const args = process.argv.slice(2);
const execute = args.includes('--execute');
const force = args.includes('--force');
const downloadImages = args.includes('--download-images');
const batchSizeArg = args.find(arg => arg.startsWith('--batch-size='));
const batchSize = batchSizeArg ? parseInt(batchSizeArg.split('=')[1]) : undefined;
const speciesIdArg = args.find(arg => arg.startsWith('--species-id='));
const speciesId = speciesIdArg ? parseInt(speciesIdArg.split('=')[1]) : undefined;
const startAfterArg = args.find(arg => arg.startsWith('--start-after='));
const startAfter = startAfterArg ? parseInt(startAfterArg.split('=')[1]) : undefined;
const dbArg = args.find(arg => arg.startsWith('--db='));
const customDbPath = dbArg ? dbArg.split('=')[1] : null;
console.log('\n=== FishBase Full Database Sync (DuckDB) ===\n');
console.log(`Mode: ${execute ? '🔴 EXECUTE (will modify database)' : '🟡 DRY-RUN (preview only)'}`);
console.log(`ForceMethodology: Repobility · https://repobility.com/research/state-of-ai-code-2026/
getS3Client function · typescript · L109-L121 (13 LOC)scripts/sync-gbif-all-species.ts
/**
 * Lazily creates the shared `s3Client` from `config.storage` and returns it;
 * later calls return the same instance.
 *
 * @returns The cached client instance.
 */
function getS3Client(): S3Client {
  let client = s3Client;
  if (!client) {
    client = new S3Client({
      region: "auto",
      endpoint: config.storage.s3Url,
      credentials: {
        accessKeyId: config.storage.s3AccessKeyId,
        secretAccessKey: config.storage.s3Secret,
      },
    });
    // Cache for subsequent callers.
    s3Client = client;
  }
  return client;
}
// checkR2Exists function · typescript · L135-L147 (13 LOC)scripts/sync-gbif-all-species.ts
/**
 * Probes the configured bucket for an object key with a HEAD request.
 *
 * The result is `false` for every kind of failure, not only for a missing
 * object.
 *
 * @param r2Key - Object key to probe.
 * @returns `true` when the HEAD request succeeds, otherwise `false`.
 */
async function checkR2Exists(r2Key: string): Promise<boolean> {
  try {
    const client = getS3Client();
    const probe = new HeadObjectCommand({
      Bucket: config.storage.s3Bucket,
      Key: r2Key,
    });
    await client.send(probe);
  } catch {
    return false;
  }
  return true;
}
// matchSpecies function · typescript · L213-L243 (31 LOC)scripts/sync-gbif-all-species.ts
/**
 * Looks up "<genus> <species>" with the GBIF species-match endpoint.
 *
 * @param genus - Genus part of the scientific name.
 * @param species - Species part of the scientific name.
 * @returns The match payload, or `null` when the HTTP response is not OK,
 *   the match type is "NONE", the confidence is below 80, or the request
 *   fails (the failure is logged to stderr).
 */
async function matchSpecies(genus: string, species: string): Promise<GBIFSpeciesMatch | null> {
  const scientificName = `${genus} ${species}`;
  try {
    const url = `${GBIF_API_BASE}/species/match?name=${encodeURIComponent(scientificName)}&verbose=false`;
    const response = await fetch(url, {
      method: "GET",
      headers: {
        Accept: "application/json",
        "User-Agent": "BAP-Species-Database/1.0 (mulm project)",
      },
    });
    if (!response.ok) {
      return null;
    }
    const data = (await response.json()) as GBIFSpeciesMatch;
    // Check if we got a good match
    if (data.matchType === "NONE" || data.confidence < 80) {
      return null;
    }
    return data;
  } catch (error: unknown) {
    // Anything can be thrown; only read `.message` from real Error objects so
    // other rejection values are logged as themselves instead of "undefined".
    const reason = error instanceof Error ? error.message : String(error);
    console.error(` Failed to match species: ${reason}`);
    return null;
  }
}
// getSpeciesMedia function · typescript · L248-L277 (30 LOC)scripts/sync-gbif-all-species.ts
/**
 * Fetches media records for a GBIF usage key and keeps only still images
 * that carry an identifier.
 *
 * @param usageKey - GBIF usage key placed in the request path.
 * @param limit - Value sent as the `limit` query parameter (default 10).
 * @returns The filtered media records; an empty array when the HTTP response
 *   is not OK, the payload has no `results`, or the request fails (the
 *   failure is logged to stderr).
 */
async function getSpeciesMedia(usageKey: number, limit = 10): Promise<GBIFMedia[]> {
  try {
    const url = `${GBIF_API_BASE}/species/${usageKey}/media?limit=${limit}`;
    const response = await fetch(url, {
      method: "GET",
      headers: {
        Accept: "application/json",
        "User-Agent": "BAP-Species-Database/1.0 (mulm project)",
      },
    });
    if (!response.ok) {
      return [];
    }
    const data = (await response.json()) as GBIFMediaResponse;
    if (!data.results) {
      return [];
    }
    // Filter to only images (StillImage)
    return data.results.filter(
      (media) => media.type === "StillImage" && media.identifier
    );
  } catch (error: unknown) {
    // Still best-effort (callers get an empty list), but leave a trace so a
    // network or parse failure is distinguishable from "no media".
    const reason = error instanceof Error ? error.message : String(error);
    console.error(` Failed to fetch media for usage key ${usageKey}: ${reason}`);
    return [];
  }
}
// getExternalData function · typescript · L282-L307 (26 LOC)scripts/sync-gbif-all-species.ts
/**
 * Gathers GBIF data for a species: the species page URL, an occurrence
 * density map URL and up to 10 image URLs.
 *
 * @param genus - Genus part of the scientific name.
 * @param species - Species part of the scientific name.
 * @returns The collected data, or `null` when `matchSpecies` finds no match.
 */
async function getExternalData(genus: string, species: string): Promise<GBIFResult | null> {
  const match = await matchSpecies(genus, species);
  if (!match) {
    return null;
  }
  // Construct GBIF species page URL
  const gbifUrl = `${GBIF_SPECIES_URL}/${match.usageKey}`;
  // Construct occurrence map URL (distribution map).
  // The tile segment had been mangled into an e-mail-obfuscation placeholder,
  // which made the URL invalid; restored to the z/x/y tile 0/0/0 form.
  // NOTE(review): the "@1x" scale suffix is assumed — confirm against the
  // original script if a higher-density tile (@2x, ...) was intended.
  const occurrenceMapUrl = `https://api.gbif.org/v2/map/occurrence/density/0/0/0@1x.png?taxonKey=${match.usageKey}&bin=hex&hexPerTile=30&style=purpleYellow.point`;
  // Get images
  const media = await getSpeciesMedia(match.usageKey, 10);
  const imageUrls = media.map((m) => m.identifier).filter((url) => url && url.length > 0);
  return {
    usageKey: match.usageKey,
    gbifUrl,
    occurrenceMapUrl,
    imageUrls,
    // Prefer the canonical name; fall back to the full scientific name.
    scientificName: match.canonicalName || match.scientificName,
    confidence: match.confidence,
  };
}
// getExistingReferences function · typescript · L309-L318 (10 LOC)scripts/sync-gbif-all-species.ts
/**
 * Reads the reference URLs already recorded for a species group.
 *
 * @param sqlite - Database handle to query.
 * @param groupId - `group_id` whose references are wanted.
 * @returns The distinct `reference_url` values found for the group.
 */
async function getExistingReferences(
  sqlite: Database,
  groupId: number
): Promise<Set<string>> {
  const sql = "SELECT reference_url FROM species_external_references WHERE group_id = ?";
  const rows = await sqlite.all<ExistingReference[]>(sql, [groupId]);
  const urls = rows.map((row) => row.reference_url);
  return new Set(urls);
}
// getExistingImages function · typescript · L320-L326 (7 LOC)scripts/sync-gbif-all-species.ts
/**
 * Reads the image URLs already recorded for a species group.
 *
 * @param sqlite - Database handle to query.
 * @param groupId - `group_id` whose images are wanted.
 * @returns The distinct `image_url` values found for the group.
 */
async function getExistingImages(sqlite: Database, groupId: number): Promise<Set<string>> {
  const sql = "SELECT image_url FROM species_images WHERE group_id = ?";
  const rows = await sqlite.all<ExistingImage[]>(sql, [groupId]);
  const urls = rows.map((row) => row.image_url);
  return new Set(urls);
}
// syncSpecies function · typescript · L328-L410 (83 LOC)scripts/sync-gbif-all-species.ts
async function syncSpecies(
sqlite: Database,
species: OurSpecies,
force: boolean
): Promise<SyncResult> {
const scientificName = `${species.canonical_genus} ${species.canonical_species_name}`;
try {
// Query GBIF for this species
const gbifData = await getExternalData(
species.canonical_genus,
species.canonical_species_name
);
if (!gbifData) {
return {
group_id: species.group_id,
scientific_name: scientificName,
species_type: species.species_type,
gbif_url: "",
occurrence_map_url: "",
image_urls: [],
new_links: 0,
new_images: 0,
status: "not_found",
};
}
// Get existing data
const existingRefs = await getExistingReferences(sqlite, species.group_id);
const existingImages = await getExistingImages(sqlite, species.group_id);
// Collect all URLs (GBIF page + occurrence map as reference URLs)
const allUrls = [gbifData.gbifUrl];
// Note: WRepobility (the analyzer behind this table) · https://repobility.com
applySync function · typescript · L412-L506 (95 LOC)scripts/sync-gbif-all-species.ts
async function applySync(
sqlite: Database,
result: SyncResult,
downloadImages: boolean
): Promise<void> {
await sqlite.exec("BEGIN TRANSACTION");
try {
const now = new Date().toISOString();
// Add GBIF URL to external references if new
if (result.new_links > 0) {
// Get current max display_order
const maxOrder = await sqlite.get<{ max_order: number | null }>(
"SELECT MAX(display_order) as max_order FROM species_external_references WHERE group_id = ?",
[result.group_id]
);
let nextOrder = (maxOrder?.max_order ?? -1) + 1;
// Add GBIF species page URL
if (result.gbif_url) {
await sqlite.run(
`INSERT INTO species_external_references (group_id, reference_url, display_order)
VALUES (?, ?, ?)
ON CONFLICT (group_id, reference_url) DO NOTHING`,
[result.group_id, result.gbif_url, nextOrder++]
);
}
}
// Add new images
if (result.new_images > 0)parseArgs function · typescript · L70-L105 (36 LOC)scripts/sync-iucn-data.ts
function parseArgs(): CLIOptions {
const args = process.argv.slice(2);
const options: CLIOptions = {
dryRun: false,
missingOnly: false,
verbose: false,
resume: false,
checkSynonyms: false,
};
for (let i = 0; i < args.length; i++) {
if (args[i] === "--dry-run") {
options.dryRun = true;
} else if (args[i] === "--limit" && i + 1 < args.length) {
options.limit = parseInt(args[++i]);
} else if (args[i] === "--missing-only") {
options.missingOnly = true;
} else if (args[i] === "--stale-only") {
const days = i + 1 < args.length && !args[i + 1].startsWith("--") ? parseInt(args[++i]) : 365;
options.staleOnly = days;
} else if (args[i] === "--species-id" && i + 1 < args.length) {
options.speciesId = parseInt(args[++i]);
} else if (args[i] === "--verbose") {
options.verbose = true;
} else if (args[i] === "--resume") {
options.resume = true;
} else if (args[i] === "--check-synonyms") {
printHelp function · typescript · L107-L143 (37 LOC)scripts/sync-iucn-data.ts
function printHelp() {
console.log(`
Bulk Sync IUCN Red List Data from API
Usage:
npm run script scripts/sync-iucn-data.ts [options]
Options:
--dry-run Preview changes without updating database
--limit N Process only N species (for testing)
--missing-only Only sync species without IUCN data
--stale-only [days] Only sync species with data older than N days (default: 365)
--species-id ID Sync single species by group ID
--check-synonyms Check if IUCN has species under a different name
--verbose Show detailed progress information
--resume Resume from last failed sync
--help, -h Show this help message
Examples:
# Test with 10 species (dry run)
npm run script scripts/sync-iucn-data.ts --dry-run --limit 10
# Sync only species missing IUCN data
npm run script scripts/sync-iucn-data.ts --missing-only
# Sync single species
npm run script scripts/sync-iucn-data.ts --specgetSpeciesToSync function · typescript · L146-L179 (34 LOC)scripts/sync-iucn-data.ts
async function getSpeciesToSync(db: Database, options: CLIOptions): Promise<SpeciesForSync[]> {
if (options.speciesId) {
// Single species by ID
const species = await db.get<SpeciesForSync>(
`SELECT group_id, canonical_genus, canonical_species_name, program_class
FROM species_name_group WHERE group_id = ?`,
[options.speciesId]
);
return species ? [species] : [];
}
if (options.missingOnly) {
// Only species without IUCN data
return await getSpeciesWithMissingIucn(db);
}
if (options.staleOnly !== undefined) {
// Only species with old data
return await getSpeciesNeedingResync(db, options.staleOnly);
}
// All species (default)
let query = `
SELECT group_id, canonical_genus, canonical_species_name, program_class, iucn_redlist_category
FROM species_name_group
ORDER BY canonical_genus, canonical_species_name
`;
if (options.limit) {
query += ` LIMIT ${options.limit}`;
}
return await db.all<SpeciesFosyncSpecies function · typescript · L182-L187 (6 LOC)scripts/sync-iucn-data.ts
async function syncSpecies(
db: Database,
client: ReturnType<typeof getIUCNClient>,
species: SpeciesForSync,
options: CLIOptions
): Promise<{ status: SyncStatus; category?: string; error?: string; synonymFound?: boolean }> {syncIUCNData function · typescript · L286-L411 (126 LOC)scripts/sync-iucn-data.ts
async function syncIUCNData() {
const options = parseArgs();
console.log("\n=== IUCN Red List Bulk Sync ===\n");
console.log(`Mode: ${options.dryRun ? "DRY RUN (no changes)" : "LIVE"}`);
console.log(`Verbose: ${options.verbose ? "YES" : "NO"}`);
if (options.speciesId) {
console.log(`Target: Single species (ID: ${options.speciesId})`);
} else if (options.missingOnly) {
console.log(`Target: Species without IUCN data`);
} else if (options.staleOnly) {
console.log(`Target: Species with data older than ${options.staleOnly} days`);
} else {
console.log(`Target: All species${options.limit ? ` (limit: ${options.limit})` : ""}`);
}
console.log("");
// Connect to database
console.log("Connecting to database...");
const db = await open({
filename: config.databaseFile,
driver: sqlite3.Database,
});
// Get IUCN client
console.log("Initializing IUCN API client...");
const client = getIUCNClient();
// Get species to sync
console.log(getSpeciesData function · typescript · L20-L60 (41 LOC)scripts/sync-species-to-prod-mcp.ts
async function getSpeciesData(groupId: number): Promise<SpeciesExternalData | null> {
const dbPath = join(__dirname, '../db/database.db');
const db = await open({
filename: dbPath,
driver: sqlite3.Database,
mode: sqlite3.OPEN_READONLY,
});
// Get species info
const species = await db.get<{ canonical_genus: string; canonical_species_name: string }>(
'SELECT canonical_genus, canonical_species_name FROM species_name_group WHERE group_id = ?',
[groupId]
);
if (!species) {
await db.close();
return null;
}
// Get external references
const refs = await db.all<{ reference_url: string }>(
'SELECT reference_url FROM species_external_references WHERE group_id = ? ORDER BY display_order',
[groupId]
);
// Get images
const images = await db.all<{ image_url: string }>(
'SELECT image_url FROM species_images WHERE group_id = ? ORDER BY display_order',
[groupId]
);
await db.close();
return {
group_id: groupId,
canomain function · typescript · L62-L124 (63 LOC)scripts/sync-species-to-prod-mcp.ts
async function main() {
const args = process.argv.slice(2);
const speciesIdArg = args.find(arg => arg.startsWith('--species-id='));
if (!speciesIdArg) {
console.error('❌ Missing required argument: --species-id=<id>');
console.error('Usage: npm run script scripts/sync-species-to-prod-mcp.ts -- --species-id=61');
process.exit(1);
}
const speciesId = parseInt(speciesIdArg.split('=')[1]);
console.log(`\n=== Sync Species ${speciesId} to Production ===\n`);
const data = await getSpeciesData(speciesId);
if (!data) {
console.error(`❌ Species ${speciesId} not found in local database`);
process.exit(1);
}
console.log(`Species: ${data.canonical_genus} ${data.canonical_species_name}`);
console.log(`External References: ${data.external_references.length}`);
console.log(`Images: ${data.image_links.length}\n`);
if (data.external_references.length === 0 && data.image_links.length === 0) {
console.log('⚠️ No external data to sync for this specieRepobility · severity-and-effort ranking · https://repobility.com
getS3Client function · typescript · L139-L151 (13 LOC)scripts/sync-wikipedia-all-species.ts
/**
 * Returns the shared S3 client, building it on the first call.
 *
 * The instance is cached in the `s3Client` variable declared outside this
 * function, so subsequent calls hand back the same object. The client is
 * configured with region "auto" plus the endpoint and credentials read from
 * `config.storage`.
 *
 * @returns The cached (or freshly created) `S3Client`.
 */
function getS3Client(): S3Client {
  // Fast path: a client has already been created.
  if (s3Client) {
    return s3Client;
  }

  const { s3Url, s3AccessKeyId, s3Secret } = config.storage;
  s3Client = new S3Client({
    region: "auto",
    endpoint: s3Url,
    credentials: {
      accessKeyId: s3AccessKeyId,
      secretAccessKey: s3Secret,
    },
  });
  return s3Client;
}
// checkR2Exists function · typescript · L177-L189 (13 LOC)scripts/sync-wikipedia-all-species.ts
/**
 * Reports whether a HeadObject request for `r2Key` succeeds.
 *
 * @param r2Key - Object key to look up in `config.storage.s3Bucket`.
 * @returns `true` when the HeadObject call resolves; `false` when anything in
 *   the lookup throws. Every failure is reported as "not present" and the
 *   error itself is discarded.
 */
async function checkR2Exists(r2Key: string): Promise<boolean> {
  try {
    const client = getS3Client();
    const headRequest = new HeadObjectCommand({
      Bucket: config.storage.s3Bucket,
      Key: r2Key,
    });
    await client.send(headRequest);
  } catch {
    // Best-effort probe: any error counts as "does not exist".
    return false;
  }
  return true;
}
// queryWikidata function · typescript · L262-L315 (54 LOC)scripts/sync-wikipedia-all-species.ts
async function queryWikidata(
genus: string,
species: string
): Promise<WikidataSpeciesResult[]> {
const scientificName = `${genus} ${species}`;
const sparqlQuery = `
SELECT DISTINCT ?item ?itemLabel ?article ?image WHERE {
?item wdt:P225 "${scientificName}" .
OPTIONAL {
?article schema:about ?item ;
schema:isPartOf <https://en.wikipedia.org/> .
}
OPTIONAL {
?item wdt:P18 ?image .
}
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}
LIMIT 5
`.trim();
try {
const sparqlEndpoint = "https://query.wikidata.org/sparql";
const url =
sparqlEndpoint +
"?" +
new URLSearchParams({
query: sparqlQuery,
format: "json",
}).toString();
const response = await fetch(url, {
method: "GET",
headers: {
Accept: "application/sparql-results+json",
"User-Agent": "BAP-Species-Database/1.0 (mulm project)",
},
});
if (!response.ok) {
return [];
getWikipediaPageSummary function · typescript · L320-L343 (24 LOC)scripts/sync-wikipedia-all-species.ts
/**
 * Fetches the Wikipedia REST "page summary" document for a title.
 *
 * The request goes to
 * `https://<lang>.wikipedia.org/api/rest_v1/page/summary/<title>` with the
 * title passed through `encodeURIComponent`.
 *
 * @param title - Page title to look up.
 * @param lang - Wikipedia language subdomain; defaults to "en".
 * @returns The parsed JSON body when the response is OK; `null` when the
 *   response is not OK or when building the URL, fetching, or parsing throws.
 */
async function getWikipediaPageSummary(
  title: string,
  lang = "en"
): Promise<WikipediaPageSummary | null> {
  const requestOptions = {
    method: "GET",
    headers: {
      Accept: "application/json",
      "User-Agent": "BAP-Species-Database/1.0 (mulm project)",
    },
  };

  try {
    // URL construction stays inside the try so an encoding error also yields null.
    const summaryUrl = `https://${lang}.wikipedia.org/api/rest_v1/page/summary/${encodeURIComponent(title)}`;
    const reply = await fetch(summaryUrl, requestOptions);
    return reply.ok ? await reply.json() : null;
  } catch {
    return null;
  }
}
// getExternalData function · typescript · L348-L410 (63 LOC)scripts/sync-wikipedia-all-species.ts
async function getExternalData(
genus: string,
species: string
): Promise<WikipediaResult | null> {
const wikidataResults = await queryWikidata(genus, species);
if (wikidataResults.length === 0) {
return null;
}
const primaryResult = wikidataResults[0];
// Extract Wikidata ID from URL
const wikidataUrlMatch = primaryResult.item.value.match(/Q\d+$/);
if (!wikidataUrlMatch) {
return null;
}
const wikidataId = wikidataUrlMatch[0];
const wikidataUrl = primaryResult.item.value;
// Collect Wikipedia article URLs
const wikipediaUrls: Record<string, string> = {};
if (primaryResult.article) {
wikipediaUrls.en = primaryResult.article.value;
}
// Try to get page summary for more details
const scientificName = `${genus} ${species}`;
const pageSummary = await getWikipediaPageSummary(scientificName, "en");
if (pageSummary && pageSummary.content_urls.desktop.page) {
wikipediaUrls.en = pageSummary.content_urls.desktop.page;
}
// CollgetExistingReferences function · typescript · L412-L421 (10 LOC)scripts/sync-wikipedia-all-species.ts
/**
 * Loads the external reference URLs already stored for a species group.
 *
 * @param sqlite - Open database handle to query.
 * @param groupId - `group_id` whose rows in `species_external_references` are read.
 * @returns A set containing the `reference_url` of every matching row.
 */
async function getExistingReferences(
  sqlite: Database,
  groupId: number
): Promise<Set<string>> {
  const rows = await sqlite.all<ExistingReference[]>(
    "SELECT reference_url FROM species_external_references WHERE group_id = ?",
    [groupId]
  );

  const knownUrls = new Set<string>();
  for (const { reference_url } of rows) {
    knownUrls.add(reference_url);
  }
  return knownUrls;
}
// getExistingImages function · typescript · L423-L429 (7 LOC)scripts/sync-wikipedia-all-species.ts
/**
 * Loads the image URLs already stored for a species group.
 *
 * @param sqlite - Open database handle to query.
 * @param groupId - `group_id` whose rows in `species_images` are read.
 * @returns A set containing the `image_url` of every matching row.
 */
async function getExistingImages(sqlite: Database, groupId: number): Promise<Set<string>> {
  const rows = await sqlite.all<ExistingImage[]>(
    "SELECT image_url FROM species_images WHERE group_id = ?",
    [groupId]
  );

  const knownImages = new Set<string>();
  for (const { image_url } of rows) {
    knownImages.add(image_url);
  }
  return knownImages;
}
// syncSpecies function · typescript · L431-L509 (79 LOC)scripts/sync-wikipedia-all-species.ts
async function syncSpecies(
sqlite: Database,
species: OurSpecies,
force: boolean
): Promise<SyncResult> {
const scientificName = `${species.canonical_genus} ${species.canonical_species_name}`;
try {
// Query Wikipedia/Wikidata for this species
const wikiData = await getExternalData(
species.canonical_genus,
species.canonical_species_name
);
if (!wikiData) {
return {
group_id: species.group_id,
scientific_name: scientificName,
species_type: species.species_type,
wikidata_url: "",
wikipedia_urls: {},
image_urls: [],
new_links: 0,
new_images: 0,
status: "not_found",
};
}
// Get existing data
const existingRefs = await getExistingReferences(sqlite, species.group_id);
const existingImages = await getExistingImages(sqlite, species.group_id);
// Collect all URLs (Wikidata + Wikipedia articles)
const allUrls = [wikiData.wikidataUrl, ...Object.vaAll rows above produced by Repobility · https://repobility.com
applySync function · typescript · L511-L636 (126 LOC)scripts/sync-wikipedia-all-species.ts
async function applySync(
sqlite: Database,
result: SyncResult,
downloadImages: boolean
): Promise<void> {
await sqlite.exec("BEGIN TRANSACTION");
try {
const now = new Date().toISOString();
// Add Wikidata and Wikipedia URLs to external references if new
if (result.new_links > 0) {
// Get current max display_order
const maxOrder = await sqlite.get<{ max_order: number | null }>(
"SELECT MAX(display_order) as max_order FROM species_external_references WHERE group_id = ?",
[result.group_id]
);
let nextOrder = (maxOrder?.max_order ?? -1) + 1;
// Add Wikidata URL first
if (result.wikidata_url) {
await sqlite.run(
`INSERT INTO species_external_references (group_id, reference_url, display_order)
VALUES (?, ?, ?)
ON CONFLICT (group_id, reference_url) DO NOTHING`,
[result.group_id, result.wikidata_url, nextOrder++]
);
}
// Add Wikipedia article URLmain function · typescript · L638-L833 (196 LOC)scripts/sync-wikipedia-all-species.ts
async function main() {
const args = process.argv.slice(2);
const execute = args.includes("--execute");
const force = args.includes("--force");
const downloadImages = args.includes("--download-images");
const batchSizeArg = args.find((arg) => arg.startsWith("--batch-size="));
const batchSize = batchSizeArg ? parseInt(batchSizeArg.split("=")[1]) : undefined;
const speciesTypeArg = args.find((arg) => arg.startsWith("--species-type="));
const speciesType = speciesTypeArg ? speciesTypeArg.split("=")[1] : undefined;
const startAfterArg = args.find((arg) => arg.startsWith("--start-after="));
const startAfter = startAfterArg ? parseInt(startAfterArg.split("=")[1]) : undefined;
const dbArg = args.find((arg) => arg.startsWith("--db="));
const customDbPath = dbArg ? dbArg.split("=")[1] : null;
console.log("\n=== Wikipedia/Wikidata Full Database Sync ===\n");
console.log(
`Mode: ${execute ? "🔴 EXECUTE (will modify database)" : "🟡 DRY-RUN (preview only)"}`
);
ccreateTestImage function · typescript · L19-L39 (21 LOC)scripts/test-image-processor-simple.ts
async function createTestImage(width: number, height: number, label: string): Promise<Buffer> {
const svg = `
<svg width="${width}" height="${height}" xmlns="http://www.w3.org/2000/svg">
<defs>
<linearGradient id="grad" x1="0%" y1="0%" x2="100%" y2="100%">
<stop offset="0%" style="stop-color:rgb(74,144,226);stop-opacity:1" />
<stop offset="100%" style="stop-color:rgb(126,87,194);stop-opacity:1" />
</linearGradient>
</defs>
<rect width="${width}" height="${height}" fill="url(#grad)"/>
<text x="50%" y="45%" font-size="${Math.min(width, height) / 10}" fill="white" text-anchor="middle" dominant-baseline="middle" font-family="sans-serif">
${label}
</text>
<text x="50%" y="55%" font-size="${Math.min(width, height) / 20}" fill="white" text-anchor="middle" dominant-baseline="middle" font-family="sans-serif">
${width}x${height}
</text>
</svg>
`;
return sharp(Buffer.from(svg)).jpeg({ qual