Function bodies 82 total
main function · python · L40-L153 (114 LOC)scripts/index_library.py
def main():
parser = argparse.ArgumentParser(description="Index an EndNote library into SQLite.")
parser.add_argument("--config", type=str, help="Path to config.yaml")
parser.add_argument("--skip-pdfs", action="store_true", help="Skip PDF text extraction")
parser.add_argument("--full", action="store_true", help="Full re-index (clear all data first)")
args = parser.parse_args()
cfg = Config.load(args.config)
logger.info("EndNote XML: %s", cfg.endnote_xml)
logger.info("PDF directory: %s", cfg.pdf_dir)
logger.info("Database: %s", cfg.db_path)
logger.info("Mode: %s", "FULL re-index" if args.full else "INCREMENTAL (new/changed only)")
if not cfg.endnote_xml.exists():
logger.error("EndNote XML file not found: %s", cfg.endnote_xml)
sys.exit(1)
conn = connect(cfg.db_path)
if args.full:
logger.info("Clearing existing data...")
clear_all(conn)
# --- Phase 1: Parse XML (always upserts — new records addedformat_citation function · python · L13-L63 (51 LOC)src/endnote_mcp/citation.py
def format_citation(ref: dict, style: str = "apa7") -> str:
"""Format a reference dict as a citation string.
Args:
ref: Reference dict with authors (list), title, year, journal, etc.
style: One of 'apa7', 'harvard', 'vancouver', 'chicago', 'ieee'.
"""
style = style.lower().strip()
if style not in STYLES:
raise ValueError(f"Unknown style '{style}'. Choose from: {', '.join(STYLES)}")
# Ensure authors is a list
authors = ref.get("authors", [])
if isinstance(authors, str):
try:
authors = json.loads(authors)
except (json.JSONDecodeError, TypeError):
authors = [authors] if authors else []
title = ref.get("title", "")
year = ref.get("year", "n.d.")
journal = ref.get("journal", "")
volume = ref.get("volume", "")
issue = ref.get("issue", "")
pages = ref.get("pages", "")
doi = ref.get("doi", "")
publisher = ref.get("publisher", "")
place = ref.get("place_publishe_apa7 function · python · L68-L112 (45 LOC)src/endnote_mcp/citation.py
def _apa7(*, authors, title, year, journal, volume, issue, pages, doi, publisher, place, ref_type):
parts = []
# Authors
if authors:
parts.append(_apa_authors(authors))
else:
parts.append(title + ".")
title = ""
# Year
parts.append(f"({year}).")
# Title
if title:
if _is_article(ref_type):
parts.append(f"{title}.")
else:
parts.append(f"*{title}*.")
# Source
if _is_article(ref_type) and journal:
source = f"*{journal}*"
if volume:
source += f", *{volume}*"
if issue:
source += f"({issue})"
if pages:
source += f", {pages}"
source += "."
parts.append(source)
elif publisher:
if place:
parts.append(f"{place}: {publisher}.")
else:
parts.append(f"{publisher}.")
# DOI
if doi:
doi_clean = doi.strip()
if not doi_clean.startswith("http"):
_apa_authors function · python · L115-L125 (11 LOC)src/endnote_mcp/citation.py
def _apa_authors(authors: list[str]) -> str:
"""Format author list for APA 7th."""
formatted = [_invert_author(a) for a in authors]
if len(formatted) == 1:
return formatted[0]
if len(formatted) == 2:
return f"{formatted[0]} & {formatted[1]}"
if len(formatted) <= 20:
return ", ".join(formatted[:-1]) + ", & " + formatted[-1]
# 20+ authors: first 19, ..., last
return ", ".join(formatted[:19]) + ", ... " + formatted[-1]_harvard function · python · L130-L161 (32 LOC)src/endnote_mcp/citation.py
def _harvard(*, authors, title, year, journal, volume, issue, pages, doi, publisher, place, ref_type):
    """Assemble a Harvard-style reference string from its individual fields.

    Articles (as decided by ``_is_article``) get a quoted title followed by
    the italicised journal with volume/issue/pages; everything else gets an
    italicised title followed by the publisher. A DOI, when present, is
    appended as a URL.
    """
    pieces = [_harvard_authors(authors)] if authors else []
    pieces.append(f"({year})")

    if _is_article(ref_type):
        pieces.append(f"'{title}',")
        if journal:
            # Journal, volume, issue and pages are comma-separated.
            segments = [f"*{journal}*"]
            if volume:
                segments.append(f"vol. {volume}")
            if issue:
                segments.append(f"no. {issue}")
            if pages:
                segments.append(f"pp. {pages}")
            pieces.append(", ".join(segments) + ".")
    else:
        pieces.append(f"*{title}*.")
        if publisher:
            imprint = f"{place}: {publisher}" if place else f"{publisher}"
            pieces.append(imprint + ".")

    if doi:
        link = doi.strip()
        # Bare DOIs are turned into resolver URLs; full URLs pass through.
        pieces.append(link if link.startswith("http") else f"https://doi.org/{link}")

    return " ".join(pieces)
def _harvard_authors(authors: list[str]) -> str:
formatted = [_invert_author(a) for a in authors]
if len(formatted) == 1:
return formatted[0]
if len(formatted) == 2:
return f"{formatted[0]} and {formatted[1]}"
if len(formatted) <= 3:
return ", ".join(formatted[:-1]) + " and " + formatted[-1]
return f"{formatted[0]} et al."_vancouver function · python · L177-L201 (25 LOC)src/endnote_mcp/citation.py
def _vancouver(*, authors, title, year, journal, volume, issue, pages, doi, publisher, place, ref_type):
    """Assemble a Vancouver-style reference string from its individual fields.

    Articles with a journal are rendered as "Journal. Year;Vol(Issue):Pages.";
    anything else falls back to "Place: Publisher; Year." when a publisher
    is known. The ``doi`` argument is accepted but not used here.
    """
    pieces = []
    if authors:
        pieces.append(_vancouver_authors(authors) + ".")
    pieces.append(f"{title}.")

    if _is_article(ref_type) and journal:
        locator = "".join(
            [
                f"{journal}. {year}",
                f";{volume}" if volume else "",
                f"({issue})" if issue else "",
                f":{pages}" if pages else "",
            ]
        )
        pieces.append(locator + ".")
    elif publisher:
        imprint = f"{place}: {publisher}" if place else f"{publisher}"
        pieces.append(f"{imprint}; {year}.")

    return " ".join(pieces)
_vancouver_author_name function · python · L211-L219 (9 LOC)src/endnote_mcp/citation.py
def _vancouver_author_name(name: str) -> str:
"""Convert 'Smith, John A.' → 'Smith JA'."""
parts = name.split(",", 1)
if len(parts) == 1:
return name.strip()
surname = parts[0].strip()
given = parts[1].strip()
initials = "".join(w[0].upper() for w in given.split() if w)
return f"{surname} {initials}"_chicago function · python · L224-L257 (34 LOC)src/endnote_mcp/citation.py
def _chicago(*, authors, title, year, journal, volume, issue, pages, doi, publisher, place, ref_type):
    """Assemble a Chicago-style reference string from its individual fields.

    Articles (as decided by ``_is_article``) get a double-quoted title and
    an italicised journal with volume, issue and pages; everything else
    gets an italicised title and the publisher. A DOI, when present, is
    appended as a URL.
    """
    pieces = []
    if authors:
        pieces.append(_chicago_authors(authors) + ".")
    pieces.append(f"{year}.")

    if _is_article(ref_type):
        pieces.append(f'"{title}."')
        if journal:
            # Each optional component carries its own separator.
            locator = "".join(
                [
                    f"*{journal}*",
                    f" {volume}" if volume else "",
                    f", no. {issue}" if issue else "",
                    f": {pages}" if pages else "",
                ]
            )
            pieces.append(locator + ".")
    else:
        pieces.append(f"*{title}*.")
        if publisher:
            imprint = f"{place}: {publisher}" if place else f"{publisher}"
            pieces.append(imprint + ".")

    if doi:
        link = doi.strip()
        # Bare DOIs are turned into resolver URLs; full URLs pass through.
        pieces.append(link if link.startswith("http") else f"https://doi.org/{link}")

    return " ".join(pieces)
def _chicago_authors(authors: list[str]) -> str:
if len(authors) == 1:
return authors[0]
if len(authors) == 2:
return f"{authors[0]} and {_direct_order(authors[1])}"
if len(authors) <= 3:
middle = ", ".join(_direct_order(a) for a in authors[1:-1])
return f"{authors[0]}, {middle}, and {_direct_order(authors[-1])}"
return f"{authors[0]} et al."_ieee function · python · L273-L301 (29 LOC)src/endnote_mcp/citation.py
def _ieee(*, authors, title, year, journal, volume, issue, pages, doi, publisher, place, ref_type):
    """Assemble an IEEE-style reference string from its individual fields.

    Articles with a journal are rendered as an italicised journal followed
    by volume, issue, pages and year; otherwise the publisher (with
    optional place) and year are used when a publisher is known. A bare
    DOI is prefixed with "doi: "; a DOI that is already a URL is kept.
    """
    pieces = []
    if authors:
        pieces.append(_ieee_authors(authors) + ",")
    pieces.append(f'"{title},"')

    if _is_article(ref_type) and journal:
        # All components, including the closing year, are comma-separated.
        segments = [f"*{journal}*"]
        if volume:
            segments.append(f"vol. {volume}")
        if issue:
            segments.append(f"no. {issue}")
        if pages:
            segments.append(f"pp. {pages}")
        segments.append(f"{year}.")
        pieces.append(", ".join(segments))
    elif publisher:
        imprint = f"{place}: {publisher}" if place else f"{publisher}"
        pieces.append(f"{imprint}, {year}.")

    if doi:
        identifier = doi.strip()
        pieces.append(identifier if identifier.startswith("http") else f"doi: {identifier}")

    return " ".join(pieces)
def _ieee_authors(authors: list[str]) -> str:
formatted = [_direct_order_initials(a) for a in authors]
if len(formatted) == 1:
return formatted[0]
if len(formatted) == 2:
return f"{formatted[0]} and {formatted[1]}"
return ", ".join(formatted[:-1]) + ", and " + formatted[-1]_direct_order_initials function · python · L313-L321 (9 LOC)src/endnote_mcp/citation.py
def _direct_order_initials(name: str) -> str:
"""Convert 'Smith, John A.' → 'J. A. Smith'."""
parts = name.split(",", 1)
if len(parts) == 1:
return name.strip()
surname = parts[0].strip()
given = parts[1].strip()
initials = " ".join(f"{w[0]}." for w in given.split() if w)
return f"{initials} {surname}"_invert_author function · python · L326-L337 (12 LOC)src/endnote_mcp/citation.py
def _invert_author(name: str) -> str:
"""Ensure author name is in 'Surname, Initials.' format for APA/Harvard."""
# If already inverted (contains comma), return as-is
if "," in name:
return name.strip()
# Try to invert "John A. Smith" → "Smith, J. A."
parts = name.strip().split()
if len(parts) < 2:
return name.strip()
surname = parts[-1]
initials = " ".join(f"{p[0]}." for p in parts[:-1])
return f"{surname}, {initials}"_direct_order function · python · L340-L345 (6 LOC)src/endnote_mcp/citation.py
def _direct_order(name: str) -> str:
"""Convert 'Smith, John' → 'John Smith'."""
parts = name.split(",", 1)
if len(parts) == 1:
return name.strip()
return f"{parts[1].strip()} {parts[0].strip()}"Want fix-PRs on findings? Install Repobility's GitHub App · github.com/apps/repobility-bot
format_bibtex function · python · L356-L440 (85 LOC)src/endnote_mcp/citation.py
def format_bibtex(ref: dict) -> str:
"""Format a reference dict as a BibTeX entry.
Args:
ref: Reference dict with authors (list), title, year, journal, etc.
Returns:
A complete BibTeX entry string.
"""
authors = ref.get("authors", [])
if isinstance(authors, str):
try:
authors = json.loads(authors)
except (json.JSONDecodeError, TypeError):
authors = [authors] if authors else []
title = ref.get("title", "")
year = ref.get("year", "")
journal = ref.get("journal", "")
volume = ref.get("volume", "")
issue = ref.get("issue", "")
pages = ref.get("pages", "")
doi = ref.get("doi", "")
publisher = ref.get("publisher", "")
place = ref.get("place_published", "")
isbn = ref.get("isbn", "")
ref_type = ref.get("ref_type", "Journal Article")
rec_number = ref.get("rec_number", 0)
# Determine BibTeX entry type
entry_type = _bibtex_entry_type(ref_type)
# Build c_bibtex_entry_type function · python · L443-L462 (20 LOC)src/endnote_mcp/citation.py
def _bibtex_entry_type(ref_type: str) -> str:
    """Translate an EndNote reference type name into a BibTeX entry type.

    The name is lower-cased, checked with ``_is_article`` first, and then
    matched against keyword rules in priority order; anything that matches
    no rule (including patents and web/electronic sources) becomes "misc".
    """
    lowered = ref_type.lower()
    if _is_article(lowered):
        return "article"

    # Order matters: "book section" must be tested before the broader "book".
    keyword_rules = (
        (("book section", "chapter"), "incollection"),
        (("book",), "book"),
        (("conference", "proceeding"), "inproceedings"),
        (("thesis", "dissertation"), "phdthesis"),
        (("report",), "techreport"),
    )
    for needles, entry_type in keyword_rules:
        if any(needle in lowered for needle in needles):
            return entry_type
    return "misc"
def _bibtex_cite_key(authors: list[str], year: str, rec_number: int) -> str:
"""Generate a BibTeX cite key like 'smith2020r42'."""
if authors:
first = authors[0].split(",")[0].strip()
# Remove non-alphanumeric chars
first = re.sub(r"[^a-zA-Z]", "", first).lower()
else:
first = "unknown"
return f"{first}{year}r{rec_number}"cli function · python · L33-L38 (6 LOC)src/endnote_mcp/cli.py
def cli():
    """Connect your EndNote library to Claude via MCP.
    Get started: endnote-mcp setup
    """
    # Intentionally empty: this function only carries the docstring above.
    # NOTE(review): presumably registered as a click group by a decorator
    # that is not visible here, in which case the docstring is the
    # user-facing help text — confirm before rewording it.
    pass
def setup():
"""Interactive setup wizard — finds your library and configures everything."""
click.echo()
click.secho(" EndNote MCP — Setup Wizard", bold=True)
click.secho(" Connect your reference library to Claude\n", dim=True)
config_dir = get_config_dir()
config_path = get_default_config_path()
# --- Step 1: Find EndNote XML ---
click.secho("Step 1: EndNote XML Export", bold=True)
xml_path = _find_or_ask_xml()
if xml_path is None:
click.echo("\nSetup cancelled.")
return
click.secho(f" ✓ {xml_path}\n", fg="green")
# --- Step 2: Find PDF directory ---
click.secho("Step 2: PDF Attachments Directory", bold=True)
pdf_dir = _find_or_ask_pdf_dir(xml_path)
if pdf_dir is None:
click.echo("\nSetup cancelled.")
return
click.secho(f" ✓ {pdf_dir}\n", fg="green")
# --- Step 3: Database location ---
db_path = config_dir / "library.db"
# --- Step 4: Save config ---
config_dir.mkdiindex function · python · L115-L129 (15 LOC)src/endnote_mcp/cli.py
def index(full, skip_pdfs, embed, config):
    """Index your EndNote library into the search database.
    By default, runs incrementally — only processes new references and PDFs.
    """
    # Fall back to the default config location when none was given.
    config_path = config if config else get_default_config_path()
    config_missing = not Path(config_path).exists()
    if config_missing:
        click.secho("No configuration found. Run 'endnote-mcp setup' first.", fg="red")
        raise SystemExit(1)

    _run_index(config_path, full=full, skip_pdfs=skip_pdfs)

    if not embed:
        # No explicit embedding request: let _auto_embed decide whether
        # anything needs embedding.
        _auto_embed(config_path)
        return
    _run_embed(config_path, full=full)
def status(config):
"""Show index statistics."""
config_path = config or get_default_config_path()
if not Path(config_path).exists():
click.secho("No configuration found. Run 'endnote-mcp setup' first.", fg="red")
raise SystemExit(1)
cfg = Config.load(config_path)
if not cfg.db_path.exists():
click.secho("Database not found. Run 'endnote-mcp index' first.", fg="yellow")
return
from endnote_mcp.db import connect, get_stats
conn = connect(cfg.db_path)
stats = get_stats(conn)
conn.close()
click.echo()
click.secho(" EndNote MCP — Library Status", bold=True)
click.echo(f" Config: {config_path}")
click.echo(f" XML source: {cfg.endnote_xml}")
click.echo(f" PDF dir: {cfg.pdf_dir}")
click.echo(f" Database: {cfg.db_path} ({cfg.db_path.stat().st_size / 1024 / 1024:.1f} MB)")
click.echo()
click.echo(f" References: {stats['total_references']:,}")
click.echo(f" PDembed function · python · L188-L199 (12 LOC)src/endnote_mcp/cli.py
def embed(full, config):
    """Generate semantic search embeddings for your references.
    Requires: pip install endnote-mcp[semantic]
    By default, only embeds references that don't have embeddings yet.
    """
    # Fall back to the default config location when none was given.
    config_path = get_default_config_path() if not config else config
    if Path(config_path).exists():
        _run_embed(config_path, full=full)
        return

    # No config file on disk: report and exit with a failure status.
    click.secho("No configuration found. Run 'endnote-mcp setup' first.", fg="red")
    raise SystemExit(1)
_run_embed function · python · L215-L298 (84 LOC)src/endnote_mcp/cli.py
def _run_embed(config_path, *, full=False):
"""Generate embeddings for references."""
from endnote_mcp import embeddings
if not embeddings.is_available():
click.secho(
"Semantic search dependencies not installed.\n"
"Install with: pip install endnote-mcp[semantic]",
fg="red",
)
raise SystemExit(1)
from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn, TaskProgressColumn, TimeRemainingColumn
from endnote_mcp.config import Config
from endnote_mcp.db import connect, upsert_embedding, clear_embeddings
cfg = Config.load(config_path)
conn = connect(cfg.db_path)
if full:
click.echo("Clearing existing embeddings...")
clear_embeddings(conn)
# Find references without embeddings
rows = conn.execute("""
SELECT r.rec_number, r.title, r.abstract, r.keywords
FROM references_ r
WHERE r.rec_number NOT IN (SELECT rec_number FROM referenc_auto_embed function · python · L301-L325 (25 LOC)src/endnote_mcp/cli.py
def _auto_embed(config_path):
    """Auto-embed new references if semantic dependencies are installed.

    Returns silently when the embeddings module cannot be imported or
    reports itself unavailable. Otherwise counts the references that have
    no row in ``reference_embeddings`` and, if there are any, runs
    ``_run_embed`` incrementally.

    Args:
        config_path: Path to the configuration file handed to
            ``Config.load``.
    """
    try:
        from endnote_mcp import embeddings
        if not embeddings.is_available():
            return
    except Exception:
        # Best-effort feature: any failure probing the optional dependency
        # means "skip auto-embedding", never "abort the command".
        return

    from contextlib import closing

    from endnote_mcp.config import Config
    from endnote_mcp.db import connect

    cfg = Config.load(config_path)
    # closing() guarantees the connection is released even if the query
    # raises; previously it leaked on that path.
    with closing(connect(cfg.db_path)) as conn:
        # Check if there are un-embedded references
        count = conn.execute("""
            SELECT COUNT(*) FROM references_
            WHERE rec_number NOT IN (SELECT rec_number FROM reference_embeddings)
        """).fetchone()[0]

    if count > 0:
        click.echo(f"\n {count:,} references without embeddings — auto-embedding...")
        _run_embed(config_path, full=False)
def _find_endnote_libraries() -> list[Path]:
"""Auto-detect EndNote library files on the system."""
candidates = []
home = Path.home()
search_dirs = [
home / "Documents",
home / "Desktop",
home / "Downloads",
]
# Also check common macOS/Windows locations
if platform.system() == "Darwin":
search_dirs.append(home / "Library")
elif platform.system() == "Windows":
search_dirs.append(Path(os.environ.get("APPDATA", "")))
for d in search_dirs:
if not d.exists():
continue
# Look for .enlp (EndNote library package) and .enl files
for pattern in ("**/*.enlp", "**/*.enl"):
try:
for path in d.glob(pattern):
candidates.append(path)
except PermissionError:
continue
return sorted(set(candidates))_find_xml_exports function · python · L359-L377 (19 LOC)src/endnote_mcp/cli.py
def _find_xml_exports() -> list[Path]:
"""Find XML files that look like EndNote exports."""
candidates = []
home = Path.home()
for d in [home / "Desktop", home / "Documents", home / "Downloads"]:
if not d.exists():
continue
for xml_file in d.glob("*.xml"):
# Quick check: is it an EndNote XML? (look for <records> tag)
try:
with open(xml_file, "rb") as f:
head = f.read(2048)
if b"<records>" in head or b"<record>" in head:
candidates.append(xml_file)
except (PermissionError, OSError):
continue
return sorted(candidates, key=lambda p: p.stat().st_mtime, reverse=True)_find_pdf_dir_for_library function · python · L380-L403 (24 LOC)src/endnote_mcp/cli.py
def _find_pdf_dir_for_library(library_path: Path) -> Path | None:
"""Given an .enlp or .enl path, find the PDF directory."""
# For .enlp packages, look inside
if library_path.suffix == ".enlp":
for pdf_dir in library_path.rglob("PDF"):
if pdf_dir.is_dir():
return pdf_dir
# For .enl files, look for sibling .Data directory
data_dir = library_path.with_suffix(".Data")
if data_dir.exists():
pdf_dir = data_dir / "PDF"
if pdf_dir.exists():
return pdf_dir
# Look next to the library file
parent = library_path.parent
for d in parent.iterdir():
if d.is_dir() and d.name.endswith(".Data"):
pdf_dir = d / "PDF"
if pdf_dir.exists():
return pdf_dir
return None_find_or_ask_xml function · python · L406-L430 (25 LOC)src/endnote_mcp/cli.py
def _find_or_ask_xml() -> Path | None:
    """Find XML exports or ask the user to provide one.

    Lists up to five auto-detected exports and lets the user pick one by
    number. Any other number (including 0), or the absence of detected
    exports, leads to a prompt for an explicit path.

    Returns:
        The chosen or entered path, or ``None`` when the entered path does
        not exist.
    """
    # Only the listed entries are selectable, so the menu and the accepted
    # range are derived from the same slice.
    shown = _find_xml_exports()[:5]
    if shown:
        click.echo(" Found EndNote XML export(s):")
        for i, path in enumerate(shown, 1):
            size_mb = path.stat().st_size / 1024 / 1024
            click.echo(f" [{i}] {path.name} ({size_mb:.1f} MB) — {path.parent}")
        click.echo(" [0] Enter a different path")
        choice = click.prompt(" Select", type=int, default=1)
        if 1 <= choice <= len(shown):
            return shown[choice - 1]
    else:
        # Only claim that nothing was found when that is actually the case,
        # not when the user chose to enter a different path.
        click.echo(" No EndNote XML export found automatically.")

    click.echo(" In EndNote: File → Export → choose XML format")
    path_str = click.prompt(" Path to your exported XML file")
    path = Path(path_str).expanduser().resolve()
    if path.exists():
        return path
    click.secho(f" File not found: {path}", fg="red")
    return None
def _find_or_ask_pdf_dir(xml_path: Path) -> Path | None:
"""Find PDF directory or ask the user."""
# Try to find libraries and their PDF dirs
libraries = _find_endnote_libraries()
pdf_dirs = []
for lib in libraries:
pdf_dir = _find_pdf_dir_for_library(lib)
if pdf_dir:
pdf_count = sum(1 for _ in pdf_dir.glob("*.pdf"))
pdf_dirs.append((pdf_dir, pdf_count, lib))
if pdf_dirs:
click.echo(" Found PDF directories:")
for i, (path, count, lib) in enumerate(pdf_dirs[:5], 1):
click.echo(f" [{i}] {path} ({count:,} PDFs)")
click.echo(f" [0] Enter a different path")
choice = click.prompt(" Select", type=int, default=1)
if 1 <= choice <= len(pdf_dirs):
return pdf_dirs[choice - 1][0]
click.echo(" Could not auto-detect PDF directory.")
click.echo(" This is usually inside your EndNote library's .Data/PDF folder.")
path_str = click.prompt(" Path to you_run_index function · python · L466-L598 (133 LOC)src/endnote_mcp/cli.py
def _run_index(config_path, *, full=False, skip_pdfs=False):
"""Run the indexing process with progress display."""
from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn, TaskProgressColumn, TimeRemainingColumn
from endnote_mcp.config import Config
from endnote_mcp.db import connect, clear_all, upsert_reference, insert_pdf_page, get_stats
from endnote_mcp.endnote_parser import parse_endnote_xml
from endnote_mcp.pdf_indexer import extract_pages, find_pdf
cfg = Config.load(config_path)
if not cfg.endnote_xml.exists():
click.secho(f"XML file not found: {cfg.endnote_xml}", fg="red")
raise SystemExit(1)
conn = connect(cfg.db_path)
if full:
click.echo("Clearing existing data...")
clear_all(conn)
# --- Phase 1: Parse XML ---
# First pass to count records
click.echo(f"Reading {cfg.endnote_xml.name}...")
ref_count = 0
pdf_refs = []
with Progress(
SpinnerColumn(),
Repobility · code-quality intelligence platform · https://repobility.com
_install_claude_desktop function · python · L601-L645 (45 LOC)src/endnote_mcp/cli.py
def _install_claude_desktop():
"""Add MCP server entry to Claude Desktop config."""
if platform.system() == "Darwin":
config_path = Path.home() / "Library" / "Application Support" / "Claude" / "claude_desktop_config.json"
elif platform.system() == "Windows":
config_path = Path(os.environ.get("APPDATA", "")) / "Claude" / "claude_desktop_config.json"
else:
config_path = Path.home() / ".config" / "claude" / "claude_desktop_config.json"
if not config_path.parent.exists():
click.secho("Claude Desktop config directory not found. Is Claude Desktop installed?", fg="red")
return
# Find uv or python executable
uv_path = _find_uv()
if uv_path:
server_entry = {
"command": str(uv_path),
"args": ["run", "--directory", str(Path(__file__).resolve().parents[2]), "endnote-mcp", "serve"],
}
else:
# Fallback to direct python
server_entry = {
"command": sys.exec_find_uv function · python · L648-L665 (18 LOC)src/endnote_mcp/cli.py
def _find_uv() -> Path | None:
"""Find the uv executable."""
import shutil
# Check common locations
uv = shutil.which("uv")
if uv:
return Path(uv)
for candidate in [
Path.home() / ".local" / "bin" / "uv",
Path.home() / ".cargo" / "bin" / "uv",
Path("/usr/local/bin/uv"),
Path("/opt/homebrew/bin/uv"),
]:
if candidate.exists():
return candidate
return Noneget_config_dir function · python · L13-L20 (8 LOC)src/endnote_mcp/config.py
def get_config_dir() -> Path:
    """Return the per-user configuration directory for this platform.

    macOS uses ``~/Library/Application Support``, Windows uses ``%APPDATA%``
    (falling back to the home directory when unset), and every other system
    uses ``~/.config``; in each case the ``endnote-mcp`` folder is appended.
    """
    system = platform.system()
    home = Path.home()

    if system == "Darwin":
        base = home / "Library" / "Application Support"
    elif system == "Windows":
        base = Path(os.environ.get("APPDATA", home))
    else:
        base = home / ".config"
    return base / "endnote-mcp"
def load(cls, path: str | Path | None = None) -> Config:
"""Load configuration from a YAML file.
Resolution order:
1. Explicit *path* argument
2. ENDNOTE_MCP_CONFIG environment variable
3. Platform config dir (~/.config/endnote-mcp/config.yaml or equivalent)
4. Legacy config.yaml next to pyproject.toml
"""
if path is None:
path = os.environ.get("ENDNOTE_MCP_CONFIG")
if path is None:
default = get_default_config_path()
if default.exists():
path = default
elif _LEGACY_CONFIG_PATH.exists():
path = _LEGACY_CONFIG_PATH
else:
raise FileNotFoundError(
f"No configuration found.\n"
f"Run 'endnote-mcp setup' to configure your library."
)
path = Path(path).expanduser().resolve()
if not path.exists():
raise FileNotFoundError(
connect function · python · L10-L19 (10 LOC)src/endnote_mcp/db.py
def connect(db_path: str | Path) -> sqlite3.Connection:
    """Open the SQLite database at *db_path*, creating it if necessary.

    Missing parent directories are created first. The returned connection
    uses ``sqlite3.Row`` rows, has WAL journaling and foreign-key
    enforcement switched on, and has been passed through
    ``_create_schema``.
    """
    target = Path(db_path)
    target.parent.mkdir(parents=True, exist_ok=True)

    conn = sqlite3.connect(str(target))
    conn.row_factory = sqlite3.Row
    for pragma in ("PRAGMA journal_mode=WAL", "PRAGMA foreign_keys=ON"):
        conn.execute(pragma)

    _create_schema(conn)
    return conn
def _create_schema(conn: sqlite3.Connection) -> None:
conn.executescript("""
-- Main references table
CREATE TABLE IF NOT EXISTS references_ (
rec_number INTEGER PRIMARY KEY,
ref_type TEXT,
title TEXT,
authors TEXT, -- JSON array of author names
year TEXT,
journal TEXT,
volume TEXT,
issue TEXT,
pages TEXT,
abstract TEXT,
keywords TEXT, -- JSON array
doi TEXT,
url TEXT,
publisher TEXT,
place_published TEXT,
edition TEXT,
isbn TEXT,
label TEXT,
notes TEXT,
pdf_path TEXT -- relative to pdf_dir
);
-- FTS5 index over reference metadata (weighted BM25)
CREATE VIRTUAL TABLE IFupsert_reference function · python · L133-L160 (28 LOC)src/endnote_mcp/db.py
def upsert_reference(conn: sqlite3.Connection, ref: dict) -> None:
"""Insert or update a reference record.
Uses ON CONFLICT DO UPDATE instead of INSERT OR REPLACE to avoid
triggering ON DELETE CASCADE on pdf_pages and reference_embeddings.
"""
conn.execute("""
INSERT INTO references_(
rec_number, ref_type, title, authors, year, journal,
volume, issue, pages, abstract, keywords, doi, url,
publisher, place_published, edition, isbn, label, notes, pdf_path
) VALUES (
:rec_number, :ref_type, :title, :authors, :year, :journal,
:volume, :issue, :pages, :abstract, :keywords, :doi, :url,
:publisher, :place_published, :edition, :isbn, :label, :notes, :pdf_path
)
ON CONFLICT(rec_number) DO UPDATE SET
ref_type=excluded.ref_type, title=excluded.title,
authors=excluded.authors, year=excluded.year,
journal=excluded.journal, volume=excluded.voinsert_pdf_page function · python · L163-L168 (6 LOC)src/endnote_mcp/db.py
def insert_pdf_page(conn: sqlite3.Connection, rec_number: int, page_number: int, text: str) -> None:
    """Store the extracted text of one PDF page in ``pdf_pages``.

    Uses ``INSERT OR REPLACE``, so a row that conflicts with an existing
    one is overwritten instead of raising. The statement is not committed
    here.
    """
    statement = """
        INSERT OR REPLACE INTO pdf_pages(rec_number, page_number, text_content)
        VALUES (?, ?, ?)
    """
    row_values = (rec_number, page_number, text)
    conn.execute(statement, row_values)
clear_all function · python · L171-L180 (10 LOC)src/endnote_mcp/db.py
def clear_all(conn: sqlite3.Connection) -> None:
    """Empty every data table ahead of a full re-index.

    Deletes all rows from the embeddings, PDF page and reference tables
    (in that order) and then issues the FTS 'rebuild' command on both
    full-text indexes.
    """
    wipe_script = """
        DELETE FROM reference_embeddings;
        DELETE FROM pdf_pages;
        DELETE FROM references_;
        -- Rebuild FTS indexes
        INSERT INTO references_fts(references_fts) VALUES('rebuild');
        INSERT INTO pdf_fts(pdf_fts) VALUES('rebuild');
    """
    conn.executescript(wipe_script)
def upsert_embedding(conn: sqlite3.Connection, rec_number: int, embedding: bytes, model_name: str) -> None:
    """Store the embedding blob for a reference, replacing any existing row.

    The statement is not committed here.
    """
    statement = "INSERT OR REPLACE INTO reference_embeddings(rec_number, embedding, model_name) VALUES (?, ?, ?)"
    row_values = (rec_number, embedding, model_name)
    conn.execute(statement, row_values)
def get_stats(conn: sqlite3.Connection) -> dict:
    """Collect row counts describing the current state of the index.

    Returns:
        A dict with the number of references, the number of PDF pages, the
        number of distinct references that have at least one PDF page, and
        the number of stored embeddings.
    """

    def _scalar(query):
        # Every statistic is a single-value COUNT query.
        return conn.execute(query).fetchone()[0]

    return {
        "total_references": _scalar("SELECT COUNT(*) FROM references_"),
        "total_pdf_pages": _scalar("SELECT COUNT(*) FROM pdf_pages"),
        "references_with_pdf": _scalar("SELECT COUNT(DISTINCT rec_number) FROM pdf_pages"),
        "references_with_embeddings": _scalar("SELECT COUNT(*) FROM reference_embeddings"),
    }
def is_available() -> bool:
    """Report whether the ``sentence_transformers`` package can be imported."""
    try:
        import sentence_transformers  # noqa: F401
    except ImportError:
        return False
    else:
        return True
def load_model(model_name: str = MODEL_NAME):
    """Return the shared embedding model, loading it on first use.

    NOTE: the module-level cache holds a single model, so once one has
    been loaded later calls return it regardless of *model_name*.
    """
    global _model
    if _model is None:
        # Imported lazily so the module can be used without the dependency.
        from sentence_transformers import SentenceTransformer

        logger.info("Loading embedding model: %s", model_name)
        _model = SentenceTransformer(model_name)
    return _model
def build_search_text(ref: dict) -> str:
    """Combine title + abstract + keywords into embedding input text.

    Args:
        ref: Reference dict. ``title`` and ``abstract`` are used when
            truthy. ``keywords`` may be a list of keywords or a JSON
            string encoding one.

    Returns:
        The parts joined by single spaces, with keywords rendered as
        "Keywords: a, b". A keywords string that is not valid JSON is
        ignored; a JSON-encoded single string counts as one keyword; any
        other non-list value is ignored.
    """
    parts = []
    if ref.get("title"):
        parts.append(ref["title"])
    if ref.get("abstract"):
        parts.append(ref["abstract"])

    keywords = ref.get("keywords")
    if isinstance(keywords, str):
        try:
            keywords = json.loads(keywords)
        except (json.JSONDecodeError, TypeError):
            keywords = []
        if isinstance(keywords, str):
            # A JSON scalar string is one keyword; joining it directly
            # would split it into individual characters.
            keywords = [keywords]
    if not isinstance(keywords, (list, tuple, set, frozenset)):
        # Numbers, objects, None, ...: nothing usable as a keyword list.
        keywords = []

    terms = [str(keyword) for keyword in keywords if keyword]
    if terms:
        parts.append("Keywords: " + ", ".join(terms))
    return " ".join(parts)
def cosine_similarity(a: bytes, b: bytes) -> float:
    """Return the similarity score of two stored embedding blobs.

    Both blobs are decoded with ``_blob_to_array`` and their dot product
    is returned. This equals cosine similarity only for unit-length
    vectors, which is what this function relies on.
    """
    left, right = (_blob_to_array(blob) for blob in (a, b))
    score = np.dot(left, right)
    return float(score)
def search_semantic(
conn: sqlite3.Connection,
query_embedding: bytes,
*,
limit: int = 20,
) -> list[dict]:
"""Find nearest references by cosine similarity.
Uses Python-side computation (fast enough for ~4K vectors).
Returns list of dicts with rec_number, similarity, and metadata.
"""
rows = conn.execute(
"SELECT rec_number, embedding FROM reference_embeddings"
).fetchall()
if not rows:
return []
query_vec = _blob_to_array(query_embedding)
# Build matrix of all embeddings for vectorized computation
rec_numbers = [row["rec_number"] for row in rows]
matrix = np.stack([_blob_to_array(row["embedding"]) for row in rows])
# Cosine similarity (vectors are normalized, so just dot product)
similarities = matrix @ query_vec
# Get top-k indices
top_k = min(limit, len(rec_numbers))
top_indices = np.argpartition(-similarities, top_k)[:top_k]
top_indices = top_indices[np.argsort(-similarities[toWant fix-PRs on findings? Install Repobility's GitHub App · github.com/apps/repobility-bot
search_by_embedding function · python · L150-L161 (12 LOC)src/endnote_mcp/embeddings.py
def search_by_embedding(
    conn: sqlite3.Connection,
    embedding: bytes,
    *,
    exclude_rec: int | None = None,
    limit: int = 10,
) -> list[dict]:
    """Return the references nearest to *embedding* (used by find_related).

    One extra candidate is requested from ``search_semantic`` so that
    dropping ``exclude_rec`` can still leave *limit* results.
    """
    candidates = search_semantic(conn, embedding, limit=limit + 1)
    if exclude_rec is None:
        return candidates[:limit]

    kept = [hit for hit in candidates if hit["rec_number"] != exclude_rec]
    return kept[:limit]
def get_embedding(conn: sqlite3.Connection, rec_number: int) -> bytes | None:
    """Fetch the stored embedding blob for *rec_number*.

    Returns ``None`` when the reference has no embedding row. The row is
    read by column name, so the connection must produce name-addressable
    rows (e.g. ``sqlite3.Row``).
    """
    query = "SELECT embedding FROM reference_embeddings WHERE rec_number = ?"
    row = conn.execute(query, (rec_number,)).fetchone()
    if row is None:
        return None
    return row["embedding"]
def _parse_authors_short(authors_json: str) -> str:
"""Convert JSON author list to a short display string."""
try:
authors = json.loads(authors_json) if authors_json else []
except (json.JSONDecodeError, TypeError):
return str(authors_json)
if not authors:
return "Unknown"
if len(authors) == 1:
return authors[0]
if len(authors) == 2:
return f"{authors[0]} & {authors[1]}"
return f"{authors[0]} et al."page 1 / 2next ›