Function bodies 35 total

avg_max_similarity function · python · L56-L73 (18 LOC)

gen_embeddings.py

def avg_max_similarity(embs_a, embs_b):
    """Symmetric best-match similarity between two collections of embeddings.

    Each embedding in one collection is scored by the highest ``cos_sim`` it
    reaches against any embedding of the other collection. The scores are
    averaged per direction (A→B and B→A) and the two averages are then
    averaged. Returns ``0.0`` when either collection is empty.
    """
    if len(embs_a) == 0 or len(embs_b) == 0:
        return 0.0

    def best_matches(sources, targets):
        # Highest similarity each source embedding reaches against any target.
        return [max(cos_sim(src, tgt) for tgt in targets) for src in sources]

    a_to_b = best_matches(embs_a, embs_b)
    b_to_a = best_matches(embs_b, embs_a)
    return (np.mean(a_to_b) + np.mean(b_to_a)) / 2

extract_nace function · python · L43-L45 (3 LOC)

parse_pdf.py

def extract_nace(text):
    """Extract unique two-digit NACE division codes from ``text``.

    A code is a standalone two-digit number, optionally followed by a dotted
    sub-class suffix (``62.01``); only the leading division is returned, so
    the suffix digits are never reported as a separate code. ``00`` is not a
    valid division and is dropped.

    Returns:
        List of code strings in order of first appearance, without duplicates.
    """
    # The optional non-capturing group swallows ".NN" so that "62.01" yields
    # only "62" rather than "62" and "01".
    codes = re.findall(r'\b(\d{2})(?:\.\d+)?\b', text)
    return list(dict.fromkeys(c for c in codes if 1 <= int(c) <= 99))

find_domain_section function · python · L47-L63 (17 LOC)

parse_pdf.py

def find_domain_section(text):
    """Return the slice of ``text`` that holds the domain-specialisation part.

    The section begins at the first start marker that occurs in ``text``
    (markers are tried in listed order, not by position) and ends at the
    earliest end marker found at least 50 characters after that start, or at
    the end of ``text`` if none is found. Returns ``""`` when no start marker
    is present.
    """
    start_markers = ('Domény specializace', 'Tematické priority:')
    end_markers = ('Vznikající', 'Emerging', 'Realizace krajské', 'Realizace Krajské',
                   'Instituce s hlavní', 'V následující aktualizaci', 'Realizační rámec')

    # Lazily probe the markers in priority order; stop at the first hit.
    positions = (pos for pos in map(text.find, start_markers) if pos >= 0)
    start = next(positions, None)
    if start is None:
        return ""

    # End markers are searched past the heading itself (start + 50).
    candidates = [text.find(marker, start + 50) for marker in end_markers]
    end = min([len(text)] + [pos for pos in candidates if pos >= 0])
    return text[start:end]

parse_generic function · python · L65-L182 (118 LOC)

parse_pdf.py

def parse_generic(text, kraj_name):
    """Generic parser that works for most kraje."""
    section = find_domain_section(text)
    if not section:
        return []

    lines = section.split('\n')
    domains = []
    current_name = None
    current_text = []
    current_nace = []

    skip_patterns = [
        'Domény specializace', 'Tematické priority', 'Klíčová hospodářská',
        'Informuje ty', 'předpokládat', 'perspektiv', 'Tematická specializace',
        'Z hlediska EDP', 'Z pohledu koncových', 'byly formulovány',
        'Preferované perspektivní', 'Související odvětví',
    ]

    # For MSK, skip intro paragraphs until numbered items
    in_intro = kraj_name == 'Moravskoslezský kraj'

    for line in lines:
        line = line.strip()
        if not line:
            continue

        if any(line.startswith(p) for p in skip_patterns):
            continue

        if in_intro:
            if re.match(r'^\d+\.', line):
                in_intro = False
            else:

parse_praha function · python · L184-L210 (27 LOC)

parse_pdf.py

def parse_praha(text):
    """Parse domains from a document whose domains are lettered ``A.``–``D.``.

    The domain section is located with ``find_domain_section`` and split on
    lines starting with a letter A-D followed by a dot. For every chunk the
    first line (minus trailing commas) is the domain name, and the remaining
    non-empty lines, stripped of bullet characters, are joined into the
    description.

    Returns:
        A list of dicts with keys ``nazev``, ``popis``, ``nace`` (always
        empty here) and ``text_pro_embedding``; an empty list when no
        section is found.
    """
    section = find_domain_section(text)
    if not section:
        return []

    # With one capture group, re.split yields
    # [preamble, letter, body, letter, body, ...]; bodies sit at even indexes >= 2.
    pieces = re.split(r'\n([A-D])\.\s+', section)

    domains = []
    for body in pieces[2::2]:
        header, *rest = body.strip().split('\n')
        name = header.strip().rstrip(',')
        bullets = (raw.strip().lstrip('●•- ').strip() for raw in rest)
        desc = ' '.join(item for item in bullets if item)
        domains.append({
            'nazev': name,
            'popis': desc,
            'nace': [],
            'text_pro_embedding': f"{name}: {desc}"
        })

    return domains

extract_nace_codes function · python · L56-L67 (12 LOC)

parse_pdf_v2.py

def extract_nace_codes(text):
    """Return the distinct 2-digit NACE codes mentioned in ``text``.

    Matches standalone two-digit numbers, ignoring an optional dotted
    sub-class suffix (``62.01`` counts as ``62``). Codes outside 01-99 are
    discarded and the first-seen order is kept.
    """
    candidates = re.findall(r'\b(\d{2})(?:\.\d+)?\b', text)
    in_range = (code for code in candidates if 1 <= int(code) <= 99)
    # dict keys keep insertion order, which gives order-preserving dedup.
    return list(dict.fromkeys(in_range))

find_section function · python · L69-L84 (16 LOC)

parse_pdf_v2.py

def find_section(text, start_markers, end_markers, start_offset=0, *, end_skip=50):
    """Find the text section between a start marker and the nearest end marker.

    Args:
        text: Full text to search.
        start_markers: Candidate start strings, tried in order; the first one
            that occurs at or after ``start_offset`` defines the section start
            (priority is by list order, not by position in ``text``).
        end_markers: Candidate end strings; the earliest occurrence among
            them bounds the section.
        start_offset: Index from which start markers are searched.
        end_skip: Number of characters after the section start that are
            skipped before looking for end markers, so that an end marker
            appearing inside the heading itself does not cut the section
            short. Defaults to 50, the previously hard-coded value; negative
            values are treated as 0.

    Returns:
        ``(section, start)`` where ``section`` is ``text[start:end]``. When no
        start marker is found the result is ``("", 0)``.
    """
    start = -1
    for marker in start_markers:
        start = text.find(marker, start_offset)
        if start >= 0:
            break
    if start < 0:
        return "", 0

    # Clamp so a negative skip can never make str.find count from the end.
    search_from = start + max(end_skip, 0)
    hits = (text.find(marker, search_from) for marker in end_markers)
    end = min((pos for pos in hits if pos >= 0), default=len(text))
    return text[start:end], start

Repobility analyzer · published findings · https://repobility.com

is_garbage function · python · L89-L99 (11 LOC)

parse_pdf_v2.py

def is_garbage(line):
    """Heuristically detect OCR garbage lines.

    A line is treated as garbage when, after stripping surrounding
    whitespace, it is shorter than 3 characters, or when it is longer than 10
    characters, contains at least one letter, and more than 40 % of its
    characters are spaces (typical for OCR output that splits words into
    isolated letters).

    All measurements are taken on the stripped text, so leading or trailing
    indentation does not influence the verdict.
    """
    stripped = line.strip()
    if len(stripped) < 3:
        return True
    if len(stripped) > 10 and any(ch.isalpha() for ch in stripped):
        # Count spaces inside the stripped text only; counting them on the raw
        # line would flag ordinary indented lines as garbage.
        return stripped.count(' ') / len(stripped) > 0.4
    return False

clean_domain_name function · python · L101-L105 (5 LOC)

parse_pdf_v2.py

def clean_domain_name(name):
    """Normalise a raw domain name.

    Surrounding whitespace is removed, then trailing commas and after that
    trailing full stops are dropped, and finally every run of whitespace is
    collapsed into a single space.
    """
    trimmed = name.strip()
    # Order matters: commas are stripped before full stops.
    for trailing in (',', '.'):
        trimmed = trimmed.rstrip(trailing)
    return re.sub(r'\s+', ' ', trimmed)

parse_jihocesky function · python · L109-L164 (56 LOC)

parse_pdf_v2.py

def parse_jihocesky(text):
    """Plain headers, bullets •, NACE on 'Vazba na CZ-NACE: hlavni vazby: ...'"""
    section, _ = find_section(text,
        ['Domény specializace Jihočeského kraje'],
        ['Vznikající', 'Emerging', 'Realizace krajské'])
    if not section:
        return [], []

    lines = section.split('\n')
    domains = []
    current = None

    for line in lines:
        line_s = line.strip()
        if not line_s or is_page_marker(line):
            continue
        if line_s.startswith('Domény specializace'):
            continue

        nace_match = re.search(r'Vazba na CZ[- ]?NACE[:\s]*(.*)', line_s, re.IGNORECASE)
        if nace_match:
            if current:
                current['nace'] = extract_nace_codes(nace_match.group(1))
                domains.append(current)
                current = None
            continue

        if line_s.startswith(('•', '●', '–', '-')) and current:
            clean = line_s.lstrip('•●–- ').strip()
            if clean:

parse_jihomoravsky function · python · L167-L209 (43 LOC)

parse_pdf_v2.py

def parse_jihomoravsky(text):
    """Bullets • with inline NACE '(těžiště v CZ-NACE NN)'. Skip cluster diagram."""
    section, _ = find_section(text,
        ['Domény specializace kraje/ Klíčová', 'Domény specializace kraje/'],
        ['Vznikající', 'Emerging'])
    if not section:
        return [], []

    lines = section.split('\n')
    domains = []

    for line in lines:
        line_s = line.strip()
        if not line_s or is_page_marker(line):
            continue
        # Domain lines are bullets with inline NACE
        if line_s.startswith('•') and 'CZ-NACE' in line_s:
            nace_match = re.search(r'\(těžišt[eě]\s+v\s+CZ-NACE\s+([\d,. a částicásti]+)\)', line_s)
            nace_codes = extract_nace_codes(nace_match.group(1)) if nace_match else []
            # Remove the NACE parenthetical from domain name
            name = re.sub(r'\s*\(těžišt[eě][^)]+\)', '', line_s)
            name = name.lstrip('•●– ').strip()
            if name and len(name) > 3:

parse_karlovarsky function · python · L212-L270 (59 LOC)

parse_pdf_v2.py

def parse_karlovarsky(text):
    """Plain headers, bullets •, NACE on 'Vazba na CZ-NACE: ...'"""
    section, _ = find_section(text,
        ['Domény specializace kraje'],
        ['Vznikající', 'Emerging', 'Realizace krajské'])
    if not section:
        return [], []

    lines = section.split('\n')
    domains = []
    current = None

    for line in lines:
        line_s = line.strip()
        if not line_s or is_page_marker(line):
            continue
        if line_s.startswith('Domény specializace'):
            continue

        nace_match = re.search(r'Vazba na CZ[- ]?NACE[:\s]*(.*)', line_s, re.IGNORECASE)
        if nace_match:
            if current:
                current['nace'] = extract_nace_codes(nace_match.group(1))
                domains.append(current)
                current = None
            continue

        if line_s.startswith(('•', '●', '–')) and current:
            clean = line_s.lstrip('•●–- ').strip()
            if clean:
                current['des

parse_kralovehradecky function · python · L273-L332 (60 LOC)

parse_pdf_v2.py

def parse_kralovehradecky(text):
    """Plain headers, bullets •, NACE on 'Vazba domény na CZ-NACE: ...' with semicolons"""
    section, _ = find_section(text,
        ['Domény specializace kraje'],
        ['Vznikající', 'Emerging', 'Realizace krajské'])
    if not section:
        return [], []

    lines = section.split('\n')
    domains = []
    current = None

    for line in lines:
        line_s = line.strip()
        if not line_s or is_page_marker(line):
            continue
        if line_s.startswith('Domény specializace'):
            continue

        nace_match = re.search(r'Vazb[ay]\s+dom[eé]n[yě]?\s+na\s+CZ[- ]?NACE[:\s]*(.*)', line_s, re.IGNORECASE)
        if nace_match:
            if current:
                current['nace'] = extract_nace_codes(nace_match.group(1))
                domains.append(current)
                current = None
            continue

        # Sub-category headers within domains (e.g. "Činnost institucí:", "Tradiční činnosti:")
        if lin

parse_liberecky function · python · L335-L387 (53 LOC)

parse_pdf_v2.py

def parse_liberecky(text):
    """Plain headers, bullets •, NACE on 'Vazba domény na CZ-NACE: ...'"""
    section, _ = find_section(text,
        ['Domény specializace Libereckého kraje'],
        ['Vznikající', 'Emerging', 'Realizace krajské', 'Realiza'])
    if not section:
        return [], []

    lines = section.split('\n')
    domains = []
    current = None

    for line in lines:
        line_s = line.strip()
        if not line_s or is_page_marker(line) or is_garbage(line_s):
            continue
        if line_s.startswith('Domény specializace'):
            continue

        nace_match = re.search(r'Vazb[ay]\s+dom[eé]n[yě]?\s+na\s+CZ[- ]?NACE[:\s]*(.*)', line_s, re.IGNORECASE)
        if nace_match:
            if current:
                current['nace'] = extract_nace_codes(nace_match.group(1))
                domains.append(current)
                current = None
            continue

        if line_s.startswith(('•', '●', '–')) and current:
            clean = line_s.l

parse_moravskoslezsky function · python · L390-L474 (85 LOC)

parse_pdf_v2.py

def parse_moravskoslezsky(text):
    """Numbered domains in 2 groups, 'Zaměření domény:' blocks, 'Hlavní vazby na CZ-NACE - NN'"""
    section, _ = find_section(text,
        ['Tematická specializace RIS MSK', 'Z pohledu koncových trhů'],
        ['Realizace krajské', 'Instituce s hlavní výkonnou'])
    if not section:
        return [], []

    lines = section.split('\n')
    domains = []
    current = None
    in_description = False

    for line in lines:
        line_s = line.strip()
        if not line_s or is_page_marker(line):
            continue

        # Skip intro paragraphs
        if any(skip in line_s for skip in [
            'Tematická specializace', 'Z hlediska EDP', 'Z pohledu koncových',
            'Z pohledu technologických', 'byly formulovány', 'prioritám při rozvoji',
            'aktualizaci RIS MSK', 'Současně s tím', 'příležitosti pro více',
            'specializace vychází', 'vzájemně propojených'
        ]):
            continue

        # NACE line — use

Repobility (the analyzer behind this table) · https://repobility.com

parse_olomoucky function · python · L477-L515 (39 LOC)

parse_pdf_v2.py

def parse_olomoucky(text):
    """Bullets • with inline description after em dash –. No NACE. No Emerging."""
    section, _ = find_section(text,
        ['Domény specializace kraje:'],
        ['RIS3 mise', 'Realizace krajské', 'Realizace Krajské', 'Instituce s hlavní'])
    if not section:
        return [], []

    lines = section.split('\n')
    domains = []

    for line in lines:
        line_s = line.strip()
        if not line_s or is_page_marker(line):
            continue
        if line_s.startswith('Domény specializace'):
            continue

        if line_s.startswith(('•', '●')):
            clean = line_s.lstrip('•●– ').strip()
            if '–' in clean:
                parts = clean.split('–', 1)
                name = parts[0].strip()
                desc = parts[1].strip() if len(parts) > 1 else ''
            elif '−' in clean:
                parts = clean.split('−', 1)
                name = parts[0].strip()
                desc = parts[1].strip() if len(parts

parse_pardubicky function · python · L518-L570 (53 LOC)

parse_pdf_v2.py

def parse_pardubicky(text):
    """Plain headers, narrative descriptions, 'Stěžejní CZ NACE pro tuto doménu: ...'"""
    section, _ = find_section(text,
        ['Domény specializace kraje'],
        ['Vznikající', 'Emerging', 'Realizace krajské', 'směřuje budoucí'])
    if not section:
        return [], []

    lines = section.split('\n')
    domains = []
    current = None

    for line in lines:
        line_s = line.strip()
        if not line_s or is_page_marker(line):
            continue
        if line_s.startswith('Domény specializace'):
            continue

        nace_match = re.search(r'Stěžejní\s+CZ\s*[-]?NACE\s+pro\s+tuto\s+doménu[:\s]*(.*)', line_s, re.IGNORECASE)
        if nace_match:
            if current:
                current['nace'] = extract_nace_codes(nace_match.group(1))
                domains.append(current)
                current = None
            continue

        if line_s.startswith(('•', '●', '–')) and current:
            clean = line_s.lstrip('•

parse_plzensky function · python · L573-L632 (60 LOC)

parse_pdf_v2.py

def parse_plzensky(text):
    """Plain headers, dash-bullet sub-items, 'Související odvětví (CZ-NACE)' then descriptive line"""
    section, _ = find_section(text,
        ['Tematické priority'],
        ['Realizace krajské', 'Realizace Krajské', 'Instituce s hlavní'])
    if not section:
        return [], []

    lines = section.split('\n')
    domains = []
    current = None
    in_nace_block = False

    for line in lines:
        line_s = line.strip()
        if not line_s or is_page_marker(line):
            continue
        if line_s.startswith('Tematické priority'):
            continue

        # NACE block header
        if 'Související odvětví' in line_s and 'CZ-NACE' in line_s:
            in_nace_block = True
            continue

        if in_nace_block:
            if line_s.startswith('-') or line_s.startswith('–'):
                codes = extract_nace_codes(line_s)
                if current:
                    current['nace'].extend(codes)
            else:

parse_praha function · python · L635-L674 (40 LOC)

parse_pdf_v2.py

def parse_praha(text):
    """Letter-numbered A.-D., bullets ●. No NACE."""
    section, _ = find_section(text,
        ['Tematické priority – Domény', 'Tematické priority — Domény', 'Tematické priority:'],
        ['V následující aktualizaci', 'Realizace krajské'])
    if not section:
        return [], []

    domains = []
    parts = re.split(r'\n([A-D])\.\s+', section)

    for i in range(1, len(parts)-1, 2):
        content = parts[i+1]
        lines = content.strip().split('\n')
        name = lines[0].strip().rstrip(',')
        desc_lines = []
        for l in lines[1:]:
            clean = l.strip().lstrip('●•–- ').strip()
            if clean and not is_page_marker(l) and len(clean) > 3:
                desc_lines.append(clean)
        domains.append({
            'nazev': clean_domain_name(name),
            'desc_lines': desc_lines,
            'nace': [],
        })

    # Check for planned future domains
    emerging = []
    em_section, _ = find_section(text,
        ['V

parse_stredocesky function · python · L677-L733 (57 LOC)

parse_pdf_v2.py

def parse_stredocesky(text):
    """Plain headers with inline NACE '(CZ-NACE NN, NN)', bullets • with sub-NACEs."""
    section, _ = find_section(text,
        ['Domény specializace kraje (vertikální', 'Domény specializace kraje'],
        ['Vznikající', 'Emerging', 'Realizace krajské'])
    if not section:
        return [], []

    lines = section.split('\n')
    domains = []
    current = None

    for line in lines:
        line_s = line.strip()
        if not line_s or is_page_marker(line):
            continue
        if line_s.startswith('Domény specializace'):
            continue
        # Skip footnotes
        if re.match(r'^\d+\s+viz:', line_s):
            continue

        inline_nace = re.search(r'\(CZ-NACE\s+([\d,.\s]+)\)', line_s)
        if inline_nace and not line_s.startswith(('•', '●')):
            # Domain name with inline NACE
            if current and (current['desc_lines'] or current['nace']):
                domains.append(current)
            name = re.sub(

parse_ustecky function · python · L736-L789 (54 LOC)

parse_pdf_v2.py

def parse_ustecky(text):
    """Three-tier structure: Hlavní, KETs, Subdomény. Bullets •. No NACE."""
    section, _ = find_section(text,
        ['Tematické priority:'],
        ['Realizace krajské', 'Realizace Krajské', 'Instituce s hlavní'])
    if not section:
        return [], []

    lines = section.split('\n')
    domains = []
    current_tier = None
    current = None

    tier_headers = {
        'Hlavní oblasti specializace': 'hlavní',
        'KETs (průřezové)': 'KETs',
        'Užší subdomény': 'subdomény',
    }

    for line in lines:
        line_s = line.strip()
        if not line_s or is_page_marker(line):
            continue
        if line_s.startswith('Tematické priority'):
            continue

        # Detect tier
        for header, tier in tier_headers.items():
            if header in line_s:
                current_tier = tier
                break

        if line_s.startswith(('•', '●')):
            clean = line_s.lstrip('•●–- ').strip()
            if

parse_vysocina function · python · L792-L848 (57 LOC)

parse_pdf_v2.py

def parse_vysocina(text):
    """Plain headers, bullets • with inline NACE '(NACE NN)' (no CZ- prefix)."""
    section, _ = find_section(text,
        ['Domény specializace Kraje Vysočina'],
        ['Vznikající', 'Emerging', 'Realizace krajské'])
    if not section:
        return [], []

    lines = section.split('\n')
    domains = []
    current = None

    for line in lines:
        line_s = line.strip()
        if not line_s or is_page_marker(line):
            continue
        if line_s.startswith('Domény specializace'):
            continue

        if line_s.startswith(('•', '●', '–')) and current:
            clean = line_s.lstrip('•●–- ').strip()
            # Extract inline NACE codes
            nace_matches = re.findall(r'\((?:CZ-)?NACE\s+([\d,. a]+)\)', clean)
            for m in nace_matches:
                current['nace'].extend(extract_nace_codes(m))
            # Remove NACE references from description
            clean_desc = re.sub(r'\((?:CZ-)?NACE[^)]+\)', '', c

parse_zlinsky function · python · L851-L946 (96 LOC)

parse_pdf_v2.py

def parse_zlinsky(text):
    """Two domain sets: horizontal (under Strategická orientace) and vertical (under Tematické priority).
    Vertical grouped in 3 tiers 1) 2) 3). No NACE."""

    # Vertical (application) domains — under Tematické priority
    v_section, _ = find_section(text,
        ['Odvětvové (aplikační) domény', 'Tematické priority:'],
        ['Vznikající', 'Emerging', 'Realizace krajské'])
    if not v_section:
        return [], []

    lines = v_section.split('\n')
    domains = []
    current = None
    current_tier = None

    for line in lines:
        line_s = line.strip()
        if not line_s or is_page_marker(line):
            continue
        if any(skip in line_s for skip in ['Odvětvové (aplikační)', 'Tematické priority']):
            continue

        # Tier headers: 1) ... 2) ... 3) ...
        tier_match = re.match(r'^(\d)\)\s+(.+)', line_s)
        if tier_match:
            current_tier = tier_match.group(2)[:50]
            continue

        if line_

Methodology: Repobility · https://repobility.com/research/state-of-ai-code-2026/

_finalize function · python · L951-L972 (22 LOC)

parse_pdf_v2.py

def _finalize(domains):
    """Convert raw domain dicts to final format."""
    result = []
    for d in domains:
        nazev = d['nazev']
        if len(nazev) < 4 or nazev.startswith('Zaměření'):
            continue
        popis = ' '.join(d['desc_lines']).strip()
        # Clean up description
        popis = re.sub(r'\s+', ' ', popis)
        nace = list(dict.fromkeys(d['nace']))  # Deduplicate
        text_pro_embedding = f"{nazev}: {popis}" if popis else nazev
        entry = {
            'nazev': nazev,
            'popis': popis,
            'nace': nace,
            'text_pro_embedding': text_pro_embedding,
        }
        if 'tier' in d and d['tier']:
            entry['tier'] = d['tier']
        result.append(entry)
    return result

App function · javascript · L20-L134 (115 LOC)

src/App.jsx

export default function App() {
  const [currentSlide, setCurrentSlide] = useState(0)
  const [isPaused, setIsPaused] = useState(true)

  const goTo = useCallback((idx) => {
    setCurrentSlide(Math.max(0, Math.min(TOTAL_SLIDES - 1, idx)))
  }, [])

  const next = useCallback(() => goTo(currentSlide + 1), [currentSlide, goTo])
  const prev = useCallback(() => goTo(currentSlide - 1), [currentSlide, goTo])

  // Keyboard navigation
  useEffect(() => {
    const handler = (e) => {
      if (e.key === 'ArrowRight' || e.key === ' ') { e.preventDefault(); next() }
      if (e.key === 'ArrowLeft') { e.preventDefault(); prev() }
      if (e.key === 'Home') { e.preventDefault(); goTo(0) }
      if (e.key === 'End') { e.preventDefault(); goTo(TOTAL_SLIDES - 1) }
      if (e.key === 'p') setIsPaused(p => !p)
    }
    window.addEventListener('keydown', handler)
    return () => window.removeEventListener('keydown', handler)
  }, [next, prev, goTo])

  // Touch swipe navigation
  const touchStartX

InfoPanel function · javascript · L3-L39 (37 LOC)

src/InfoPanel.jsx

export default function InfoPanel({ text }) {
  const [open, setOpen] = useState(false)

  return (
    <>
      <button
        onClick={() => setOpen(true)}
        className="absolute top-6 right-6 z-20 w-7 h-7 flex items-center justify-center rounded-full bg-white/80 hover:bg-white shadow text-[#0087CD] text-sm font-bold cursor-pointer"
        title="Vysvětlivka"
      >
        i
      </button>
      {open && (
        <div
          className="fixed inset-0 z-50 flex items-center justify-center"
          onClick={() => setOpen(false)}
        >
          <div className="absolute inset-0 bg-black/30" />
          <div
            className="relative bg-white rounded-xl shadow-xl p-6 max-w-lg mx-4"
            onClick={(e) => e.stopPropagation()}
          >
            <button
              onClick={() => setOpen(false)}
              className="absolute top-3 right-3 w-6 h-6 flex items-center justify-center rounded-full hover:bg-gray-100 text-[#9B9BA0] text-sm"
            >

SlideConclusion function · javascript · L3-L153 (151 LOC)

src/slides/SlideConclusion.jsx

export default function SlideConclusion() {
  const [visible, setVisible] = useState(false)

  useEffect(() => {
    const t = setTimeout(() => setVisible(true), 200)
    return () => clearTimeout(t)
  }, [])

  const pipeline = [
    {
      stage: 'Explorace',
      color: '#B07ED8',
      steps: [
        'Analytická zvědavost: jsou si krajské specializace nějak podobné? Dá se to zjistit?',
        'Mapování dostupných zdrojů: krajské karty NRIS3, databáze CEP, ČSÚ. Žádná hotová data, žádné zadání.',
      ],
    },
    {
      stage: 'Sběr dat',
      color: '#0087CD',
      steps: [
        'Parsování PDF krajských karet, extrakce strukturovaných dat ze 14 různých formátů',
      ],
    },
    {
      stage: 'Analýza',
      color: '#E6AF14',
      steps: [
        'Převod textů domén na vektory jazykovým modelem, výpočet sémantické podobnosti',
        'Volba a ladění metod v dialogu: prahové hodnoty, Jaccard vs. embedding, FORD + sémantika',
        'Propojení tisíců VaV projekt

computeJaccard function · javascript · L31-L37 (7 LOC)

src/slides/SlideJaccardHeatmap.jsx

// Jaccard index |A ∩ B| / |A ∪ B| of two Sets.
// Returns null (rather than a number) when either set is empty.
function computeJaccard(setA, setB) {
  const eitherEmpty = setA.size === 0 || setB.size === 0
  if (eitherEmpty) return null

  let sharedCount = 0
  for (const item of setA) {
    if (setB.has(item)) sharedCount += 1
  }

  const combined = new Set(setA)
  for (const item of setB) combined.add(item)

  return sharedCount / combined.size
}

getSharedCodes function · javascript · L39-L41 (3 LOC)

src/slides/SlideJaccardHeatmap.jsx

// Elements of setA that also occur in setB, sorted with Array.prototype.sort's
// default (string) ordering.
function getSharedCodes(setA, setB) {
  const shared = Array.from(setA).filter((code) => setB.has(code))
  shared.sort()
  return shared
}

SlideMapVav function · javascript · L19-L328 (310 LOC)

src/slides/SlideMapVav.jsx

export default function SlideMapVav() {
  const [krajeGeo, setKrajeGeo] = useState(null)
  const [okresyGeo, setOkresyGeo] = useState(null)
  const [sektorData, setSektorData] = useState(null)
  const [tooltip, setTooltip] = useState(null)
  const [dimensions, setDimensions] = useState({ width: 0, height: 0 })

  useEffect(() => {
    Promise.all([
      fetch(`${import.meta.env.BASE_URL}data/kraje.geojson`).then(r => r.json()),
      fetch(`${import.meta.env.BASE_URL}data/okresy.geojson`).then(r => r.json()),
      fetch(`${import.meta.env.BASE_URL}data/vav_sektory_2024.json`).then(r => r.json()),
    ]).then(([kraje, okresy, sektory]) => {
      setKrajeGeo(kraje)
      setOkresyGeo(okresy)
      setSektorData(sektory)
    })
  }, [])

  useEffect(() => {
    const update = () => setDimensions({ width: window.innerWidth, height: window.innerHeight })
    update()
    window.addEventListener('resize', update)
    return () => window.removeEventListener('resize', update)
  }, [])

  //

computeJaccard function · javascript · L31-L36 (6 LOC)

src/slides/SlideSemantic.jsx

// Jaccard similarity of two Sets: shared elements divided by distinct
// elements overall. An empty set on either side gives 0.
function computeJaccard(setA, setB) {
  if (setA.size === 0 || setB.size === 0) {
    return 0
  }

  const members = Array.from(setA)
  const overlap = members.filter((member) => setB.has(member)).length
  const everything = new Set(members.concat(Array.from(setB)))
  return overlap / everything.size
}

All rows scored by the Repobility analyzer (https://repobility.com)

computeJaccard function · javascript · L48-L53 (6 LOC)

src/slides/SlideSemanticMerged.jsx

// Ratio of the intersection size to the union size of two Sets (Jaccard
// index). Yields 0 as soon as one of the sets has no elements.
function computeJaccard(setA, setB) {
  if (setA.size === 0 || setB.size === 0) return 0

  const sharedCount = [...setA].reduce(
    (count, value) => (setB.has(value) ? count + 1 : count),
    0
  )
  const unionSize = new Set([...setA, ...setB]).size
  return sharedCount / unionSize
}

SlideSemanticNetwork function · javascript · L31-L413 (383 LOC)

src/slides/SlideSemanticNetwork.jsx

export default function SlideSemanticNetwork() {
  const [semData, setSemData] = useState(null)
  const [tooltip, setTooltip] = useState(null)
  const [hoveredNode, setHoveredNode] = useState(null)
  const [dimensions, setDimensions] = useState({ width: 0, height: 0 })
  const [nodes, setNodes] = useState([])
  const [links, setLinks] = useState([])
  const simRef = useRef(null)
  const dragRef = useRef(null)
  const svgRef = useRef(null)

  useEffect(() => {
    fetch(`${import.meta.env.BASE_URL}data/semanticka_podobnost.json`).then(r => r.json()).then(setSemData)
  }, [])

  useEffect(() => {
    const update = () => setDimensions({ width: window.innerWidth, height: window.innerHeight })
    update()
    window.addEventListener('resize', update)
    return () => window.removeEventListener('resize', update)
  }, [])

  // Build graph data
  const graphData = useMemo(() => {
    if (!semData) return null
    const krajNames = semData.kraje

    // Node color: shades of one blue for NAC

SlideTitle function · javascript · L4-L89 (86 LOC)

src/slides/SlideTitle.jsx

export default function SlideTitle() {
  const [visible, setVisible] = useState(false)
  const [krajeGeo, setKrajeGeo] = useState(null)
  const [dimensions, setDimensions] = useState({ width: 0, height: 0 })

  useEffect(() => {
    const t = setTimeout(() => setVisible(true), 200)
    return () => clearTimeout(t)
  }, [])

  useEffect(() => {
    fetch(`${import.meta.env.BASE_URL}data/kraje.geojson`).then(r => r.json()).then(setKrajeGeo)
  }, [])

  useEffect(() => {
    const update = () => setDimensions({ width: window.innerWidth, height: window.innerHeight })
    update()
    window.addEventListener('resize', update)
    return () => window.removeEventListener('resize', update)
  }, [])

  const pathData = useMemo(() => {
    if (!krajeGeo || dimensions.width === 0) return null
    const { width, height } = dimensions
    const mapW = width * 0.70
    const mapH = height * 0.70
    const proj = d3.geoMercator().fitSize([mapW, mapH], krajeGeo)
    const [tx, ty] = proj.translate()

SlideVavEkosystem function · javascript · L22-L270 (249 LOC)

src/slides/SlideVavEkosystem.jsx

export default function SlideVavEkosystem() {
  const [data, setData] = useState(null)
  const [tooltip, setTooltip] = useState(null)
  const [dimensions, setDimensions] = useState({ width: 0, height: 0 })

  useEffect(() => {
    fetch(`${import.meta.env.BASE_URL}data/vav_semantic_match.json`).then(r => r.json()).then(setData)
  }, [])

  useEffect(() => {
    const update = () => setDimensions({ width: window.innerWidth, height: window.innerHeight })
    update()
    window.addEventListener('resize', update)
    return () => window.removeEventListener('resize', update)
  }, [])

  // Sort kraje by total projects descending
  const sortedKraje = useMemo(() => {
    if (!data) return null
    return Object.entries(data.kraje)
      .sort((a, b) => b[1].celkem_projektu - a[1].celkem_projektu)
  }, [data])

  if (!sortedKraje || dimensions.width === 0) {
    return (
      <div className="w-full h-full flex items-center justify-center bg-[#f8f9fa]">
        <p className="text-[#777] text