Function bodies 35 total
avg_max_similarity function · python · L56-L73 (18 LOC)gen_embeddings.py
def avg_max_similarity(embs_a, embs_b):
    """Return the symmetric mean of best-match similarities between two sets.

    For every embedding of one collection, the highest ``cos_sim`` against
    any embedding of the other collection is taken.  Those maxima are
    averaged per direction (A→B and B→A), and the two directional means
    are then averaged.

    Returns ``0.0`` when either collection is empty.
    """
    if not (len(embs_a) and len(embs_b)):
        return 0.0

    # Best counterpart in B for each element of A.
    best_for_a = [
        max(cos_sim(vec_a, vec_b) for vec_b in embs_b)
        for vec_a in embs_a
    ]
    # Best counterpart in A for each element of B.
    best_for_b = [
        max(cos_sim(vec_b, vec_a) for vec_a in embs_a)
        for vec_b in embs_b
    ]

    forward = np.mean(best_for_a)
    backward = np.mean(best_for_b)
    return (forward + backward) / 2
def extract_nace(text):
    """Extract unique two-digit NACE codes from *text*, in order of appearance.

    A code may carry a dotted sub-class suffix (e.g. ``26.11``); only the
    leading two digits are kept.  The suffix is consumed by the pattern so
    that it is not picked up as a separate code (``26.11`` used to yield
    both ``26`` and ``11``).

    Returns a list of two-character strings whose integer value lies in
    the range 1-99; duplicates are dropped, first occurrence wins.
    """
    codes = re.findall(r'\b(\d{2})(?:\.\d+)?\b', text)
    # dict.fromkeys de-duplicates while keeping first-seen order.
    return list(dict.fromkeys(c for c in codes if 1 <= int(c) <= 99))
def find_domain_section(text):
    """Return the slice of *text* that holds the domain listing.

    The slice begins at the first start marker (tried in list order) that
    occurs in *text* and ends at the earliest end marker found at least
    50 characters after that start, or at the end of *text* when no end
    marker is found.  Returns an empty string when no start marker occurs.
    """
    start_markers = ['Domény specializace', 'Tematické priority:']
    found = (text.find(marker) for marker in start_markers)
    begin = next((pos for pos in found if pos >= 0), None)
    if begin is None:
        return ""

    end_markers = [
        'Vznikající', 'Emerging', 'Realizace krajské', 'Realizace Krajské',
        'Instituce s hlavní', 'V následující aktualizaci', 'Realizační rámec',
    ]
    # Searching from begin + 50 skips end markers inside the heading itself.
    hits = [text.find(marker, begin + 50) for marker in end_markers]
    finish = min((pos for pos in hits if pos >= 0), default=len(text))
    return text[begin:finish]
def parse_generic(text, kraj_name):
"""Generic parser that works for most kraje."""
section = find_domain_section(text)
if not section:
return []
lines = section.split('\n')
domains = []
current_name = None
current_text = []
current_nace = []
skip_patterns = [
'Domény specializace', 'Tematické priority', 'Klíčová hospodářská',
'Informuje ty', 'předpokládat', 'perspektiv', 'Tematická specializace',
'Z hlediska EDP', 'Z pohledu koncových', 'byly formulovány',
'Preferované perspektivní', 'Související odvětví',
]
# For MSK, skip intro paragraphs until numbered items
in_intro = kraj_name == 'Moravskoslezský kraj'
for line in lines:
line = line.strip()
if not line:
continue
if any(line.startswith(p) for p in skip_patterns):
continue
if in_intro:
if re.match(r'^\d+\.', line):
in_intro = False
else:
parse_praha function · python · L184-L210 (27 LOC)parse_pdf.py
def parse_praha(text):
    """Parse domains introduced by ``A.`` to ``D.`` headings in the domain section.

    The section located by ``find_domain_section`` is split on lines that
    start with a capital letter A-D followed by a dot.  The first line of
    each chunk becomes the domain name; the remaining non-empty lines, with
    bullet characters stripped, are joined into the description.

    Returns a list of dicts with keys ``nazev``, ``popis``, ``nace``
    (always an empty list) and ``text_pro_embedding``; an empty list when
    no section is found.
    """
    section = find_domain_section(text)
    if not section:
        return []

    # With one capture group, re.split yields
    # [preamble, letter, body, letter, body, ...]; bodies sit at 2, 4, ...
    chunks = re.split(r'\n([A-D])\.\s+', section)
    domains = []
    for body in chunks[2::2]:
        heading, *rest = body.strip().split('\n')
        title = heading.strip().rstrip(',')
        bullets = (raw.strip().lstrip('●•- ').strip() for raw in rest)
        summary = ' '.join(item for item in bullets if item)
        domains.append({
            'nazev': title,
            'popis': summary,
            'nace': [],
            'text_pro_embedding': f"{title}: {summary}",
        })
    return domains
def extract_nace_codes(text):
    """Collect unique two-digit NACE codes from *text*, in order of appearance.

    A dotted numeric suffix (``26.1``) is consumed together with the code,
    so only the leading two digits are reported.  Codes whose integer value
    falls outside 1-99 (i.e. ``00``) are discarded.
    """
    candidates = re.findall(r'\b(\d{2})(?:\.\d+)?\b', text)
    in_range = (code for code in candidates if 1 <= int(code) <= 99)
    # dict.fromkeys keeps the first occurrence of each code, in order.
    return list(dict.fromkeys(in_range))
def find_section(text, start_markers, end_markers, start_offset=0):
    """Locate the part of *text* delimited by start and end markers.

    Start markers are tried in the given order, searching from
    *start_offset*; the first one that occurs fixes the beginning.  The
    section ends at the earliest end marker found at least 50 characters
    past that beginning, or at the end of *text* if none is found.

    Returns a ``(section, start_index)`` tuple, or ``("", 0)`` when no
    start marker occurs.
    """
    positions = (text.find(marker, start_offset) for marker in start_markers)
    begin = next((pos for pos in positions if pos >= 0), None)
    if begin is None:
        return "", 0

    # Searching from begin + 50 skips end markers inside the heading itself.
    hits = (text.find(marker, begin + 50) for marker in end_markers)
    finish = min((pos for pos in hits if pos >= 0), default=len(text))
    return text[begin:finish], begin
is_garbage function · python · L89-L99 (11 LOC)parse_pdf_v2.py
def is_garbage(line):
    """Heuristically flag a line as OCR garbage.

    A line is flagged when its stripped length is below 3 characters, or
    when it contains at least one letter, is longer than 10 characters
    after stripping, and more than 40 % of that length is spaces (spaces
    are counted on the unstripped line).
    """
    stripped_len = len(line.strip())
    if stripped_len < 3:
        return True

    has_letters = any(ch.isalpha() for ch in line)
    if not has_letters or stripped_len <= 10:
        return False

    # A very high share of spaces points at letter-spaced OCR output.
    return line.count(' ') / max(1, stripped_len) > 0.4
def clean_domain_name(name):
    """Normalise a raw domain name.

    Surrounding whitespace is removed, then trailing commas, then trailing
    full stops (in that order), and finally every run of whitespace is
    collapsed into a single space.
    """
    trimmed = name.strip()
    trimmed = trimmed.rstrip(',')
    trimmed = trimmed.rstrip('.')
    return re.sub(r'\s+', ' ', trimmed)
def parse_jihocesky(text):
"""Plain headers, bullets •, NACE on 'Vazba na CZ-NACE: hlavni vazby: ...'"""
section, _ = find_section(text,
['Domény specializace Jihočeského kraje'],
['Vznikající', 'Emerging', 'Realizace krajské'])
if not section:
return [], []
lines = section.split('\n')
domains = []
current = None
for line in lines:
line_s = line.strip()
if not line_s or is_page_marker(line):
continue
if line_s.startswith('Domény specializace'):
continue
nace_match = re.search(r'Vazba na CZ[- ]?NACE[:\s]*(.*)', line_s, re.IGNORECASE)
if nace_match:
if current:
current['nace'] = extract_nace_codes(nace_match.group(1))
domains.append(current)
current = None
continue
if line_s.startswith(('•', '●', '–', '-')) and current:
clean = line_s.lstrip('•●–- ').strip()
if clean:parse_jihomoravsky function · python · L167-L209 (43 LOC)parse_pdf_v2.py
def parse_jihomoravsky(text):
"""Bullets • with inline NACE '(těžiště v CZ-NACE NN)'. Skip cluster diagram."""
section, _ = find_section(text,
['Domény specializace kraje/ Klíčová', 'Domény specializace kraje/'],
['Vznikající', 'Emerging'])
if not section:
return [], []
lines = section.split('\n')
domains = []
for line in lines:
line_s = line.strip()
if not line_s or is_page_marker(line):
continue
# Domain lines are bullets with inline NACE
if line_s.startswith('•') and 'CZ-NACE' in line_s:
nace_match = re.search(r'\(těžišt[eě]\s+v\s+CZ-NACE\s+([\d,. a částicásti]+)\)', line_s)
nace_codes = extract_nace_codes(nace_match.group(1)) if nace_match else []
# Remove the NACE parenthetical from domain name
name = re.sub(r'\s*\(těžišt[eě][^)]+\)', '', line_s)
name = name.lstrip('•●– ').strip()
if name and len(name) > 3:
parse_karlovarsky function · python · L212-L270 (59 LOC)parse_pdf_v2.py
def parse_karlovarsky(text):
"""Plain headers, bullets •, NACE on 'Vazba na CZ-NACE: ...'"""
section, _ = find_section(text,
['Domény specializace kraje'],
['Vznikající', 'Emerging', 'Realizace krajské'])
if not section:
return [], []
lines = section.split('\n')
domains = []
current = None
for line in lines:
line_s = line.strip()
if not line_s or is_page_marker(line):
continue
if line_s.startswith('Domény specializace'):
continue
nace_match = re.search(r'Vazba na CZ[- ]?NACE[:\s]*(.*)', line_s, re.IGNORECASE)
if nace_match:
if current:
current['nace'] = extract_nace_codes(nace_match.group(1))
domains.append(current)
current = None
continue
if line_s.startswith(('•', '●', '–')) and current:
clean = line_s.lstrip('•●–- ').strip()
if clean:
current['desparse_kralovehradecky function · python · L273-L332 (60 LOC)parse_pdf_v2.py
def parse_kralovehradecky(text):
"""Plain headers, bullets •, NACE on 'Vazba domény na CZ-NACE: ...' with semicolons"""
section, _ = find_section(text,
['Domény specializace kraje'],
['Vznikající', 'Emerging', 'Realizace krajské'])
if not section:
return [], []
lines = section.split('\n')
domains = []
current = None
for line in lines:
line_s = line.strip()
if not line_s or is_page_marker(line):
continue
if line_s.startswith('Domény specializace'):
continue
nace_match = re.search(r'Vazb[ay]\s+dom[eé]n[yě]?\s+na\s+CZ[- ]?NACE[:\s]*(.*)', line_s, re.IGNORECASE)
if nace_match:
if current:
current['nace'] = extract_nace_codes(nace_match.group(1))
domains.append(current)
current = None
continue
# Sub-category headers within domains (e.g. "Činnost institucí:", "Tradiční činnosti:")
if linparse_liberecky function · python · L335-L387 (53 LOC)parse_pdf_v2.py
def parse_liberecky(text):
"""Plain headers, bullets •, NACE on 'Vazba domény na CZ-NACE: ...'"""
section, _ = find_section(text,
['Domény specializace Libereckého kraje'],
['Vznikající', 'Emerging', 'Realizace krajské', 'Realiza'])
if not section:
return [], []
lines = section.split('\n')
domains = []
current = None
for line in lines:
line_s = line.strip()
if not line_s or is_page_marker(line) or is_garbage(line_s):
continue
if line_s.startswith('Domény specializace'):
continue
nace_match = re.search(r'Vazb[ay]\s+dom[eé]n[yě]?\s+na\s+CZ[- ]?NACE[:\s]*(.*)', line_s, re.IGNORECASE)
if nace_match:
if current:
current['nace'] = extract_nace_codes(nace_match.group(1))
domains.append(current)
current = None
continue
if line_s.startswith(('•', '●', '–')) and current:
clean = line_s.lparse_moravskoslezsky function · python · L390-L474 (85 LOC)parse_pdf_v2.py
def parse_moravskoslezsky(text):
"""Numbered domains in 2 groups, 'Zaměření domény:' blocks, 'Hlavní vazby na CZ-NACE - NN'"""
section, _ = find_section(text,
['Tematická specializace RIS MSK', 'Z pohledu koncových trhů'],
['Realizace krajské', 'Instituce s hlavní výkonnou'])
if not section:
return [], []
lines = section.split('\n')
domains = []
current = None
in_description = False
for line in lines:
line_s = line.strip()
if not line_s or is_page_marker(line):
continue
# Skip intro paragraphs
if any(skip in line_s for skip in [
'Tematická specializace', 'Z hlediska EDP', 'Z pohledu koncových',
'Z pohledu technologických', 'byly formulovány', 'prioritám při rozvoji',
'aktualizaci RIS MSK', 'Současně s tím', 'příležitosti pro více',
'specializace vychází', 'vzájemně propojených'
]):
continue
# NACE line — use Repobility (the analyzer behind this table) · https://repobility.com
parse_olomoucky function · python · L477-L515 (39 LOC)parse_pdf_v2.py
def parse_olomoucky(text):
"""Bullets • with inline description after em dash –. No NACE. No Emerging."""
section, _ = find_section(text,
['Domény specializace kraje:'],
['RIS3 mise', 'Realizace krajské', 'Realizace Krajské', 'Instituce s hlavní'])
if not section:
return [], []
lines = section.split('\n')
domains = []
for line in lines:
line_s = line.strip()
if not line_s or is_page_marker(line):
continue
if line_s.startswith('Domény specializace'):
continue
if line_s.startswith(('•', '●')):
clean = line_s.lstrip('•●– ').strip()
if '–' in clean:
parts = clean.split('–', 1)
name = parts[0].strip()
desc = parts[1].strip() if len(parts) > 1 else ''
elif '−' in clean:
parts = clean.split('−', 1)
name = parts[0].strip()
desc = parts[1].strip() if len(partsparse_pardubicky function · python · L518-L570 (53 LOC)parse_pdf_v2.py
def parse_pardubicky(text):
"""Plain headers, narrative descriptions, 'Stěžejní CZ NACE pro tuto doménu: ...'"""
section, _ = find_section(text,
['Domény specializace kraje'],
['Vznikající', 'Emerging', 'Realizace krajské', 'směřuje budoucí'])
if not section:
return [], []
lines = section.split('\n')
domains = []
current = None
for line in lines:
line_s = line.strip()
if not line_s or is_page_marker(line):
continue
if line_s.startswith('Domény specializace'):
continue
nace_match = re.search(r'Stěžejní\s+CZ\s*[-]?NACE\s+pro\s+tuto\s+doménu[:\s]*(.*)', line_s, re.IGNORECASE)
if nace_match:
if current:
current['nace'] = extract_nace_codes(nace_match.group(1))
domains.append(current)
current = None
continue
if line_s.startswith(('•', '●', '–')) and current:
clean = line_s.lstrip('•parse_plzensky function · python · L573-L632 (60 LOC)parse_pdf_v2.py
def parse_plzensky(text):
"""Plain headers, dash-bullet sub-items, 'Související odvětví (CZ-NACE)' then descriptive line"""
section, _ = find_section(text,
['Tematické priority'],
['Realizace krajské', 'Realizace Krajské', 'Instituce s hlavní'])
if not section:
return [], []
lines = section.split('\n')
domains = []
current = None
in_nace_block = False
for line in lines:
line_s = line.strip()
if not line_s or is_page_marker(line):
continue
if line_s.startswith('Tematické priority'):
continue
# NACE block header
if 'Související odvětví' in line_s and 'CZ-NACE' in line_s:
in_nace_block = True
continue
if in_nace_block:
if line_s.startswith('-') or line_s.startswith('–'):
codes = extract_nace_codes(line_s)
if current:
current['nace'].extend(codes)
else:
parse_praha function · python · L635-L674 (40 LOC)parse_pdf_v2.py
def parse_praha(text):
"""Letter-numbered A.-D., bullets ●. No NACE."""
section, _ = find_section(text,
['Tematické priority – Domény', 'Tematické priority — Domény', 'Tematické priority:'],
['V následující aktualizaci', 'Realizace krajské'])
if not section:
return [], []
domains = []
parts = re.split(r'\n([A-D])\.\s+', section)
for i in range(1, len(parts)-1, 2):
content = parts[i+1]
lines = content.strip().split('\n')
name = lines[0].strip().rstrip(',')
desc_lines = []
for l in lines[1:]:
clean = l.strip().lstrip('●•–- ').strip()
if clean and not is_page_marker(l) and len(clean) > 3:
desc_lines.append(clean)
domains.append({
'nazev': clean_domain_name(name),
'desc_lines': desc_lines,
'nace': [],
})
# Check for planned future domains
emerging = []
em_section, _ = find_section(text,
['Vparse_stredocesky function · python · L677-L733 (57 LOC)parse_pdf_v2.py
def parse_stredocesky(text):
"""Plain headers with inline NACE '(CZ-NACE NN, NN)', bullets • with sub-NACEs."""
section, _ = find_section(text,
['Domény specializace kraje (vertikální', 'Domény specializace kraje'],
['Vznikající', 'Emerging', 'Realizace krajské'])
if not section:
return [], []
lines = section.split('\n')
domains = []
current = None
for line in lines:
line_s = line.strip()
if not line_s or is_page_marker(line):
continue
if line_s.startswith('Domény specializace'):
continue
# Skip footnotes
if re.match(r'^\d+\s+viz:', line_s):
continue
inline_nace = re.search(r'\(CZ-NACE\s+([\d,.\s]+)\)', line_s)
if inline_nace and not line_s.startswith(('•', '●')):
# Domain name with inline NACE
if current and (current['desc_lines'] or current['nace']):
domains.append(current)
name = re.sub(parse_ustecky function · python · L736-L789 (54 LOC)parse_pdf_v2.py
def parse_ustecky(text):
"""Three-tier structure: Hlavní, KETs, Subdomény. Bullets •. No NACE."""
section, _ = find_section(text,
['Tematické priority:'],
['Realizace krajské', 'Realizace Krajské', 'Instituce s hlavní'])
if not section:
return [], []
lines = section.split('\n')
domains = []
current_tier = None
current = None
tier_headers = {
'Hlavní oblasti specializace': 'hlavní',
'KETs (průřezové)': 'KETs',
'Užší subdomény': 'subdomény',
}
for line in lines:
line_s = line.strip()
if not line_s or is_page_marker(line):
continue
if line_s.startswith('Tematické priority'):
continue
# Detect tier
for header, tier in tier_headers.items():
if header in line_s:
current_tier = tier
break
if line_s.startswith(('•', '●')):
clean = line_s.lstrip('•●–- ').strip()
if parse_vysocina function · python · L792-L848 (57 LOC)parse_pdf_v2.py
def parse_vysocina(text):
"""Plain headers, bullets • with inline NACE '(NACE NN)' (no CZ- prefix)."""
section, _ = find_section(text,
['Domény specializace Kraje Vysočina'],
['Vznikající', 'Emerging', 'Realizace krajské'])
if not section:
return [], []
lines = section.split('\n')
domains = []
current = None
for line in lines:
line_s = line.strip()
if not line_s or is_page_marker(line):
continue
if line_s.startswith('Domény specializace'):
continue
if line_s.startswith(('•', '●', '–')) and current:
clean = line_s.lstrip('•●–- ').strip()
# Extract inline NACE codes
nace_matches = re.findall(r'\((?:CZ-)?NACE\s+([\d,. a]+)\)', clean)
for m in nace_matches:
current['nace'].extend(extract_nace_codes(m))
# Remove NACE references from description
clean_desc = re.sub(r'\((?:CZ-)?NACE[^)]+\)', '', cparse_zlinsky function · python · L851-L946 (96 LOC)parse_pdf_v2.py
def parse_zlinsky(text):
"""Two domain sets: horizontal (under Strategická orientace) and vertical (under Tematické priority).
Vertical grouped in 3 tiers 1) 2) 3). No NACE."""
# Vertical (application) domains — under Tematické priority
v_section, _ = find_section(text,
['Odvětvové (aplikační) domény', 'Tematické priority:'],
['Vznikající', 'Emerging', 'Realizace krajské'])
if not v_section:
return [], []
lines = v_section.split('\n')
domains = []
current = None
current_tier = None
for line in lines:
line_s = line.strip()
if not line_s or is_page_marker(line):
continue
if any(skip in line_s for skip in ['Odvětvové (aplikační)', 'Tematické priority']):
continue
# Tier headers: 1) ... 2) ... 3) ...
tier_match = re.match(r'^(\d)\)\s+(.+)', line_s)
if tier_match:
current_tier = tier_match.group(2)[:50]
continue
if line_Methodology: Repobility · https://repobility.com/research/state-of-ai-code-2026/
_finalize function · python · L951-L972 (22 LOC)parse_pdf_v2.py
def _finalize(domains):
"""Convert raw domain dicts to final format."""
result = []
for d in domains:
nazev = d['nazev']
if len(nazev) < 4 or nazev.startswith('Zaměření'):
continue
popis = ' '.join(d['desc_lines']).strip()
# Clean up description
popis = re.sub(r'\s+', ' ', popis)
nace = list(dict.fromkeys(d['nace'])) # Deduplicate
text_pro_embedding = f"{nazev}: {popis}" if popis else nazev
entry = {
'nazev': nazev,
'popis': popis,
'nace': nace,
'text_pro_embedding': text_pro_embedding,
}
if 'tier' in d and d['tier']:
entry['tier'] = d['tier']
result.append(entry)
return resultApp function · javascript · L20-L134 (115 LOC)src/App.jsx
export default function App() {
const [currentSlide, setCurrentSlide] = useState(0)
const [isPaused, setIsPaused] = useState(true)
const goTo = useCallback((idx) => {
setCurrentSlide(Math.max(0, Math.min(TOTAL_SLIDES - 1, idx)))
}, [])
const next = useCallback(() => goTo(currentSlide + 1), [currentSlide, goTo])
const prev = useCallback(() => goTo(currentSlide - 1), [currentSlide, goTo])
// Keyboard navigation
useEffect(() => {
const handler = (e) => {
if (e.key === 'ArrowRight' || e.key === ' ') { e.preventDefault(); next() }
if (e.key === 'ArrowLeft') { e.preventDefault(); prev() }
if (e.key === 'Home') { e.preventDefault(); goTo(0) }
if (e.key === 'End') { e.preventDefault(); goTo(TOTAL_SLIDES - 1) }
if (e.key === 'p') setIsPaused(p => !p)
}
window.addEventListener('keydown', handler)
return () => window.removeEventListener('keydown', handler)
}, [next, prev, goTo])
// Touch swipe navigation
const touchStartXInfoPanel function · javascript · L3-L39 (37 LOC)src/InfoPanel.jsx
export default function InfoPanel({ text }) {
const [open, setOpen] = useState(false)
return (
<>
<button
onClick={() => setOpen(true)}
className="absolute top-6 right-6 z-20 w-7 h-7 flex items-center justify-center rounded-full bg-white/80 hover:bg-white shadow text-[#0087CD] text-sm font-bold cursor-pointer"
title="Vysvětlivka"
>
i
</button>
{open && (
<div
className="fixed inset-0 z-50 flex items-center justify-center"
onClick={() => setOpen(false)}
>
<div className="absolute inset-0 bg-black/30" />
<div
className="relative bg-white rounded-xl shadow-xl p-6 max-w-lg mx-4"
onClick={(e) => e.stopPropagation()}
>
<button
onClick={() => setOpen(false)}
className="absolute top-3 right-3 w-6 h-6 flex items-center justify-center rounded-full hover:bg-gray-100 text-[#9B9BA0] text-sm"
>
SlideConclusion function · javascript · L3-L153 (151 LOC)src/slides/SlideConclusion.jsx
export default function SlideConclusion() {
const [visible, setVisible] = useState(false)
useEffect(() => {
const t = setTimeout(() => setVisible(true), 200)
return () => clearTimeout(t)
}, [])
const pipeline = [
{
stage: 'Explorace',
color: '#B07ED8',
steps: [
'Analytická zvědavost: jsou si krajské specializace nějak podobné? Dá se to zjistit?',
'Mapování dostupných zdrojů: krajské karty NRIS3, databáze CEP, ČSÚ. Žádná hotová data, žádné zadání.',
],
},
{
stage: 'Sběr dat',
color: '#0087CD',
steps: [
'Parsování PDF krajských karet, extrakce strukturovaných dat ze 14 různých formátů',
],
},
{
stage: 'Analýza',
color: '#E6AF14',
steps: [
'Převod textů domén na vektory jazykovým modelem, výpočet sémantické podobnosti',
'Volba a ladění metod v dialogu: prahové hodnoty, Jaccard vs. embedding, FORD + sémantika',
'Propojení tisíců VaV projektcomputeJaccard function · javascript · L31-L37 (7 LOC)src/slides/SlideJaccardHeatmap.jsx
/**
 * Jaccard similarity |A ∩ B| / |A ∪ B| of two Sets.
 *
 * @param {Set} setA
 * @param {Set} setB
 * @returns {number|null} ratio in [0, 1], or null when either set is empty
 */
function computeJaccard(setA, setB) {
  // A single guard is enough: "either empty" already covers "both empty".
  if (setA.size === 0 || setB.size === 0) return null
  let shared = 0
  for (const item of setA) {
    if (setB.has(item)) shared += 1
  }
  // |A ∪ B| = |A| + |B| − |A ∩ B|, so no intermediate Sets are needed.
  return shared / (setA.size + setB.size - shared)
}
/**
 * List the members of setA that also occur in setB.
 *
 * @param {Set} setA
 * @param {Set} setB
 * @returns {Array} shared members, ordered by Array.prototype.sort's default comparison
 */
function getSharedCodes(setA, setB) {
  const shared = []
  for (const code of setA) {
    if (setB.has(code)) shared.push(code)
  }
  shared.sort()
  return shared
}
export default function SlideMapVav() {
const [krajeGeo, setKrajeGeo] = useState(null)
const [okresyGeo, setOkresyGeo] = useState(null)
const [sektorData, setSektorData] = useState(null)
const [tooltip, setTooltip] = useState(null)
const [dimensions, setDimensions] = useState({ width: 0, height: 0 })
useEffect(() => {
Promise.all([
fetch(`${import.meta.env.BASE_URL}data/kraje.geojson`).then(r => r.json()),
fetch(`${import.meta.env.BASE_URL}data/okresy.geojson`).then(r => r.json()),
fetch(`${import.meta.env.BASE_URL}data/vav_sektory_2024.json`).then(r => r.json()),
]).then(([kraje, okresy, sektory]) => {
setKrajeGeo(kraje)
setOkresyGeo(okresy)
setSektorData(sektory)
})
}, [])
useEffect(() => {
const update = () => setDimensions({ width: window.innerWidth, height: window.innerHeight })
update()
window.addEventListener('resize', update)
return () => window.removeEventListener('resize', update)
}, [])
//computeJaccard function · javascript · L31-L36 (6 LOC)src/slides/SlideSemantic.jsx
/**
 * Jaccard similarity |A ∩ B| / |A ∪ B| of two Sets.
 *
 * @param {Set} setA
 * @param {Set} setB
 * @returns {number} ratio in [0, 1]; 0 when either set is empty
 */
function computeJaccard(setA, setB) {
  const eitherEmpty = setA.size === 0 || setB.size === 0
  if (eitherEmpty) return 0

  const common = [...setA].filter(item => setB.has(item))
  const combined = new Set(setA)
  for (const item of setB) combined.add(item)

  return common.length / combined.size
}
computeJaccard function · javascript · L48-L53 (6 LOC)src/slides/SlideSemanticMerged.jsx
/**
 * Jaccard similarity of two Sets: size of the intersection divided by
 * the size of the union.
 *
 * @param {Set} setA
 * @param {Set} setB
 * @returns {number} ratio in [0, 1]; 0 when either set is empty
 */
function computeJaccard(setA, setB) {
  if (setA.size === 0) return 0
  if (setB.size === 0) return 0

  const overlap = new Set()
  const merged = new Set()
  for (const value of setA) {
    merged.add(value)
    if (setB.has(value)) overlap.add(value)
  }
  for (const value of setB) merged.add(value)

  return overlap.size / merged.size
}
export default function SlideSemanticNetwork() {
const [semData, setSemData] = useState(null)
const [tooltip, setTooltip] = useState(null)
const [hoveredNode, setHoveredNode] = useState(null)
const [dimensions, setDimensions] = useState({ width: 0, height: 0 })
const [nodes, setNodes] = useState([])
const [links, setLinks] = useState([])
const simRef = useRef(null)
const dragRef = useRef(null)
const svgRef = useRef(null)
useEffect(() => {
fetch(`${import.meta.env.BASE_URL}data/semanticka_podobnost.json`).then(r => r.json()).then(setSemData)
}, [])
useEffect(() => {
const update = () => setDimensions({ width: window.innerWidth, height: window.innerHeight })
update()
window.addEventListener('resize', update)
return () => window.removeEventListener('resize', update)
}, [])
// Build graph data
const graphData = useMemo(() => {
if (!semData) return null
const krajNames = semData.kraje
// Node color: shades of one blue for NACSlideTitle function · javascript · L4-L89 (86 LOC)src/slides/SlideTitle.jsx
export default function SlideTitle() {
const [visible, setVisible] = useState(false)
const [krajeGeo, setKrajeGeo] = useState(null)
const [dimensions, setDimensions] = useState({ width: 0, height: 0 })
useEffect(() => {
const t = setTimeout(() => setVisible(true), 200)
return () => clearTimeout(t)
}, [])
useEffect(() => {
fetch(`${import.meta.env.BASE_URL}data/kraje.geojson`).then(r => r.json()).then(setKrajeGeo)
}, [])
useEffect(() => {
const update = () => setDimensions({ width: window.innerWidth, height: window.innerHeight })
update()
window.addEventListener('resize', update)
return () => window.removeEventListener('resize', update)
}, [])
const pathData = useMemo(() => {
if (!krajeGeo || dimensions.width === 0) return null
const { width, height } = dimensions
const mapW = width * 0.70
const mapH = height * 0.70
const proj = d3.geoMercator().fitSize([mapW, mapH], krajeGeo)
const [tx, ty] = proj.translate()
SlideVavEkosystem function · javascript · L22-L270 (249 LOC)src/slides/SlideVavEkosystem.jsx
export default function SlideVavEkosystem() {
const [data, setData] = useState(null)
const [tooltip, setTooltip] = useState(null)
const [dimensions, setDimensions] = useState({ width: 0, height: 0 })
useEffect(() => {
fetch(`${import.meta.env.BASE_URL}data/vav_semantic_match.json`).then(r => r.json()).then(setData)
}, [])
useEffect(() => {
const update = () => setDimensions({ width: window.innerWidth, height: window.innerHeight })
update()
window.addEventListener('resize', update)
return () => window.removeEventListener('resize', update)
}, [])
// Sort kraje by total projects descending
const sortedKraje = useMemo(() => {
if (!data) return null
return Object.entries(data.kraje)
.sort((a, b) => b[1].celkem_projektu - a[1].celkem_projektu)
}, [data])
if (!sortedKraje || dimensions.width === 0) {
return (
<div className="w-full h-full flex items-center justify-center bg-[#f8f9fa]">
<p className="text-[#777] text