Function bodies 54 total
_compute_bounds function · python · L24-L28 (5 LOC)masquerade/production/masquerade.py
def _compute_bounds(spatial_metadata):
"""Return (x_min, x_max, y_min, y_max) from spatial metadata."""
xs = spatial_metadata["x"].astype(int)
ys = spatial_metadata["y"].astype(int)
return int(xs.min()), int(xs.max()), int(ys.min()), int(ys.max())_apply_crop function · python · L31-L36 (6 LOC)masquerade/production/masquerade.py
def _apply_crop(arr_2d, bounds, adjust):
"""Crop a 2-D array to the bounding box when *adjust* is True."""
if not adjust:
return arr_2d
x_min, x_max, y_min, y_max = bounds
return arr_2d[y_min:y_max, x_min:x_max]PreProcessImage function · python · L41-L58 (18 LOC)masquerade/production/masquerade.py
def PreProcessImage(image_source, spatial_metadata, adjust_coords=True):
    """Load the TIFF, optionally crop to coordinate bounds.

    Parameters
    ----------
    image_source : path-like
        Location of the TIFF; converted with ``str()`` before reading.
    spatial_metadata : table with "x" and "y" entries
        Passed to ``_compute_bounds`` to derive the crop window.
    adjust_coords : bool
        When true, the last two axes are sliced to the bounds (upper limits
        exclusive).

    Returns
    -------
    image : ndarray (C, H, W)
    raw_img_size : float – approximate size in GB (uint8)
    bounds : tuple – (x_min, x_max, y_min, y_max)
    """
    image = tifffile.imread(str(image_source))
    bounds = _compute_bounds(spatial_metadata)
    if adjust_coords:
        x_min, x_max, y_min, y_max = bounds
        image = image[:, y_min:y_max, x_min:x_max]
    # A uint8 element occupies one byte, so the element count *is* the uint8
    # byte count; this avoids materialising a full uint8 copy of the image
    # just to read its ``nbytes``.
    raw_img_size = image.size / 1e9
    return image, raw_img_size, bounds
# get_mask_channels function · python · L61-L117 (57 LOC)masquerade/production/masquerade.py
def get_mask_channels(
image, spatial_metadata, raw_img_size, bounds, adjust_coords=True
):
"""Build per-cluster binary masks, dilate, compress.
Returns
-------
channels : dict[str, ndarray]
compression_factor : float
"""
x_min, x_max, y_min, y_max = bounds
cluster_ids = spatial_metadata["cluster"].unique()
# Pre-compute the summed image once (across channels → 2-D)
summed_image = image.sum(axis=0) # (H, W)
channels = {}
compression_factor = 1.0
for idx, cid in enumerate(cluster_ids):
cluster = spatial_metadata[spatial_metadata["cluster"] == cid]
if cluster.shape[0] <= 1:
continue
# Cluster pixel coordinates (optionally shifted to crop space)
cx = cluster["x"].values.astype(int)
cy = cluster["y"].values.astype(int)
if adjust_coords:
cx = cx - x_min
cy = cy - y_min
# Build boolean mask and dilate by 1 px (replaces manual ±1 expansion)compress_marker_channels function · python · L120-L174 (55 LOC)masquerade/production/masquerade.py
def compress_marker_channels(
image_source,
channels,
compression_factor,
spatial_metadata,
bounds,
relevant_markers=None,
adjust_coords=True,
):
"""Read individual TIFF pages (lazy), crop, compress, and add to *channels*.
Reads one page at a time so memory never exceeds ~1 page + output.
"""
x_min, x_max, y_min, y_max = bounds
# Build the marker whitelist set (if provided)
marker_set = None
if relevant_markers is not None:
raw = list(relevant_markers["x"])
variants = set(raw)
for m in raw:
variants.add(m.replace("_", ""))
variants.add(m.replace("_", "-"))
marker_set = variants
with TiffFile(str(image_source)) as tif:
for page in tif.series[0].pages:
desc = page.description
if not desc:
continue
el = ElementTree.fromstring(desc).find("Biomarker")
if el is None or el.text is None:
cowriteMaskTiff function · python · L177-L190 (14 LOC)masquerade/production/masquerade.py
def writeMaskTiff(channels, outPath):
    """Write all channels as one ImageJ-compatible TIFF stack.

    The values of *channels* are stacked along a new leading axis in the
    mapping's iteration order, cast to uint8 and written to *outPath*; the
    keys are stored as the ``Labels`` metadata entry.
    """
    names = [name for name in channels.keys()]
    planes = [channels[name] for name in names]
    volume = np.stack(planes, axis=0).astype("uint8")
    tifffile.imwrite(
        str(outPath),
        volume,
        imagej=True,
        metadata={"Labels": names},
    )
    # Release the stacked copy before returning.
    del volume
    gc.collect()
# openApp function · javascript · L1-L20 (20 LOC)site/assets/js/phenomenalist-site.js
/**
 * Show the element whose id is `appName` and mark the triggering button active.
 *
 * Every element with class "tabcontent" is hidden, " active" is stripped from
 * the class string of every element with class "tablinks", then the requested
 * element is displayed and " active" is appended to `evt.currentTarget`.
 *
 * @param {Event} evt - event whose `currentTarget` is the button that was used.
 * @param {string} appName - id of the element to display.
 */
function openApp(evt, appName) {
  var panels, buttons, p, b;

  // Hide every tab panel.
  panels = document.getElementsByClassName("tabcontent");
  for (p = 0; p < panels.length; p += 1) {
    panels[p].style.display = "none";
  }

  // Clear the "active" marker from every tab button.
  buttons = document.getElementsByClassName("tablinks");
  for (b = 0; b < buttons.length; b += 1) {
    buttons[b].className = buttons[b].className.replace(" active", "");
  }

  // Reveal the requested panel and flag the button that opened it.
  document.getElementById(appName).style.display = "block";
  evt.currentTarget.className += " active";
}
// Source: Repobility analyzer · https://repobility.com
model_spatial_interactions function · python · L1-L94 (94 LOC)spatial_interactions/utils/spatial-shiny/model-spatial-interactions.py
def model_spatial_interactions(spatial_obj,out_dir,label,resolution,p1,p2,min_count):
import os
import numpy as np
import pandas as pd
import scipy
from scipy import spatial, io, sparse
import re
from itertools import chain
os.makedirs(out_dir,exist_ok=True)
print('outdir')
print(out_dir)
min_count = min_count
dist_mat = scipy.spatial.distance.pdist(spatial_obj[['x','y']])
dist_square = scipy.spatial.distance.squareform(dist_mat)
# pull all class idx:
celltypes=[re.sub(pattern='[+]',repl='pos',string=x) for x in spatial_obj['celltype'] ]
classes = list(set(celltypes))
class_idx = [[x for x in range(len(celltypes)) if len(re.findall(pattern='^'+str(y)+'$', string = celltypes[x])) > 0] for y in classes]
class_sizes = np.array([len(x) for x in class_idx])
resolution=resolution
# minimum distance:
p1=p1
# maximum distance
p2=p2
p2=p2+p1
p1_scaled=p1/resolution
p2_scaled=p2/resolution
phyper=[]
logOdds=[]
read_csv_tsv function · python · L29-L45 (17 LOC)spatial_interactions/utils/spatial-shiny/vectra_lib_v3.py
def read_csv_tsv(filename):
    """Read a delimited table, trying tab first and then comma.

    The file is parsed as tab-separated; if that yields no ``Path`` column it
    is re-read as comma-separated. When a ``Sample_Name`` column is present
    the column names are rewritten: underscores become spaces, every
    ``Opal <token>`` is wrapped in parentheses, and two known names get their
    punctuation back.

    Args:
        filename: string
            path handed to ``pandas.read_csv``
    Returns:
        pandas df
    """
    table = pd.read_csv(filename, delimiter='\t')  # try tsv
    if 'Path' not in table.columns:  # try comma-delim
        table = pd.read_csv(filename)
    # NOTE: nothing is raised if 'Path' is still missing after the comma
    # attempt; the frame is returned as parsed.
    if 'Sample_Name' in table.columns:  # underscore-style column names

        def wrap_in_parens(match):
            return '(' + match.group(0) + ')'

        # ``regex`` is spelled out on every call: a callable replacement is
        # only accepted with regex=True, and the default of this flag differs
        # between pandas releases. The pattern is a raw string because ``\S``
        # is not a valid string escape.
        table.columns = (
            table.columns.str.replace('_', ' ', regex=False)
            .str.replace(r'Opal [\S]*', wrap_in_parens, regex=True)
            .str.replace('Normalized Counts Total Weighting',
                         '(Normalized Counts, Total Weighting)', regex=False)
            .str.replace('HLA DR', 'HLA-DR', regex=False)
        )
    return table
# extract_data function · python · L48-L204 (157 LOC)spatial_interactions/utils/spatial-shiny/vectra_lib_v3.py
def extract_data(directory, classification = None, verbose = True,
drop_nan = True, drop_duplicates = False, debug = False):
"""
Extracts cell information from cell_seg_data files and pairs it with
corresponding score files from score_data.
Args:
directory: string
string of parent directory containing all files
classification: function
must take a row of the dataframe and output a string
verbose: bool
output all quality-control checking
drop_nan: bool
remove rows with NaN phenotype
drop_duplicates: bool
whether to remove duplicates with the same file name. Most recently
modified will be kept.
debug: bool
if True, only compile score info and return score_files
Returns:
output: list of dicts
each corresponding to an image file
unless debug is True, in which case:
scorecount_cells function · python · L206-L234 (29 LOC)spatial_interactions/utils/spatial-shiny/vectra_lib_v3.py
def count_cells(output, grouping = 'Phenotype', density = True):
"""
Counts number or density of cells for each image.
Args:
output: list of dicts
from extract_data
grouping: 'Phenotype' or 'Classification' or list of variables
whether to use VECTRA Phenotype or self-generated classification
density: bool
whether to density-normalize each image
Returns:
pandas df, with columns as image names and rows as cell types
"""
counts_table = pd.concat(
[sample['Data'].groupby(grouping).size().rename(sample['Sample Name'])
for sample in output], axis=1, sort=True).fillna(0)
errors = counts_table.columns[counts_table.sum()==0]
if len(errors) > 0:
counts_table.drop(errors, axis=1, inplace=True)
print("Warning: samples were omitted due to missing values: ", errors)
if density:
counts_table = (counts_table/counts_table.sum())
pcf function · python · L236-L394 (159 LOC)spatial_interactions/utils/spatial-shiny/vectra_lib_v3.py
def pcf(output, cell_types = None, phenotype = 'Phenotype',
count_threshold = 1):
"""
Calculates pair correlation function using spatstat from R.
Extra dependencies: rpy2 (3.2.2+), R (3.6+), spatstat (1.62-2+)
Args:
output: list of dicts
from extract_data
cell_types: list
Names of pertinent cell types, which will be prepended by 'All'.
Default is all recognized phenotypes
phenotype: string, 'Phenotype' or 'Classification'
whether to use VECTRA Phenotype or self-generated classification
count_threshold: int
threshold to not calculate a specific pcf
Returns:
pandas df, with each row a sample
"""
#only needs rpy2 if calculating pcf
import rpy2
import rpy2.robjects as robjects
from rpy2.robjects.packages import importr
from rpy2.robjects import pandas2ri
pandas2ri.activate()
r = robjects.r
spatstat = importr("spatstat.pcf_subset function · python · L396-L427 (32 LOC)spatial_interactions/utils/spatial-shiny/vectra_lib_v3.py
def pcf_subset(df, cell1, cell2, min_count=20, max_radius = 200):
"""
Analysis of pcfs of two specified cell types
Args:
df: pandas dataframe
pcf dataframe from vl.pcf
cell1, cell2: str
Names of cell types of interest (or 'All')
min_count: int or list
exclude samples with fewer than this many cells of each type
max_radius: int
steps to sum pcf to. steps are approx. 0.25 microns
Returns:
pandas df, with each row a sample
"""
cell_mask = (((df['Cell_one']==cell1) & (df['Cell_two']==cell2))
| ((df['Cell_one']==cell2) & (df['Cell_two']==cell1)))
count_mask = ((df['min_count'] > min_count)
if type(min_count) == int
else ((df['count_one'] > min_count[0])
& (df['count_two'] > min_count[1])))
pcf_table = df[cell_mask & count_mask].copy()
pcf_table['normalization'] = pd.to_numeric(pcf_tableerror_median function · python · L429-L444 (16 LOC)spatial_interactions/utils/spatial-shiny/vectra_lib_v3.py
def error_median(x):
    """
    Calculate shaded plot regions by bootstrapping for error bars.

    The rows of ``np.vstack(x)`` are resampled with replacement 100 times
    (using the global ``np.random`` state); the column-wise median of each
    resample is collected and the 2.5th and 97.5th percentiles across
    resamples are returned.

    Example of use for plotting:
        pcf_median = data.groupby(variable)['PCF'].apply(
            lambda x: np.median(np.vstack(x), axis=0))
        pcf_error = data.groupby(variable)['PCF'].apply(error_median)

    Returns:
        list of two arrays: [lower percentile curve, upper percentile curve]
    """
    # The stacked matrix does not change between resamples, so build it once
    # instead of on every bootstrap iteration.
    stacked = np.vstack(x)
    n_rows = len(x)
    bootstrap = np.vstack([
        np.median(
            stacked[np.random.choice(n_rows, n_rows, replace=True), :],
            axis=0)
        for _ in range(100)])
    return [np.percentile(bootstrap, 2.5, axis=0),
            np.percentile(bootstrap, 97.5, axis=0)]
# nearest_neighbor function · python · L446-L495 (50 LOC)spatial_interactions/utils/spatial-shiny/vectra_lib_v3.py
def nearest_neighbor(output, phenotype = 'Phenotype', k = 1):
"""
Args:
output: list of dicts
from extract_data
phenotype: string, 'Phenotype' or 'Classification'
whether to use VECTRA Phenotype or self-generated classification
k: int
number of neighbors to average
Returns:
pandas df,
Mean distance from each cell type (first index level) to the
closest k neighbors of a given cell type (second index level), for each
sample (column)
"""
nn_output = []
knn_mean_function = lambda x: (x.apply(pd.Series.nsmallest, axis=1, n=k)
.mean(axis = 1))
knn_group_function = lambda group: (group.groupby(group.columns, axis=1)
.apply(knn_mean_function)
.mean())
for sample in output:
selection = sample['Data']
sample_nRepobility's GitHub App fixes findings like these · https://github.com/apps/repobility-bot
run_all function · python · L497-L523 (27 LOC)spatial_interactions/utils/spatial-shiny/vectra_lib_v3.py
def run_all(output, folder_location):
    """
    Write the coordinate, PCF and nearest-neighbour tables to CSV files.

    Three files are created inside *folder_location*: ``xy.csv``,
    ``pcf.csv`` and ``nearest_neighbor.csv``.

    Args:
        output: list of dicts
            from extract_data
        folder_location: string
            where to save files
    Returns:
        None
    """
    # Per-sample data, each tagged with its sample name.
    frames = []
    for sample in output:
        frames.append(sample['Data'].assign(Sample=sample['Sample Name']))
    xy = pd.concat(frames)
    xy.rename(columns={'Sample': 'Sample Name'}, inplace=True)
    xy.to_csv(folder_location + "/xy.csv")

    pcf_df = pcf(output, phenotype='Classification', count_threshold=10)
    pcf_df.to_csv(folder_location + "/pcf.csv")

    # One stacked column per neighbour count, named after that count.
    nn_columns = [
        nearest_neighbor(output, phenotype='Classification', k=k)
        .stack()
        .rename(k)
        for k in [1, 5]
    ]
    nn = pd.concat(nn_columns, axis=1)
    nn.index.set_names('Sample Name', level=2, inplace=True)
    nn.to_csv(folder_location + "/nearest_neighbor.csv")
    return None
# interaction_subset function · python · L531-L534 (4 LOC)spatial_interactions/utils/spatial-shiny/vectra_lib_v3.py
def interaction_subset(df, cell1, cell2, min_count=20):
    """Select the rows of *df* for the unordered pair (*cell1*, *cell2*).

    A row is kept when its 'Cell_one'/'Cell_two' values match the pair in
    either order, its 'min_count' is strictly greater than *min_count*, and
    its 'PCF' entry is not the string 'NA'.
    """
    forward = (df['Cell_one'] == cell1) & (df['Cell_two'] == cell2)
    reverse = (df['Cell_one'] == cell2) & (df['Cell_two'] == cell1)
    enough_cells = df['min_count'] > min_count
    pcf_table = df[(forward | reverse) & enough_cells]
    return pcf_table[pcf_table['PCF'] != 'NA']
# plot_difference function · python · L538-L553 (16 LOC)spatial_interactions/utils/spatial-shiny/vectra_lib_v3.py
def plot_difference(data, ax = False):
    """Plot the median 'normPCF' curve with a bootstrap band.

    Args:
        data: pandas df
            must provide 'normPCF', 'Cell_one' and 'Cell_two' columns; the
            title is built from the first row's cell names
        ax: matplotlib axes, or False/None
            axes to draw on; when False or None a new figure of 8 x 5 inches
            is created
    Returns:
        None

    NOTE(review): ``x_data`` is not defined in this function; it is presumably
    a module-level array of radii with one entry per PCF step -- confirm.
    """
    pcf_dm = np.median(np.vstack(data['normPCF']), axis=0)
    pcf_dsem = error_median(data['normPCF'])
    # Identity checks instead of ``ax == False``: the comparison no longer
    # goes through the argument's own ``__eq__``, and None is also accepted
    # as "no axes supplied".
    if ax is False or ax is None:
        fig, ax = plt.subplots()
        fig.set_size_inches(8, 5)
    ax.plot(x_data, pcf_dm)
    ax.fill_between(x_data, pcf_dsem[0], pcf_dsem[1], alpha=.4, lw=0)
    ax.set_title(data['Cell_one'].iloc[0] + ' vs. ' + data['Cell_two'].iloc[0],
                 fontsize=18)
    # Dashed reference line at PCF == 1.
    ax.plot(x_data, np.ones(len(x_data)), 'k--')
    # Raw string: ``\m`` is not a valid escape sequence in a normal literal.
    ax.set_xlabel(r'Radius ($\mu$m)', fontsize=16)
    ax.set_ylabel('PCF', fontsize=16)
    ax.tick_params(axis='both', which='major', labelsize=14)
# error_median function · python · L566-L568 (3 LOC)spatial_interactions/utils/spatial-shiny/vectra_lib_v3.py
def error_median(x):
    """Bootstrap a 95% band around the column-wise median of *x*.

    The rows of ``np.vstack(x)`` are resampled with replacement 100 times
    (using the global ``np.random`` state); the column-wise median of each
    resample is collected.

    Returns:
        list of two arrays: the 2.5th and the 97.5th percentile across
        resamples
    """
    # The stacked matrix does not change between resamples, so build it once
    # instead of on every bootstrap iteration.
    stacked = np.vstack(x)
    n_rows = len(x)
    bootstrap = np.vstack([
        np.median(
            stacked[np.random.choice(n_rows, n_rows, replace=True), :],
            axis=0)
        for _ in range(100)])
    return [np.percentile(bootstrap, 2.5, axis=0),
            np.percentile(bootstrap, 97.5, axis=0)]
# plot_difference function · python · L570-L585 (16 LOC)spatial_interactions/utils/spatial-shiny/vectra_lib_v3.py
def plot_difference(data, ax = False):
    """Plot the median 'normPCF' curve with a bootstrap band.

    Args:
        data: pandas df
            must provide 'normPCF', 'Cell_one' and 'Cell_two' columns; the
            title is built from the first row's cell names
        ax: matplotlib axes, or False/None
            axes to draw on; when False or None a new figure of 8 x 5 inches
            is created
    Returns:
        None

    NOTE(review): ``x_data`` is not defined in this function; it is presumably
    a module-level array of radii with one entry per PCF step -- confirm.
    """
    pcf_dm = np.median(np.vstack(data['normPCF']), axis=0)
    pcf_dsem = error_median(data['normPCF'])
    # Identity checks instead of ``ax == False``: the comparison no longer
    # goes through the argument's own ``__eq__``, and None is also accepted
    # as "no axes supplied".
    if ax is False or ax is None:
        fig, ax = plt.subplots()
        fig.set_size_inches(8, 5)
    ax.plot(x_data, pcf_dm)
    ax.fill_between(x_data, pcf_dsem[0], pcf_dsem[1], alpha=.4, lw=0)
    ax.set_title(data['Cell_one'].iloc[0] + ' vs. ' + data['Cell_two'].iloc[0],
                 fontsize=18)
    # Dashed reference line at PCF == 1.
    ax.plot(x_data, np.ones(len(x_data)), 'k--')
    # Raw string: ``\m`` is not a valid escape sequence in a normal literal.
    ax.set_xlabel(r'Radius ($\mu$m)', fontsize=16)
    ax.set_ylabel('PCF', fontsize=16)
    ax.tick_params(axis='both', which='major', labelsize=14)
# interaction_subset function · python · L587-L590 (4 LOC)spatial_interactions/utils/spatial-shiny/vectra_lib_v3.py
def interaction_subset(df, cell1, cell2, min_count=20):
    """Select the rows of *df* for the unordered pair (*cell1*, *cell2*).

    A row is kept when its 'Cell_one'/'Cell_two' values match the pair in
    either order, its 'min_count' is strictly greater than *min_count*, and
    its 'PCF' entry is not the string 'NA'.
    """
    forward = (df['Cell_one'] == cell1) & (df['Cell_two'] == cell2)
    reverse = (df['Cell_one'] == cell2) & (df['Cell_two'] == cell1)
    enough_cells = df['min_count'] > min_count
    pcf_table = df[(forward | reverse) & enough_cells]
    return pcf_table[pcf_table['PCF'] != 'NA']
# plot_pcf_curves function · python · L594-L681 (88 LOC)spatial_interactions/utils/spatial-shiny/vectra_lib_v3.py
def plot_pcf_curves(pcf_df,cell_types_,resolution,label,out_path):
CELL_TYPES_ALIAS =['All']
CELL_TYPES_ALIAS.extend(cell_types_)
CELL_TYPES = CELL_TYPES_ALIAS
COLOR_LIST = sns.color_palette(None, len(CELL_TYPES))
print(COLOR_LIST)
x_data = np.arange(0, 125, .25)
fig = plt.figure(figsize=(10, 8))
outer = mpl.gridspec.GridSpec(2, 3, wspace=0.2, hspace=0.2)
print(str(outer.ncols))
print(str(outer.nrows))
print(len(CELL_TYPES))
for i in range(len(CELL_TYPES)):
inner = mpl.gridspec.GridSpecFromSubplotSpec(2, 1,
subplot_spec=outer[i], wspace=0.1, hspace=0.1, height_ratios=[1,2])
ax = plt.Subplot(fig, inner[0])
ax2 = plt.Subplot(fig, inner[1])
ax.set_title(CELL_TYPES_ALIAS[i], fontsize = 16);
ax2.plot(x_data, np.ones(len(x_data)), 'k--');
for j in range(len(CELL_TYPES)):
data = interaction_subset(pcf_df, CELL_TYPES[i], CELL_TYPES[j])
pcf_dm = np.median(np.vstpcf_AUC function · python · L683-L778 (96 LOC)spatial_interactions/utils/spatial-shiny/vectra_lib_v3.py
def pcf_AUC(pcf_df,cell_types_,cell1,label,out_path,resolution):
CELL_TYPES = ['All']
CELL_TYPES.extend(cell_types_)
print(CELL_TYPES)
CELL_TYPES_ALIAS = CELL_TYPES
CELL2_LIST = CELL_TYPES
print(CELL_TYPES)
CELL2_LIST.pop(CELL_TYPES.index(cell1))
print(CELL_TYPES)
LABELS = CELL_TYPES
print(cell1)
print(LABELS)
print('Cell 2 list:')
print(CELL2_LIST)
COLORS = sns.color_palette(None, len(CELL_TYPES))
STEP_TO_UM = resolution
def interaction_subset(df, cell1, cell2, min_count=20):
pcf_table = df[(((df['Cell_one']==cell1) & (df['Cell_two']==cell2)) | ((df['Cell_one']==cell2) & (df['Cell_two']==cell1)) ) & (df['min_count'] > min_count)]
pcf_table = pcf_table[pcf_table['PCF'] != 'NA']
return pcf_table
def pvalue_text(data1, data2, verbose = False):
pvalue = stats.mannwhitneyu(data1, data2, alternative='two-sided')[1]
if verbose:
print(pvalue)
if pvalue < 0.001:
return '***'
elif pvalue < 0.01:
Methodology: Repobility · https://repobility.com/research/state-of-ai-code-2026/
model_spatial_interactions function · python · L1-L94 (94 LOC)utils/spatial-shiny/model-spatial-interactions.py
def model_spatial_interactions(spatial_obj,out_dir,label,resolution,p1,p2,min_count):
import os
import numpy as np
import pandas as pd
import scipy
from scipy import spatial, io, sparse
import re
from itertools import chain
os.makedirs(out_dir,exist_ok=True)
print('outdir')
print(out_dir)
min_count = min_count
dist_mat = scipy.spatial.distance.pdist(spatial_obj[['x','y']])
dist_square = scipy.spatial.distance.squareform(dist_mat)
# pull all class idx:
celltypes=[re.sub(pattern='[+]',repl='pos',string=x) for x in spatial_obj['celltype'] ]
classes = list(set(celltypes))
class_idx = [[x for x in range(len(celltypes)) if len(re.findall(pattern='^'+str(y)+'$', string = celltypes[x])) > 0] for y in classes]
class_sizes = np.array([len(x) for x in class_idx])
resolution=resolution
# minimum distance:
p1=p1
# maximum distance
p2=p2
p2=p2+p1
p1_scaled=p1/resolution
p2_scaled=p2/resolution
phyper=[]
logOdds=[]
read_csv_tsv function · python · L31-L47 (17 LOC)utils/spatial-shiny/vectra_lib_v3.py
def read_csv_tsv(filename):
    """Read a delimited table, trying tab first and then comma.

    The file is parsed as tab-separated; if that yields no ``Path`` column it
    is re-read as comma-separated. When a ``Sample_Name`` column is present
    the column names are rewritten: underscores become spaces, every
    ``Opal <token>`` is wrapped in parentheses, and two known names get their
    punctuation back.

    Args:
        filename: string
            path handed to ``pandas.read_csv``
    Returns:
        pandas df
    """
    table = pd.read_csv(filename, delimiter='\t')  # try tsv
    if 'Path' not in table.columns:  # try comma-delim
        table = pd.read_csv(filename)
    # NOTE: nothing is raised if 'Path' is still missing after the comma
    # attempt; the frame is returned as parsed.
    if 'Sample_Name' in table.columns:  # underscore-style column names

        def wrap_in_parens(match):
            return '(' + match.group(0) + ')'

        # ``regex`` is spelled out on every call: a callable replacement is
        # only accepted with regex=True, and the default of this flag differs
        # between pandas releases. The pattern is a raw string because ``\S``
        # is not a valid string escape.
        table.columns = (
            table.columns.str.replace('_', ' ', regex=False)
            .str.replace(r'Opal [\S]*', wrap_in_parens, regex=True)
            .str.replace('Normalized Counts Total Weighting',
                         '(Normalized Counts, Total Weighting)', regex=False)
            .str.replace('HLA DR', 'HLA-DR', regex=False)
        )
    return table
# extract_data function · python · L50-L209 (160 LOC)utils/spatial-shiny/vectra_lib_v3.py
def extract_data(directory, classification = None, verbose = True,
drop_nan = True, drop_duplicates = False, debug = False):
"""
Extracts cell information from cell_seg_data files and pairs it with
corresponding score files from score_data.
Args:
directory: string
string of parent directory containing all files
classification: function
must take a row of the dataframe and output a string
verbose: bool
output all quality-control checking
drop_nan: bool
remove rows with NaN phenotype
drop_duplicates: bool
whether to remove duplicates with the same file name. Most recently
modified will be kept.
debug: bool
if True, only compile score info and return score_files
Returns:
output: list of dicts
each corresponding to an image file
unless debug is True, in which case:
scorecount_cells function · python · L211-L239 (29 LOC)utils/spatial-shiny/vectra_lib_v3.py
def count_cells(output, grouping = 'Phenotype', density = True):
"""
Counts number or density of cells for each image.
Args:
output: list of dicts
from extract_data
grouping: 'Phenotype' or 'Classification' or list of variables
whether to use VECTRA Phenotype or self-generated classification
density: bool
whether to density-normalize each image
Returns:
pandas df, with columns as image names and rows as cell types
"""
counts_table = pd.concat(
[sample['Data'].groupby(grouping).size().rename(sample['Sample Name'])
for sample in output], axis=1, sort=True).fillna(0)
errors = counts_table.columns[counts_table.sum()==0]
if len(errors) > 0:
counts_table.drop(errors, axis=1, inplace=True)
print("Warning: samples were omitted due to missing values: ", errors)
if density:
counts_table = (counts_table/counts_table.sum())
pcf function · python · L241-L420 (180 LOC)utils/spatial-shiny/vectra_lib_v3.py
def pcf(output, cell_types = None, phenotype = 'Phenotype',
count_threshold = 1):
"""
Calculates pair correlation function using spatstat from R.
Extra dependencies: rpy2 (3.2.2+), R (3.6+), spatstat (1.62-2+)
Args:
output: list of dicts
from extract_data
cell_types: list
Names of pertinent cell types, which will be prepended by 'All'.
Default is all recognized phenotypes
phenotype: string, 'Phenotype' or 'Classification'
whether to use VECTRA Phenotype or self-generated classification
count_threshold: int
threshold to not calculate a specific pcf
Returns:
pandas df, with each row a sample
"""
#only needs rpy2 if calculating pcf
import rpy2
import rpy2.robjects as robjects
from rpy2.robjects.packages import importr
from rpy2.robjects import pandas2ri
pandas2ri.activate()
r = robjects.r
spatstat = importr("spatstat.pcf_subset function · python · L422-L453 (32 LOC)utils/spatial-shiny/vectra_lib_v3.py
def pcf_subset(df, cell1, cell2, min_count=20, max_radius = 200):
"""
Analysis of pcfs of two specified cell types
Args:
df: pandas dataframe
pcf dataframe from vl.pcf
cell1, cell2: str
Names of cell types of interest (or 'All')
min_count: int or list
exclude samples with fewer than this many cells of each type
max_radius: int
steps to sum pcf to. steps are approx. 0.25 microns
Returns:
pandas df, with each row a sample
"""
cell_mask = (((df['Cell_one']==cell1) & (df['Cell_two']==cell2))
| ((df['Cell_one']==cell2) & (df['Cell_two']==cell1)))
count_mask = ((df['min_count'] > min_count)
if type(min_count) == int
else ((df['count_one'] > min_count[0])
& (df['count_two'] > min_count[1])))
pcf_table = df[cell_mask & count_mask].copy()
pcf_table['normalization'] = pd.to_numeric(pcf_tableerror_median function · python · L455-L470 (16 LOC)utils/spatial-shiny/vectra_lib_v3.py
def error_median(x):
    """
    Calculate shaded plot regions by bootstrapping for error bars.

    The rows of ``np.vstack(x)`` are resampled with replacement 100 times
    (using the global ``np.random`` state); the column-wise median of each
    resample is collected and the 2.5th and 97.5th percentiles across
    resamples are returned.

    Example of use for plotting:
        pcf_median = data.groupby(variable)['PCF'].apply(
            lambda x: np.median(np.vstack(x), axis=0))
        pcf_error = data.groupby(variable)['PCF'].apply(error_median)

    Returns:
        list of two arrays: [lower percentile curve, upper percentile curve]
    """
    # The stacked matrix does not change between resamples, so build it once
    # instead of on every bootstrap iteration.
    stacked = np.vstack(x)
    n_rows = len(x)
    bootstrap = np.vstack([
        np.median(
            stacked[np.random.choice(n_rows, n_rows, replace=True), :],
            axis=0)
        for _ in range(100)])
    return [np.percentile(bootstrap, 2.5, axis=0),
            np.percentile(bootstrap, 97.5, axis=0)]
# nearest_neighbor function · python · L472-L521 (50 LOC)utils/spatial-shiny/vectra_lib_v3.py
def nearest_neighbor(output, phenotype = 'Phenotype', k = 1):
"""
Args:
output: list of dicts
from extract_data
phenotype: string, 'Phenotype' or 'Classification'
whether to use VECTRA Phenotype or self-generated classification
k: int
number of neighbors to average
Returns:
pandas df,
Mean distance from each cell type (first index level) to the
closest k neighbors of a given cell type (second index level), for each
sample (column)
"""
nn_output = []
knn_mean_function = lambda x: (x.apply(pd.Series.nsmallest, axis=1, n=k)
.mean(axis = 1))
knn_group_function = lambda group: (group.groupby(group.columns, axis=1)
.apply(knn_mean_function)
.mean())
for sample in output:
selection = sample['Data']
sample_nHi, dataset curator — please cite Repobility (https://repobility.com) when reusing this data.
run_all function · python · L523-L549 (27 LOC)utils/spatial-shiny/vectra_lib_v3.py
def run_all(output, folder_location):
    """
    Write the coordinate, PCF and nearest-neighbour tables to CSV files.

    Three files are created inside *folder_location*: ``xy.csv``,
    ``pcf.csv`` and ``nearest_neighbor.csv``.

    Args:
        output: list of dicts
            from extract_data
        folder_location: string
            where to save files
    Returns:
        None
    """
    # Per-sample data, each tagged with its sample name.
    frames = []
    for sample in output:
        frames.append(sample['Data'].assign(Sample=sample['Sample Name']))
    xy = pd.concat(frames)
    xy.rename(columns={'Sample': 'Sample Name'}, inplace=True)
    xy.to_csv(folder_location + "/xy.csv")

    pcf_df = pcf(output, phenotype='Classification', count_threshold=10)
    pcf_df.to_csv(folder_location + "/pcf.csv")

    # One stacked column per neighbour count, named after that count.
    nn_columns = [
        nearest_neighbor(output, phenotype='Classification', k=k)
        .stack()
        .rename(k)
        for k in [1, 5]
    ]
    nn = pd.concat(nn_columns, axis=1)
    nn.index.set_names('Sample Name', level=2, inplace=True)
    nn.to_csv(folder_location + "/nearest_neighbor.csv")
    return None
# interaction_subset function · python · L557-L560 (4 LOC)utils/spatial-shiny/vectra_lib_v3.py
def interaction_subset(df, cell1, cell2, min_count=20):
    """Select the rows of *df* for the unordered pair (*cell1*, *cell2*).

    A row is kept when its 'Cell_one'/'Cell_two' values match the pair in
    either order, its 'min_count' is strictly greater than *min_count*, and
    its 'PCF' entry is not the string 'NA'.
    """
    forward = (df['Cell_one'] == cell1) & (df['Cell_two'] == cell2)
    reverse = (df['Cell_one'] == cell2) & (df['Cell_two'] == cell1)
    enough_cells = df['min_count'] > min_count
    pcf_table = df[(forward | reverse) & enough_cells]
    return pcf_table[pcf_table['PCF'] != 'NA']
# plot_difference function · python · L564-L579 (16 LOC)utils/spatial-shiny/vectra_lib_v3.py
def plot_difference(data, ax = False):
    """Plot the median 'normPCF' curve with a bootstrap band.

    Args:
        data: pandas df
            must provide 'normPCF', 'Cell_one' and 'Cell_two' columns; the
            title is built from the first row's cell names
        ax: matplotlib axes, or False/None
            axes to draw on; when False or None a new figure of 8 x 5 inches
            is created
    Returns:
        None

    NOTE(review): ``x_data`` is not defined in this function; it is presumably
    a module-level array of radii with one entry per PCF step -- confirm.
    """
    pcf_dm = np.median(np.vstack(data['normPCF']), axis=0)
    pcf_dsem = error_median(data['normPCF'])
    # Identity checks instead of ``ax == False``: the comparison no longer
    # goes through the argument's own ``__eq__``, and None is also accepted
    # as "no axes supplied".
    if ax is False or ax is None:
        fig, ax = plt.subplots()
        fig.set_size_inches(8, 5)
    ax.plot(x_data, pcf_dm)
    ax.fill_between(x_data, pcf_dsem[0], pcf_dsem[1], alpha=.4, lw=0)
    ax.set_title(data['Cell_one'].iloc[0] + ' vs. ' + data['Cell_two'].iloc[0],
                 fontsize=18)
    # Dashed reference line at PCF == 1.
    ax.plot(x_data, np.ones(len(x_data)), 'k--')
    # Raw string: ``\m`` is not a valid escape sequence in a normal literal.
    ax.set_xlabel(r'Radius ($\mu$m)', fontsize=16)
    ax.set_ylabel('PCF', fontsize=16)
    ax.tick_params(axis='both', which='major', labelsize=14)
# error_median function · python · L592-L594 (3 LOC)utils/spatial-shiny/vectra_lib_v3.py
def error_median(x):
    """Bootstrap a 95% band around the column-wise median of *x*.

    The rows of ``np.vstack(x)`` are resampled with replacement 100 times
    (using the global ``np.random`` state); the column-wise median of each
    resample is collected.

    Returns:
        list of two arrays: the 2.5th and the 97.5th percentile across
        resamples
    """
    # The stacked matrix does not change between resamples, so build it once
    # instead of on every bootstrap iteration.
    stacked = np.vstack(x)
    n_rows = len(x)
    bootstrap = np.vstack([
        np.median(
            stacked[np.random.choice(n_rows, n_rows, replace=True), :],
            axis=0)
        for _ in range(100)])
    return [np.percentile(bootstrap, 2.5, axis=0),
            np.percentile(bootstrap, 97.5, axis=0)]
# plot_difference function · python · L596-L611 (16 LOC)utils/spatial-shiny/vectra_lib_v3.py
def plot_difference(data, ax = False):
    """Plot the median 'normPCF' curve with a bootstrap band.

    Args:
        data: pandas df
            must provide 'normPCF', 'Cell_one' and 'Cell_two' columns; the
            title is built from the first row's cell names
        ax: matplotlib axes, or False/None
            axes to draw on; when False or None a new figure of 8 x 5 inches
            is created
    Returns:
        None

    NOTE(review): ``x_data`` is not defined in this function; it is presumably
    a module-level array of radii with one entry per PCF step -- confirm.
    """
    pcf_dm = np.median(np.vstack(data['normPCF']), axis=0)
    pcf_dsem = error_median(data['normPCF'])
    # Identity checks instead of ``ax == False``: the comparison no longer
    # goes through the argument's own ``__eq__``, and None is also accepted
    # as "no axes supplied".
    if ax is False or ax is None:
        fig, ax = plt.subplots()
        fig.set_size_inches(8, 5)
    ax.plot(x_data, pcf_dm)
    ax.fill_between(x_data, pcf_dsem[0], pcf_dsem[1], alpha=.4, lw=0)
    ax.set_title(data['Cell_one'].iloc[0] + ' vs. ' + data['Cell_two'].iloc[0],
                 fontsize=18)
    # Dashed reference line at PCF == 1.
    ax.plot(x_data, np.ones(len(x_data)), 'k--')
    # Raw string: ``\m`` is not a valid escape sequence in a normal literal.
    ax.set_xlabel(r'Radius ($\mu$m)', fontsize=16)
    ax.set_ylabel('PCF', fontsize=16)
    ax.tick_params(axis='both', which='major', labelsize=14)
# interaction_subset function · python · L613-L619 (7 LOC)utils/spatial-shiny/vectra_lib_v3.py
def interaction_subset(df, cell1, cell2, min_count=20):
    """Select the rows of *df* for the unordered pair (*cell1*, *cell2*).

    A row is kept when its 'Cell_one'/'Cell_two' values match the pair in
    either order and its 'min_count' is strictly greater than *min_count*.
    No filtering on the 'PCF' column is applied.
    """
    forward = (df['Cell_one'] == cell1) & (df['Cell_two'] == cell2)
    reverse = (df['Cell_one'] == cell2) & (df['Cell_two'] == cell1)
    enough_cells = df['min_count'] > min_count
    return df[(forward | reverse) & enough_cells]
# plot_pcf_curves function · python · L623-L716 (94 LOC)utils/spatial-shiny/vectra_lib_v3.py
def plot_pcf_curves(pcf_df,cell_types_,resolution,label,out_path):
CELL_TYPES_ALIAS =['All']
CELL_TYPES_ALIAS.extend(cell_types_)
CELL_TYPES = CELL_TYPES_ALIAS
COLOR_LIST = sns.color_palette(None, len(CELL_TYPES))
print(COLOR_LIST)
x_data = np.arange(0, 125, .25)
fig = plt.figure(figsize=(10, 8))
outer = mpl.gridspec.GridSpec(3, 3, wspace=0.2, hspace=0.2)
print(str(outer.ncols))
print(str(outer.nrows))
print(len(CELL_TYPES))
#assert pcf_df.shape[0] > 1000,str(pcf_df['PCF']) + str(pcf_df.columns) + str(pcf_df.shape) + str(set(pcf_df['min_count']))
for i in range(len(CELL_TYPES)):
print('i: '+str(i))
inner = mpl.gridspec.GridSpecFromSubplotSpec(2, 1,
subplot_spec=outer[i], wspace=0.1, hspace=0.1, height_ratios=[1,2])
ax = plt.Subplot(fig, inner[0])
ax2 = plt.Subplot(fig, inner[1])
ax.set_title(CELL_TYPES_ALIAS[i], fontsize = 16);
ax2.plot(x_data, np.ones(len(x_data)), 'k--');
for j in rangepcf_AUC function · python · L718-L813 (96 LOC)utils/spatial-shiny/vectra_lib_v3.py
def pcf_AUC(pcf_df,cell_types_,cell1,label,out_path,resolution):
CELL_TYPES = ['All']
CELL_TYPES.extend(cell_types_)
print(CELL_TYPES)
CELL_TYPES_ALIAS = CELL_TYPES
CELL2_LIST = CELL_TYPES
print(CELL_TYPES)
CELL2_LIST.pop(CELL_TYPES.index(cell1))
print(CELL_TYPES)
LABELS = CELL_TYPES
print(cell1)
print(LABELS)
print('Cell 2 list:')
print(CELL2_LIST)
COLORS = sns.color_palette(None, len(CELL_TYPES))
STEP_TO_UM = resolution
def interaction_subset(df, cell1, cell2, min_count=20):
pcf_table = df[(((df['Cell_one']==cell1) & (df['Cell_two']==cell2)) | ((df['Cell_one']==cell2) & (df['Cell_two']==cell1)) ) & (df['min_count'] > min_count)]
#pcf_table = pcf_table[pcf_table['PCF'] != 'NA']
return pcf_table
def pvalue_text(data1, data2, verbose = False):
pvalue = stats.mannwhitneyu(data1, data2, alternative='two-sided')[1]
if verbose:
print(pvalue)
if pvalue < 0.001:
return '***'
elif pvalue < 0.01:
Source: Repobility analyzer · https://repobility.com
read_csv_tsv function · python · L29-L45 (17 LOC)utils/spatial-shiny/vectra_lib_v4.py
def read_csv_tsv(filename):
    """Read a delimited table, trying tab first and then comma.

    The file is parsed as tab-separated; if that yields no ``Path`` column it
    is re-read as comma-separated. When a ``Sample_Name`` column is present
    the column names are rewritten: underscores become spaces, every
    ``Opal <token>`` is wrapped in parentheses, and two known names get their
    punctuation back.

    Args:
        filename: string
            path handed to ``pandas.read_csv``
    Returns:
        pandas df
    """
    table = pd.read_csv(filename, delimiter='\t')  # try tsv
    if 'Path' not in table.columns:  # try comma-delim
        table = pd.read_csv(filename)
    # NOTE: nothing is raised if 'Path' is still missing after the comma
    # attempt; the frame is returned as parsed.
    if 'Sample_Name' in table.columns:  # underscore-style column names

        def wrap_in_parens(match):
            return '(' + match.group(0) + ')'

        # ``regex`` is spelled out on every call: a callable replacement is
        # only accepted with regex=True, and the default of this flag differs
        # between pandas releases. The pattern is a raw string because ``\S``
        # is not a valid string escape.
        table.columns = (
            table.columns.str.replace('_', ' ', regex=False)
            .str.replace(r'Opal [\S]*', wrap_in_parens, regex=True)
            .str.replace('Normalized Counts Total Weighting',
                         '(Normalized Counts, Total Weighting)', regex=False)
            .str.replace('HLA DR', 'HLA-DR', regex=False)
        )
    return table
# extract_data function · python · L48-L207 (160 LOC)utils/spatial-shiny/vectra_lib_v4.py
def extract_data(directory, classification = None, verbose = True,
drop_nan = True, drop_duplicates = False, debug = False):
"""
Extracts cell information from cell_seg_data files and pairs it with
corresponding score files from score_data.
Args:
directory: string
string of parent directory containing all files
classification: function
must take a row of the dataframe and output a string
verbose: bool
output all quality-control checking
drop_nan: bool
remove rows with NaN phenotype
drop_duplicates: bool
whether to remove duplicates with the same file name. Most recently
modified will be kept.
debug: bool
if True, only compile score info and return score_files
Returns:
output: list of dicts
each corresponding to an image file
unless debug is True, in which case:
scorecount_cells function · python · L209-L237 (29 LOC)utils/spatial-shiny/vectra_lib_v4.py
def count_cells(output, grouping = 'Phenotype', density = True):
"""
Counts number or density of cells for each image.
Args:
output: list of dicts
from extract_data
grouping: 'Phenotype' or 'Classification' or list of variables
whether to use VECTRA Phenotype or self-generated classification
density: bool
whether to density-normalize each image
Returns:
pandas df, with columns as image names and rows as cell types
"""
counts_table = pd.concat(
[sample['Data'].groupby(grouping).size().rename(sample['Sample Name'])
for sample in output], axis=1, sort=True).fillna(0)
errors = counts_table.columns[counts_table.sum()==0]
if len(errors) > 0:
counts_table.drop(errors, axis=1, inplace=True)
print("Warning: samples were omitted due to missing values: ", errors)
if density:
counts_table = (counts_table/counts_table.sum())
pcf function · python · L239-L413 (175 LOC)utils/spatial-shiny/vectra_lib_v4.py
def pcf(output, cell_types = None, phenotype = 'Phenotype',
count_threshold = 1,radius=30,resolution=0.377):
"""
Calculates pair correlation function using spatstat from R.
Extra dependencies: rpy2 (3.2.2+), R (3.6+), spatstat (1.62-2+)
Args:
output: list of dicts
from extract_data
cell_types: list
Names of pertinent cell types, which will be prepended by 'All'.
Default is all recognized phenotypes
phenotype: string, 'Phenotype' or 'Classification'
whether to use VECTRA Phenotype or self-generated classification
count_threshold: int
threshold to not calculate a specific pcf
Returns:
pandas df, with each row a sample
"""
#only needs rpy2 if calculating pcf
import rpy2
import rpy2.robjects as robjects
from rpy2.robjects.packages import importr
from rpy2.robjects import pandas2ri
pandas2ri.activate()
r = robjects.r
sppcf_subset function · python · L415-L446 (32 LOC)utils/spatial-shiny/vectra_lib_v4.py
def pcf_subset(df, cell1, cell2, min_count=20, max_radius = 200):
"""
Analysis of pcfs of two specified cell types
Args:
df: pandas dataframe
pcf dataframe from vl.pcf
cell1, cell2: str
Names of cell types of interest (or 'All')
min_count: int or list
exclude samples with fewer than this many cells of each type
max_radius: int
steps to sum pcf to. steps are approx. 0.25 microns
Returns:
pandas df, with each row a sample
"""
cell_mask = (((df['Cell_one']==cell1) & (df['Cell_two']==cell2))
| ((df['Cell_one']==cell2) & (df['Cell_two']==cell1)))
count_mask = ((df['min_count'] > min_count)
if type(min_count) == int
else ((df['count_one'] > min_count[0])
& (df['count_two'] > min_count[1])))
pcf_table = df[cell_mask & count_mask].copy()
pcf_table['normalization'] = pd.to_numeric(pcf_tableerror_median function · python · L448-L463 (16 LOC)utils/spatial-shiny/vectra_lib_v4.py
def error_median(x):
    """
    Calculate shaded plot regions by bootstrapping for error bars.

    The rows of x are resampled with replacement (as many rows as x has)
    100 times; the column-wise median of every resample is kept, and the
    2.5th and 97.5th percentiles over those medians are returned.

    Example of use for plotting:
        pcf_median = data.groupby(variable)['PCF'].apply(
            lambda x: np.median(np.vstack(x), axis=0))
        pcf_error = data.groupby(variable)['PCF'].apply(error_median)

    Args:
        x: sequence of equal-length arrays
            anything np.vstack accepts, one row per entry
    Returns:
        list of two arrays, [lower bound, upper bound], one value per column
    """
    # Stack once: the matrix does not change between bootstrap rounds.
    stacked = np.vstack(x)
    n_rows = len(x)
    bootstrap = np.vstack([
        np.median(
            stacked[np.random.choice(n_rows, n_rows, replace=True), :],
            axis=0)
        for _ in range(100)])
    return [np.percentile(bootstrap, 2.5, axis=0),
            np.percentile(bootstrap, 97.5, axis=0)]
# nearest_neighbor function · python · L465-L514 (50 LOC)utils/spatial-shiny/vectra_lib_v4.py
def nearest_neighbor(output, phenotype = 'Phenotype', k = 1):
"""
Args:
output: list of dicts
from extract_data
phenotype: string, 'Phenotype' or 'Classification'
whether to use VECTRA Phenotype or self-generated classification
k: int
number of neighbors to average
Returns:
pandas df,
Mean distance from each cell type (first index level) to the
closest k neighbors of a given cell type (second index level), for each
sample (column)
"""
nn_output = []
knn_mean_function = lambda x: (x.apply(pd.Series.nsmallest, axis=1, n=k)
.mean(axis = 1))
knn_group_function = lambda group: (group.groupby(group.columns, axis=1)
.apply(knn_mean_function)
.mean())
for sample in output:
selection = sample['Data']
sample_nrun_all function · python · L516-L542 (27 LOC)utils/spatial-shiny/vectra_lib_v4.py
def run_all(output, folder_location):
    """
    Run the analyses and save each result table as a CSV file.

    Files are written into folder_location in this order:
        xy.csv: the 'Data' tables of all samples concatenated, with a
            'Sample Name' column identifying the sample
        pcf.csv: result of pcf on 'Classification' with count_threshold=10
        nearest_neighbor.csv: nearest_neighbor results on 'Classification'
            for k = 1 and k = 5, one column per k

    Args:
        output: list of dicts
            from extract_data
        folder_location: string
            where to save files
    Returns:
        None
    """
    # Per-cell table: tag every sample's rows with the sample they came from.
    tagged = []
    for entry in output:
        tagged.append(entry['Data'].assign(Sample=entry['Sample Name']))
    xy = pd.concat(tagged).rename(columns={'Sample': 'Sample Name'})
    xy.to_csv(folder_location + "/xy.csv")

    # Pair correlation functions.
    pcf_df = pcf(output, phenotype='Classification', count_threshold=10)
    pcf_df.to_csv(folder_location + "/pcf.csv")

    # Nearest-neighbour distances: one stacked column per k, named by k.
    per_k = [
        nearest_neighbor(output, phenotype='Classification', k=k)
        .stack()
        .rename(k)
        for k in (1, 5)
    ]
    nn = pd.concat(per_k, axis=1)
    nn.index = nn.index.set_names('Sample Name', level=2)
    nn.to_csv(folder_location + "/nearest_neighbor.csv")
    return None
# Repobility's GitHub App fixes findings like these · https://github.com/apps/repobility-bot
interaction_subset function · python · L550-L553 (4 LOC)utils/spatial-shiny/vectra_lib_v4.py
def interaction_subset(df, cell1, cell2, min_count=20):
    """
    Select the rows of a pcf dataframe that describe one pair of cell types.

    Args:
        df: pandas dataframe
            read for its 'Cell_one', 'Cell_two' and 'min_count' columns
        cell1, cell2:
            values matched against 'Cell_one' and 'Cell_two', in either order
        min_count:
            a row is kept only if its 'min_count' is strictly greater
    Returns:
        pandas df, the matching rows of df
    """
    forward = (df['Cell_one'] == cell1) & (df['Cell_two'] == cell2)
    swapped = (df['Cell_one'] == cell2) & (df['Cell_two'] == cell1)
    enough_cells = df['min_count'] > min_count
    # NOTE: rows are not filtered on df['PCF'] != 'NA' here; that filter is
    # disabled (commented out) in the original code.
    return df[(forward | swapped) & enough_cells]
# plot_difference function · python · L557-L572 (16 LOC)utils/spatial-shiny/vectra_lib_v4.py
def plot_difference(data, ax = False):
    """
    Plot the median normalized PCF curve with a bootstrapped error band.

    Draws the column-wise median of data['normPCF'], a shaded band between
    the two bounds returned by error_median, and a dashed reference line
    at 1.

    Args:
        data: pandas dataframe
            read for 'normPCF' (one array per row, stacked with np.vstack)
            and for 'Cell_one' / 'Cell_two', whose first entries form the
            title
        ax: matplotlib axes, or False / None
            axes to draw on; with False (the default) or None a new
            8 x 5 inch figure is created
    Returns:
        None
    Note:
        x_data (the x values of the curve) is not defined in this function;
        it is looked up in the enclosing module scope.
    """
    pcf_dm = np.median(np.vstack(data['normPCF']), axis=0)
    pcf_dsem = error_median(data['normPCF'])
    # Identity checks instead of `ax == False`: the sentinel is tested
    # without invoking the axes object's own equality, and None is accepted
    # as "no axes given" as well.
    if ax is False or ax is None:
        fig, ax = plt.subplots()
        fig.set_size_inches(8, 5)
    ax.plot(x_data, pcf_dm)
    ax.fill_between(x_data, pcf_dsem[0], pcf_dsem[1], alpha=.4, lw=0)
    ax.set_title(
        data['Cell_one'].iloc[0] + ' vs. ' + data['Cell_two'].iloc[0],
        fontsize=18)
    ax.plot(x_data, np.ones(len(x_data)), 'k--')
    # Raw string: '\m' is not a valid escape sequence in a normal literal
    # (warning on current Python versions); the label text is unchanged.
    ax.set_xlabel(r'Radius ($\mu$m)', fontsize=16)
    ax.set_ylabel('PCF', fontsize=16)
    ax.tick_params(axis='both', which='major', labelsize=14)
# error_median function · python · L585-L587 (3 LOC)utils/spatial-shiny/vectra_lib_v4.py
def error_median(x):
    """
    Bootstrap a 95% band around the column-wise median of x.

    The rows of x are resampled with replacement (as many rows as x has)
    100 times; the column-wise median of every resample is kept, and the
    2.5th and 97.5th percentiles over those medians are returned.

    NOTE(review): the listing shows an earlier error_median in the same
    file (L448-L463); being defined later, this one is what the module
    name refers to after import.

    Args:
        x: sequence of equal-length arrays
            anything np.vstack accepts, one row per entry
    Returns:
        list of two arrays, [lower bound, upper bound], one value per column
    """
    # The stacked matrix is the same for every round, so build it once.
    rows = np.vstack(x)
    count = len(x)
    medians = [
        np.median(rows[np.random.choice(count, count, replace=True), :],
                  axis=0)
        for _ in range(100)
    ]
    bootstrap = np.vstack(medians)
    return [np.percentile(bootstrap, 2.5, axis=0),
            np.percentile(bootstrap, 97.5, axis=0)]
# page 1 / 2next ›