Source code for python_bioinformagicks.tools._scale_by_group
import scanpy as sc
import anndata as ad
import numpy as np
import scipy
[docs]
def scale_by_group(
    adata: ad.AnnData,
    groupby: str = "celltype",
    layer: str = None,
    copy: bool = True,
    zero_center: bool = False,
):
    """
    Z-scale expression values within each group of cells independently.

    Performs z-standard scaling, similar to :code:`sc.pp.scale`, but for
    each category in :code:`adata.obs[groupby]` separately. For instance,
    if :code:`groupby == "celltype"`, the resulting matrix highlights
    changes from the mean of all cells within each celltype (changes
    between celltypes are largely meaningless).

    Parameters
    ----------
    adata: ad.AnnData
        Annotated data matrix. Its :code:`X` and existing layers are
        never modified; each group is scaled on a copy.
    groupby: str (default: "celltype")
        The categorical column in `adata.obs` containing groups to
        subset and standard scale within. Cells whose value in this
        column is missing belong to no group and are left as zeros in
        the result. Categories that contain no cells are skipped.
    layer: str (default: `None`)
        The layer key in `adata.layers` whose values should be scaled
        (i.e. the starting matrix). If `None`, use :code:`adata.X`.
    copy: bool (default: `True`)
        If `True`, return the resulting scaled data matrix. If `False`,
        modify input `adata` by placing the resulting matrix in
        :code:`adata.layers["scaled_by_" + str(groupby)]`.
    zero_center: bool (default: `False`)
        As in :code:`sc.pp.scale`; when `True`, subtract the mean gene
        expression within each group. When `False`, the sparse
        structure of the gene expression data remains.

    Returns
    -------
    If :code:`copy is True`, return the scaled matrix.
    If :code:`copy is False`, return `None`; the scaled matrix is placed
    in :code:`adata.layers["scaled_by_" + str(groupby)]`, which modifies
    :code:`adata` in-place.

    If :code:`zero_center is False`, the resulting scaled matrix is of
    type :code:`scipy.sparse.csr_matrix`, else the result is of type
    :code:`np.ndarray` (dense). Values are stored as float64.
    """
    n_obs, n_vars = adata.shape

    # Integer category codes give one vectorised comparison per group;
    # missing values get code -1 and therefore never match any group.
    obs_groups = adata.obs[groupby].astype("category")
    group_codes = obs_groups.cat.codes.to_numpy()

    # Only the non-centred result is guaranteed to stay sparse, so only
    # then do we avoid allocating a dense (n_obs, n_vars) buffer.
    sparse_output = zero_center is False
    if sparse_output:
        row_parts, col_parts, val_parts = [], [], []
    else:
        dense_result = np.zeros((n_obs, n_vars))

    for code in range(len(obs_groups.cat.categories)):
        obs_mask = group_codes == code
        # positions of this group's cells in the parent object
        parent_obs_indices = np.flatnonzero(obs_mask)
        if parent_obs_indices.size == 0:
            # unused category: nothing to scale
            continue

        scaled = sc.pp.scale(
            adata[obs_mask],
            zero_center=zero_center,
            layer=layer,
            copy=True,
        )
        # sc.pp.scale writes its result into the representation it was
        # asked to scale, so read the layer back when one was requested
        # (``.X`` would still hold the unscaled values in that case).
        X_group_scaled = scaled.X if layer is None else scaled.layers[layer]

        if sparse_output:
            # COO keeps row, column and value arrays aligned entry by
            # entry, even if the matrix stores explicit zeros.
            coo = scipy.sparse.coo_matrix(X_group_scaled)
            row_parts.append(parent_obs_indices[coo.row])
            col_parts.append(coo.col)
            val_parts.append(coo.data)
        else:
            if scipy.sparse.issparse(X_group_scaled):
                X_group_scaled = X_group_scaled.toarray()
            dense_result[parent_obs_indices] = np.asarray(X_group_scaled)

    if sparse_output:
        if val_parts:
            ret = scipy.sparse.coo_matrix(
                (
                    np.concatenate(val_parts).astype(np.float64, copy=False),
                    (np.concatenate(row_parts), np.concatenate(col_parts)),
                ),
                shape=(n_obs, n_vars),
            ).tocsr()
            # drop explicitly stored zeros so only real values are kept
            ret.eliminate_zeros()
        else:
            ret = scipy.sparse.csr_matrix((n_obs, n_vars), dtype=np.float64)
    else:
        ret = dense_result

    if copy:
        return ret

    adata.layers["scaled_by_" + str(groupby)] = ret
    return None