Source code for python_bioinformagicks.utilities._get_proportions
import pandas as pd
import numpy as np
[docs]
def get_proportions(
    df: pd.DataFrame,
    outer_col: str,
    inner_col: str,
    return_counts: bool = False
) -> "pd.DataFrame | None":
    """
    Cross-tabulate `inner_col` against `outer_col`.

    For every value of `outer_col`, count how many rows of `df`
    fall into each value of `inner_col`, and optionally express
    those counts as a fraction of that `outer_col` value's total.

    Parameters
    ----------
    df: pd.DataFrame
        The dataframe containing at least the columns
        `outer_col` and `inner_col`.

    outer_col: str
        The column name of the outermost column;
        often `batch`, `sample`, or `genotype`.

    inner_col: str
        The column name of the innermost column;
        often `celltype`, `leiden`.

    return_counts: bool (default: False)
        If `True`, return the number of rows per combination,
        otherwise return the fraction.

    Returns
    -------
    pd.DataFrame or None
        A float-valued table with one row per `inner_col` value
        and one column per `outer_col` value. Combinations that
        never occur in `df` are filled with 0. When
        `return_counts` is `False`, every column is divided by
        its own total.
        If either column is missing from `df`, an error message
        is printed and `None` is returned.
    """
    # Guard clause: report a missing column once, up front.
    if outer_col not in df.columns or inner_col not in df.columns:
        print("[ERROR]: missing outer_col/inner_col")
        return None

    # Counts are cast to float so the count table and the fraction
    # table share a dtype.
    pair_counts = df.value_counts([outer_col, inner_col]).astype(float)

    # Pivot `outer_col` into the columns; (outer, inner) pairs that
    # were never observed come out as NaN and are treated as 0.
    count_table = pair_counts.unstack(level=outer_col).fillna(0)

    if return_counts:
        return count_table

    # Dividing a DataFrame by a Series aligns the Series on the
    # column labels, so each column is scaled by its own total.
    return count_table / count_table.sum(axis=0)