Skip to content

Visualization tools

hierarchical_clustering(df, vmin=None, vmax=None, figsize=(8, 8), top_height=2, left_width=2, xmaxticks=None, ymaxticks=None, metric='cosine', cmap=None)

Perform and plot hierarchical clustering on a dataframe.

Parameters:

Name Type Description Default
df DataFrame

Input data in DataFrame format.

required
vmin Optional[float]

Minimum value to anchor the colormap. If None, inferred from data.

None
vmax Optional[float]

Maximum value to anchor the colormap. If None, inferred from data.

None
figsize Tuple[int, int]

Size of the main figure in inches.

(8, 8)
top_height int

Height of the top dendrogram.

2
left_width int

Width of the left dendrogram.

2
xmaxticks Optional[int]

Maximum number of x-ticks to display.

None
ymaxticks Optional[int]

Maximum number of y-ticks to display.

None
metric Union[str, Tuple[str, str]]

Distance metric to use. Either a string to use the same metric for both axes, or a tuple of two strings for different metrics for each axis.

'cosine'
cmap Optional[str]

Matplotlib colormap name. If None, uses "coolwarm".

None

Returns:

Type Description
Tuple[DataFrame, Figure, List[int], List[int]]

A tuple containing, in order: the clustered DataFrame (reordered according to the clustering); the matplotlib Figure object; the indices of the rows in their clustered order; and the indices of the columns in their clustered order.

Source code in src/ms_mint/matplotlib_tools.py
def hierarchical_clustering(
    df: pd.DataFrame,
    vmin: Optional[float] = None,
    vmax: Optional[float] = None,
    figsize: Tuple[int, int] = (8, 8),
    top_height: int = 2,
    left_width: int = 2,
    xmaxticks: Optional[int] = None,
    ymaxticks: Optional[int] = None,
    metric: Union[str, Tuple[str, str]] = "cosine",
    cmap: Optional[str] = None,
) -> Tuple[pd.DataFrame, Figure, List[int], List[int]]:
    """Perform and plot hierarchical clustering on a dataframe.

    Missing values are replaced by zero before distances are computed. Rows and
    columns are clustered independently with complete linkage; the reordered
    matrix is drawn as a heatmap with the row dendrogram on the left and the
    column dendrogram on top.

    Args:
        df: Input data in DataFrame format.
        vmin: Minimum value to anchor the colormap. If None, inferred from data.
        vmax: Maximum value to anchor the colormap. If None, inferred from data.
        figsize: Size of the main figure in inches.
        top_height: Height of the top dendrogram.
        left_width: Width of the left dendrogram.
        xmaxticks: Maximum number of x-ticks to display. If None, derived from
            the width of the heatmap panel.
        ymaxticks: Maximum number of y-ticks to display. If None, derived from
            the height of the heatmap panel.
        metric: Distance metric to use. Either a string to use the same metric for
            both axes, or a tuple ``(column_metric, row_metric)`` of two strings.
            ``None`` falls back to "euclidean".
        cmap: Matplotlib colormap name. If None, uses "coolwarm".

    Returns:
        A tuple containing:
            - The clustered DataFrame (reordered according to clustering)
            - The matplotlib Figure object
            - The indices of rows in their clustered order
            - The indices of columns in their clustered order

    Raises:
        ValueError: If ``metric`` is not a string, a tuple of two strings or None.
    """
    if metric is None:
        # pdist rejects ``metric=None`` with a TypeError, so use an explicit metric.
        metric_x = metric_y = "euclidean"
    elif isinstance(metric, str):
        metric_x = metric_y = metric
    elif (
        isinstance(metric, tuple)
        and len(metric) == 2
        and all(isinstance(m, str) for m in metric)
    ):
        metric_x, metric_y = metric
    else:
        raise ValueError("Metric must be a string or a tuple of two strings")

    # Panel geometry in figure-relative coordinates.
    total_width, total_height = figsize
    main_h = 1 - (top_height / total_height)
    main_w = 1 - (left_width / total_width)
    gap_x = 0.1 / total_width
    gap_y = 0.1 / total_height
    left_w = 1 - main_w
    top_h = 1 - main_h

    if xmaxticks is None:
        xmaxticks = int(5 * main_w * total_width)
    if ymaxticks is None:
        ymaxticks = int(5 * main_h * total_height)

    dm = df.fillna(0).values

    # ``linkage`` expects a *condensed* distance matrix. Handing it the square
    # form makes scipy treat every row of distances as an observation vector and
    # re-measure Euclidean distances between them, which silently ignores the
    # requested metric.
    row_linkage = linkage(pdist(dm, metric=metric_y), method="complete")
    col_linkage = linkage(pdist(dm.T, metric=metric_x), method="complete")

    fig = plt.figure(figsize=figsize)
    fig.set_layout_engine("tight")

    # Left dendrogram (rows); drawn on an explicit axes rather than the
    # pyplot "current" axes.
    ax_left = fig.add_axes([0, 0, left_w - gap_x, main_h], frameon=False)
    Z1 = dendrogram(
        row_linkage,
        orientation="left",
        color_threshold=0,
        above_threshold_color="k",
        ax=ax_left,
    )
    ax_left.set_xticks([])
    ax_left.set_yticks([])

    # Top dendrogram (columns).
    ax_top = fig.add_axes([left_w, main_h + gap_y, main_w, top_h - gap_y], frameon=False)
    Z2 = dendrogram(
        col_linkage,
        color_threshold=0,
        above_threshold_color="k",
        ax=ax_top,
    )
    ax_top.set_xticks([])
    ax_top.set_yticks([])

    # Heatmap of the matrix reordered by the dendrogram leaves.
    axmatrix = fig.add_axes([left_w, 0, main_w, main_h])
    row_order = Z1["leaves"]
    col_order = Z2["leaves"]
    ordered = dm[row_order, :][:, col_order]

    if cmap is None:
        cmap = "coolwarm"
    # Rows are flipped so the heatmap lines up with the left dendrogram.
    axmatrix.matshow(ordered[::-1], aspect="auto", cmap=cmap, vmin=vmin, vmax=vmax)

    axmatrix.yaxis.tick_right()
    axmatrix.xaxis.tick_bottom()

    ndx_leaves = row_order[::-1]
    col_leaves = col_order
    clustered = df.iloc[ndx_leaves, col_leaves]

    # Evenly spaced tick positions; np.unique drops the duplicates that appear
    # when the tick budget exceeds the number of rows/columns.
    ndx_y = np.unique(np.linspace(0, len(clustered.index) - 1, ymaxticks).astype(int))
    ndx_x = np.unique(np.linspace(0, len(clustered.columns) - 1, xmaxticks).astype(int))

    axmatrix.set_yticks(ndx_y)
    axmatrix.set_yticklabels(clustered.index[ndx_y])
    axmatrix.set_xticks(ndx_x)
    axmatrix.set_xticklabels(clustered.columns[ndx_x], rotation=90)

    return clustered, fig, ndx_leaves, col_leaves

plot_metabolomics_hist2d(df, figsize=(4, 2.5), dpi=300, set_dim=True, cmap='jet', rt_range=None, mz_range=None, mz_bins=100, **kwargs)

Create a 2D histogram of metabolomics data.

Parameters:

Name Type Description Default
df DataFrame

DataFrame containing metabolomics data with scan_time, mz, and intensity columns.

required
figsize Tuple[float, float]

Size of the figure in inches (width, height).

(4, 2.5)
dpi int

Resolution of the figure in dots per inch.

300
set_dim bool

Whether to set figure dimensions.

True
cmap str

Colormap name to use for the plot.

'jet'
rt_range Optional[Tuple[float, float]]

Retention time range (min, max) to display. If None, uses data range.

None
mz_range Optional[Tuple[float, float]]

M/Z range (min, max) to display. If None, uses data range.

None
mz_bins int

Number of bins to use for the m/z axis.

100
**kwargs

Additional keyword arguments passed to plt.hist2d.

{}

Returns:

Type Description
Tuple[ndarray, ndarray, ndarray, Any]

The result of plt.hist2d, which is a tuple containing, in order: the histogram array; the edges of the bins along the x-axis; the edges of the bins along the y-axis; and the QuadMesh image created by hist2d (not an Axes object).

Source code in src/ms_mint/matplotlib_tools.py
def plot_metabolomics_hist2d(
    df: pd.DataFrame,
    figsize: Tuple[float, float] = (4, 2.5),
    dpi: int = 300,
    set_dim: bool = True,
    cmap: str = "jet",
    rt_range: Optional[Tuple[float, float]] = None,
    mz_range: Optional[Tuple[float, float]] = None,
    mz_bins: int = 100,
    **kwargs,
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, Any]:
    """Create a 2D histogram of metabolomics data.

    Scan time is binned along the x-axis (one bin per two time units of the
    displayed range, at least one bin) and m/z along the y-axis. Each point is
    weighted by ``log1p(intensity)``.

    Args:
        df: DataFrame containing metabolomics data with scan_time, mz, and intensity columns.
        figsize: Size of the figure in inches (width, height).
        dpi: Resolution of the figure in dots per inch.
        set_dim: Whether to create a new figure with ``figsize`` and ``dpi``.
            If False, the histogram is drawn into the current pyplot figure.
        cmap: Colormap name to use for the plot.
        rt_range: Retention time range (min, max) to display. If None, uses data range.
        mz_range: M/Z range (min, max) to display. If None, uses data range.
        mz_bins: Number of bins to use for the m/z axis.
        **kwargs: Additional keyword arguments passed to plt.hist2d. They take
            precedence over the defaults set here (``vmin``, ``vmax``, ``cmap``,
            ``range``, ``bins``, ``weights``).

    Returns:
        The result of plt.hist2d, which is a tuple containing:
            - The histogram array
            - The edges of the bins along the x-axis
            - The edges of the bins along the y-axis
            - The QuadMesh image created by hist2d
    """
    if set_dim:
        plt.figure(figsize=figsize, dpi=dpi)

    if mz_range is None:
        mz_range = (df.mz.min(), df.mz.max())

    if rt_range is None:
        rt_range = (df.scan_time.min(), df.scan_time.max())

    # A span shorter than two time units would otherwise yield zero bins,
    # which hist2d rejects.
    rt_bins = max(1, int((rt_range[1] - rt_range[0]) / 2))

    params = dict(
        vmin=1,
        vmax=1e3,
        cmap=cmap,
        range=(rt_range, mz_range),
        bins=[rt_bins, mz_bins],
    )
    # Caller-supplied options win, including ``bins`` and ``weights``, which
    # previously collided with the hard-coded keyword arguments.
    params.update(kwargs)
    if "weights" not in params:
        params["weights"] = np.log1p(df["intensity"])

    result = plt.hist2d(df["scan_time"], df["mz"], **params)

    plt.xlabel("Scan time [s]")
    plt.ylabel("m/z")
    plt.gca().ticklabel_format(useOffset=False, style="plain")
    return result

plot_peak_shapes(mint_results, mint_metadata=None, fns=None, peak_labels=None, height=3, aspect=1.5, legend=False, col_wrap=4, hue='ms_file_label', title=None, dpi=None, sharex=False, sharey=False, kind='line', **kwargs)

Plot peak shapes from MS-MINT results.

Parameters:

Name Type Description Default
mint_results DataFrame

DataFrame in Mint results format.

required
mint_metadata Optional[DataFrame]

DataFrame in Mint metadata format for additional sample information.

None
fns Optional[List[str]]

Filenames to include. If None, includes all files.

None
peak_labels Optional[Union[str, List[str]]]

Peak label(s) to include. If None, includes all peak labels.

None
height int

Height of each figure facet in inches.

3
aspect float

Aspect ratio (width/height) of each figure facet.

1.5
legend bool

Whether to display a legend.

False
col_wrap int

Number of columns for subplots.

4
hue str

Column name to use for color grouping.

'ms_file_label'
title Optional[str]

Title to add to the figure.

None
dpi Optional[int]

Resolution of generated image.

None
sharex bool

Whether to share x-axis range between subplots.

False
sharey bool

Whether to share y-axis range between subplots.

False
kind str

Type of seaborn relplot ('line', 'scatter', etc.).

'line'
**kwargs

Additional keyword arguments passed to seaborn's relplot.

{}

Returns:

Type Description
FacetGrid

A seaborn FacetGrid object containing the plot.

Source code in src/ms_mint/matplotlib_tools.py
def plot_peak_shapes(
    mint_results: pd.DataFrame,
    mint_metadata: Optional[pd.DataFrame] = None,
    fns: Optional[List[str]] = None,
    peak_labels: Optional[Union[str, List[str]]] = None,
    height: int = 3,
    aspect: float = 1.5,
    legend: bool = False,
    col_wrap: int = 4,
    hue: str = "ms_file_label",
    title: Optional[str] = None,
    dpi: Optional[int] = None,
    sharex: bool = False,
    sharey: bool = False,
    kind: str = "line",
    **kwargs,
) -> Optional[sns.FacetGrid]:
    """Plot peak shapes from MS-MINT results.

    Only rows with a positive ``peak_area`` and more than one data point are
    plotted. The comma-separated ``peak_shape_rt`` / ``peak_shape_int`` strings
    of each row are expanded into one long-format table that is handed to
    seaborn's ``relplot``, with one facet per peak label.

    Args:
        mint_results: DataFrame in Mint results format.
        mint_metadata: DataFrame in Mint metadata format for additional sample
            information. It is left-joined on ``ms_file_label`` against its index.
        fns: Filenames to include (matched against the ``ms_file`` column).
            If None, includes all files.
        peak_labels: Peak label(s) to include. If None, includes all peak labels.
        height: Height of each figure facet in inches.
        aspect: Aspect ratio (width/height) of each figure facet.
        legend: Whether to display a legend.
        col_wrap: Number of columns for subplots.
        hue: Column name to use for color grouping.
        title: Title to add to the figure.
        dpi: Resolution of generated image. If None, the figure's dpi is left unchanged.
        sharex: Whether to share x-axis range between subplots.
        sharey: Whether to share y-axis range between subplots.
        kind: Type of seaborn relplot ('line', 'scatter', etc.).
        **kwargs: Additional keyword arguments passed to seaborn's relplot.
            A ``facet_kws`` entry is merged over the sharex/sharey settings.

    Returns:
        A seaborn FacetGrid object containing the plot, or None when no peak
        shape data remains after filtering.
    """
    # Boolean indexing already returns a new frame, so the input is never mutated.
    R = mint_results[mint_results.peak_area > 0]

    if peak_labels is None:
        peak_labels = R.peak_label.drop_duplicates().values
    else:
        if isinstance(peak_labels, str):
            peak_labels = [peak_labels]
        R = R[R.peak_label.isin(peak_labels)]

    if fns is not None:
        R = R[R.ms_file.isin(fns)]

    # A single data point does not make a shape.
    R = R[R.peak_n_datapoints > 1]

    dfs = []
    # Iterate label by label so the long table follows the order of peak_labels.
    for peak_label in peak_labels:
        for _, row in R[R.peak_label == peak_label].iterrows():
            dfs.append(
                pd.DataFrame(
                    {
                        "Scan time [s]": [float(v) for v in row.peak_shape_rt.split(",")],
                        "Intensity": [float(v) for v in row.peak_shape_int.split(",")],
                        "ms_file_label": row.ms_file_label,
                        "peak_label": peak_label,
                        "Expected Scan time [s]": row.rt,
                    }
                )
            )

    if not dfs:
        return None

    df = pd.concat(dfs, ignore_index=True)

    # Add metadata
    if mint_metadata is not None:
        df = pd.merge(df, mint_metadata, left_on="ms_file_label", right_index=True, how="left")

    _facet_kws = dict(sharex=sharex, sharey=sharey)
    _facet_kws.update(kwargs.pop("facet_kws", None) or {})

    g = sns.relplot(
        data=df,
        x="Scan time [s]",
        y="Intensity",
        hue=hue,
        col="peak_label",
        col_order=peak_labels,
        kind=kind,
        col_wrap=col_wrap,
        height=height,
        aspect=aspect,
        facet_kws=_facet_kws,
        legend=legend,
        **kwargs,
    )

    g.set_titles(row_template="{row_name}", col_template="{col_name}")

    for ax in g.axes.flatten():
        ax.ticklabel_format(style="sci", scilimits=(0, 0), axis="y")

    if title is not None:
        g.fig.suptitle(title, y=1.01)

    # Honour the documented ``dpi`` argument, which used to be ignored.
    if dpi is not None:
        g.fig.set_dpi(dpi)

    return g

plot_peaks(series, peaks=None, highlight=None, expected_rt=None, weights=None, legend=True, label=None, **kwargs)

Plot time series data with peak annotations.

Parameters:

Name Type Description Default
series Series

Time series data with time as index and intensity as values.

required
peaks Optional[DataFrame]

DataFrame containing peak information.

None
highlight Optional[List[int]]

List of peak indices to highlight.

None
expected_rt Optional[float]

Expected retention time to mark on the plot.

None
weights Optional[ndarray]

Array of weight values (e.g., for Gaussian weighting).

None
legend bool

Whether to display the legend.

True
label Optional[str]

Label for the time series data.

None
**kwargs

Additional keyword arguments passed to the plot function.

{}

Returns:

Type Description
Figure

Matplotlib Figure containing the plot.

Source code in src/ms_mint/matplotlib_tools.py
def plot_peaks(
    series: pd.Series,
    peaks: Optional[pd.DataFrame] = None,
    highlight: Optional[List[int]] = None,
    expected_rt: Optional[float] = None,
    weights: Optional[np.ndarray] = None,
    legend: bool = True,
    label: Optional[str] = None,
    **kwargs,
) -> Figure:
    """Plot time series data with peak annotations.

    Everything is drawn on the current pyplot axes.

    Args:
        series: Time series data with time as index and intensity as values.
        peaks: DataFrame containing peak information. It must provide an
            ``ndxs`` column with positional indices into ``series``; each row is
            unpacked positionally into seven values, of which the 4th, 6th and
            7th are used as baseline height, start and end of the peak.
        highlight: List of peak indices (index labels of ``peaks``) to highlight.
        expected_rt: Expected retention time to mark on the plot.
        weights: Array of weight values (e.g., for Gaussian weighting).
        legend: Whether to display the legend.
        label: Label for the time series data. Defaults to "Intensity".
        **kwargs: Additional keyword arguments passed to the plot function.

    Returns:
        Matplotlib Figure containing the plot.
    """
    if highlight is None:
        highlight = []
    ax = plt.gca()
    ax.plot(
        series.index,
        series.values,
        label="Intensity" if label is None else label,
        **kwargs,
    )
    if peaks is not None:
        series.iloc[peaks.ndxs].plot(label="Peaks", marker="x", y="intensity", lw=0, ax=ax)
        selected_labelled = False
        for i, (
            ndx,
            (_, _, _, peak_base_height, _, rt_min, rt_max),
        ) in enumerate(peaks.iterrows()):
            if ndx in highlight:
                # Label only the first span so the legend has a single entry.
                ax.axvspan(
                    rt_min,
                    rt_max,
                    color="green",
                    alpha=0.25,
                    label=None if selected_labelled else "Selected",
                )
                selected_labelled = True
            ax.hlines(
                peak_base_height,
                rt_min,
                rt_max,
                color="orange",
                label="Peak width" if i == 0 else None,
            )
    if expected_rt is not None:
        ax.axvspan(expected_rt, expected_rt + 1, color="blue", alpha=1, label="Expected Rt")
    if weights is not None:
        ax.plot(weights, linestyle="--", label="Gaussian weight")
    ax.set_ylabel("Intensity")
    ax.set_xlabel("Scan time [s]")
    ax.ticklabel_format(axis="y", style="sci", scilimits=(0, 0))

    if legend:
        # Nothing above creates a legend, so it has to be requested explicitly.
        ax.legend()
    else:
        # get_legend() returns None when the axes has no legend; calling
        # .remove() on it unconditionally used to raise AttributeError.
        existing = ax.get_legend()
        if existing is not None:
            existing.remove()
    return plt.gcf()

options: show_root_heading: true show_root_full_path: true show_submodules: true members_order: source

get_palette_colors(palette_name, num_colors)

Get a list of colors from a specific colorlover palette.

Parameters:

Name Type Description Default
palette_name str

Name of the color palette.

required
num_colors int

Number of colors to extract.

required

Returns:

Type Description
List[str]

List of color strings in the requested palette.

Source code in src/ms_mint/plotly_tools.py
def get_palette_colors(palette_name: str, num_colors: int) -> List[str]:
    """Get a list of colors from a specific colorlover palette.

    The palette is looked up in ``cl.scales`` under the requested size (at
    least 3) in the "qual", "seq" and "div" categories, in that order. If it is
    not available at that size, the qualitative "Paired" palette of the same
    size is returned instead. If that size does not exist either, the largest
    available "Paired" palette is repeated cyclically to the requested length.

    Args:
        palette_name: Name of the color palette.
        num_colors: Number of colors to extract. Values below 3 are raised to 3.

    Returns:
        List of color strings in the requested palette.
    """
    # Categories in the colorlover package
    categories = ("qual", "seq", "div")

    num_colors = max(num_colors, 3)

    # Not every size exists, and not every size carries every category, so
    # look things up defensively instead of indexing (which raised KeyError).
    by_category = cl.scales.get(f"{num_colors}", {})
    for category in categories:
        palette = by_category.get(category, {}).get(palette_name)
        if palette is not None:
            return palette

    # If palette not found in any category, return a default one
    fallback = by_category.get("qual", {}).get("Paired")
    if fallback is not None:
        return fallback

    # More colors requested than any shipped scale provides: cycle through the
    # largest "Paired" palette so callers still get one color per item.
    largest = max(
        (size for size, cats in cl.scales.items() if "Paired" in cats.get("qual", {})),
        key=int,
    )
    base = cl.scales[largest]["qual"]["Paired"]
    return [base[i % len(base)] for i in range(num_colors)]

plotly_heatmap(df, normed_by_cols=False, transposed=False, clustered=False, add_dendrogram=False, name='', x_tick_colors=None, height=None, width=None, correlation=False, call_show=False, verbose=False)

Create an interactive heatmap from a dense-formatted dataframe.

Parameters:

Name Type Description Default
df DataFrame

Input data in DataFrame format.

required
normed_by_cols bool

Whether to normalize column vectors.

False
transposed bool

Whether to transpose the generated image.

False
clustered bool

Whether to apply hierarchical clustering on rows.

False
add_dendrogram bool

Whether to show a dendrogram (only when clustered=True).

False
name str

Name to use in figure title.

''
x_tick_colors Optional[str]

Color of x-ticks.

None
height Optional[int]

Image height in pixels.

None
width Optional[int]

Image width in pixels.

None
correlation bool

Whether to convert the table to a correlation matrix.

False
call_show bool

Whether to display the figure immediately.

False
verbose bool

Whether to print additional information.

False

Returns:

Type Description
Optional[Figure]

A Plotly Figure object, or None if call_show is True.

Source code in src/ms_mint/plotly_tools.py
def plotly_heatmap(
    df: pd.DataFrame,
    normed_by_cols: bool = False,
    transposed: bool = False,
    clustered: bool = False,
    add_dendrogram: bool = False,
    name: str = "",
    x_tick_colors: Optional[str] = None,
    height: Optional[int] = None,
    width: Optional[int] = None,
    correlation: bool = False,
    call_show: bool = False,
    verbose: bool = False,
) -> Optional[PlotlyFigure]:
    """Create an interactive heatmap from a dense-formatted dataframe.

    Rows whose maximum equals zero are dropped first. The optional steps are
    then applied in this order: column normalization, transposition,
    correlation, row clustering.

    Args:
        df: Input data in DataFrame format.
        normed_by_cols: Whether to divide every column by its maximum.
        transposed: Whether to transpose the generated image.
        clustered: Whether to apply hierarchical clustering on rows.
        add_dendrogram: Whether to show a dendrogram (only when clustered=True).
        name: Name to use in figure title. An empty name yields an empty title.
        x_tick_colors: Color of x-ticks. Not used by the current implementation.
        height: Image height in pixels.
        width: Image width in pixels.
        correlation: Whether to convert the table to a correlation matrix.
        call_show: Whether to display the figure immediately.
        verbose: Whether to print additional information. Not used by the
            current implementation.

    Returns:
        A Plotly Figure object, or None if call_show is True.
    """
    # Keep only rows that contain something other than zeros at their maximum.
    row_has_signal = df.max(axis=1) != 0
    df = df.loc[row_has_signal[row_has_signal].index]

    descriptors = []

    if normed_by_cols:
        df = df.divide(df.max()).fillna(0)
        descriptors.append("normalized")

    if transposed:
        df = df.T

    if not correlation:
        plot_type = "Heatmap"
        colorscale = "Bluered"
    else:
        plot_type = "Correlation"
        df = df.corr()
        colorscale = [
            [0.0, "rgb(165,0,38)"],
            [0.1111111111111111, "rgb(215,48,39)"],
            [0.2222222222222222, "rgb(244,109,67)"],
            [0.3333333333333333, "rgb(253,174,97)"],
            [0.4444444444444444, "rgb(254,224,144)"],
            [0.5555555555555556, "rgb(224,243,248)"],
            [0.6666666666666666, "rgb(171,217,233)"],
            [0.7777777777777778, "rgb(116,173,209)"],
            [0.8888888888888888, "rgb(69,117,180)"],
            [1.0, "rgb(49,54,149)"],
        ]

    if clustered:
        dendro_side = ff.create_dendrogram(
            df,
            orientation="right",
            labels=df.index.to_list(),
            color_threshold=0,
            colorscale=["black"] * 8,
        )
        # Reorder the rows (and, for a correlation matrix, the columns too)
        # to follow the dendrogram leaves.
        row_labels = dendro_side["layout"]["yaxis"]["ticktext"]
        df = df.loc[row_labels, :]
        if correlation:
            df = df[df.index]
    else:
        row_labels = df.index.to_list()

    heatmap = go.Heatmap(x=df.columns, y=row_labels, z=df.values, colorscale=colorscale)

    title = "" if name == "" else f"{plot_type} of {','.join(descriptors)} {name}"

    if clustered and add_dendrogram:
        # Figure with side-dendrogram
        fig = go.Figure()

        # Dendrogram traces go on the secondary x-axis, left of the heatmap.
        for trace in dendro_side["data"]:
            trace["xaxis"] = "x2"
            fig.add_trace(trace)

        leaf_positions = dendro_side["layout"]["yaxis"]["tickvals"]
        y_labels = heatmap["y"]
        # Place the heatmap rows at the numeric leaf positions of the dendrogram.
        heatmap["y"] = leaf_positions

        fig.add_trace(heatmap)

        fig.update_layout(
            {
                "height": height,
                "width": width,
                "showlegend": False,
                "hovermode": "closest",
                "paper_bgcolor": "white",
                "plot_bgcolor": "white",
                "title_x": 0.5,
            },
            title={"text": title},
            # X-axis of main figure
            xaxis={
                "domain": [0.11, 1],
                "mirror": False,
                "showgrid": False,
                "showline": False,
                "zeroline": False,
                "showticklabels": True,
                "ticks": "",
            },
            # X-axis of side-dendrogram
            xaxis2={
                "domain": [0, 0.1],
                "mirror": False,
                "showgrid": True,
                "showline": False,
                "zeroline": False,
                "showticklabels": False,
                "ticks": "",
            },
            # Y-axis of main figure
            yaxis={
                "domain": [0, 1],
                "mirror": False,
                "showgrid": False,
                "showline": False,
                "zeroline": False,
                "showticklabels": False,
            },
        )

        fig["layout"]["yaxis"]["ticktext"] = np.asarray(y_labels)
        fig["layout"]["yaxis"]["tickvals"] = np.asarray(leaf_positions)
    else:
        # Figure without side-dendrogram
        fig = go.Figure(heatmap)
        fig.update_layout(
            {"title_x": 0.5},
            title={"text": title},
            yaxis={"title": "", "tickmode": "array", "automargin": True},
        )
        fig.update_layout({"height": height, "width": width, "hovermode": "closest"})

    fig.update_layout(
        autosize=True,
        hovermode="closest",
    )

    fig.update_yaxes(automargin=True)
    fig.update_xaxes(automargin=True)

    if not call_show:
        return fig

    fig.show(config={"displaylogo": False})
    return None

plotly_peak_shapes(mint_results, mint_metadata=None, color='ms_file_label', fns=None, col_wrap=1, peak_labels=None, legend=True, verbose=False, legend_orientation='v', call_show=False, palette='Plasma')

Plot peak shapes from mint results as interactive Plotly figure.

Parameters:

Name Type Description Default
mint_results DataFrame

DataFrame in Mint results format.

required
mint_metadata Optional[DataFrame]

DataFrame in Mint metadata format.

None
color str

Column name determining color-coding of plots.

'ms_file_label'
fns Optional[List[str]]

Filenames to include. If None, all files are used.

None
col_wrap int

Maximum number of subplot columns.

1
peak_labels Optional[Union[str, List[str]]]

Peak-labels to include. If None, all peaks are used.

None
legend bool

Whether to display legend.

True
verbose bool

If True, prints additional details.

False
legend_orientation str

Legend orientation ('v' for vertical, 'h' for horizontal).

'v'
call_show bool

If True, displays the plot immediately.

False
palette str

Color palette to use.

'Plasma'

Returns:

Type Description
Optional[Figure]

A Plotly Figure object, or None if call_show is True.

Source code in src/ms_mint/plotly_tools.py
def plotly_peak_shapes(
    mint_results: pd.DataFrame,
    mint_metadata: Optional[pd.DataFrame] = None,
    color: str = "ms_file_label",
    fns: Optional[List[str]] = None,
    col_wrap: int = 1,
    peak_labels: Optional[Union[str, List[str]]] = None,
    legend: bool = True,
    verbose: bool = False,
    legend_orientation: str = "v",
    call_show: bool = False,
    palette: str = "Plasma",
) -> Optional[PlotlyFigure]:
    """Plot peak shapes from mint results as interactive Plotly figure.

    One subplot is created per requested peak label; within each subplot one
    trace is drawn per file that has a row with ``peak_max > 0`` for that label.

    Args:
        mint_results: DataFrame in Mint results format.
        mint_metadata: DataFrame in Mint metadata format. It is merged on
            ``ms_file_label`` against its index.
        color: Column name determining color-coding of plots. A falsy value
            leaves the trace colors to Plotly's defaults.
        fns: Filenames to include. If None, all files are used.
        col_wrap: Maximum number of subplot columns. Values below 1 are
            treated as 1.
        peak_labels: Peak-labels to include. If None, all peaks are used.
        legend: Whether to display legend.
        verbose: If True, prints additional details. Not used by the current
            implementation.
        legend_orientation: Legend orientation ('v' for vertical, 'h' for horizontal).
        call_show: If True, displays the plot immediately.
        palette: Color palette to use.

    Returns:
        A Plotly Figure object, or None if call_show is True.
    """
    mint_results = mint_results.copy()

    # Merge with metadata if provided
    if mint_metadata is not None:
        mint_results = pd.merge(
            mint_results, mint_metadata, left_on="ms_file_label", right_index=True
        )

    # Filter by filenames
    if fns is not None:
        fns = [fn_to_label(fn) for fn in fns]
        mint_results = mint_results[mint_results.ms_file_label.isin(fns)]
    else:
        fns = mint_results.ms_file_label.unique()

    # Filter by peak_labels
    if peak_labels is not None:
        if isinstance(peak_labels, str):
            peak_labels = [peak_labels]
        mint_results = mint_results[mint_results.peak_label.isin(peak_labels)]
    else:
        peak_labels = mint_results.peak_label.unique()
    # A plain list keeps the subplot-title handling independent of array semantics.
    peak_labels = list(peak_labels)

    # One color per entry of ``fns``; None means "let Plotly choose".
    hue_column = None
    if color:
        unique_hues = mint_results[color].unique()
        colors = get_palette_colors(palette, len(unique_hues))
        color_mapping = dict(zip(unique_hues, colors))

        if color == "ms_file_label":
            # .get(): a requested file without any result row has no color
            # (and no trace), so it must not raise KeyError here.
            hue_column = [color_mapping.get(fn) for fn in fns]
        else:
            hue_column = (
                mint_results.drop_duplicates("ms_file_label")
                .set_index("ms_file_label")[color]
                .map(color_mapping)
                .reindex(fns)
                .tolist()
            )

    # Rest of the plotting process
    res = mint_results[mint_results.peak_max > 0]
    res = res.set_index(["peak_label", "ms_file_label"]).sort_index()

    # The grid must hold every requested label because subplots are addressed
    # by the label's position in ``peak_labels``.
    col_wrap = max(1, col_wrap)
    n_rows = max(1, -(-len(peak_labels) // col_wrap))  # ceiling division

    fig = make_subplots(rows=n_rows, cols=col_wrap, subplot_titles=peak_labels)

    for label_i, label in enumerate(peak_labels):
        ndx_r = (label_i // col_wrap) + 1
        ndx_c = label_i % col_wrap + 1

        for file_i, fn in enumerate(fns):
            try:
                x, y = res.loc[(label, fn), ["peak_shape_rt", "peak_shape_int"]]
            except KeyError as e:
                logging.warning(e)
                continue

            # Missing shapes show up as non-iterable values (e.g. NaN).
            if not isinstance(x, Iterable):
                continue
            if isinstance(x, str):
                x = x.split(",")
                y = y.split(",")

            trace_color = None if hue_column is None else hue_column[file_i]

            fig.add_trace(
                go.Scattergl(
                    x=x,
                    y=y,
                    name=P(fn).name,
                    mode="markers",
                    legendgroup=file_i,
                    showlegend=(label_i == 0),
                    marker_color=trace_color,
                    text=fn,
                    fill="tozeroy",
                    marker=dict(size=3),
                ),
                row=ndx_r,
                col=ndx_c,
            )

            fig.update_xaxes(title_text="Scan time [s]", row=ndx_r, col=ndx_c)
            fig.update_yaxes(title_text="Intensity", row=ndx_r, col=ndx_c)

    # Layout updates
    if legend:
        fig.update_layout(legend_orientation=legend_orientation)

    fig.update_layout(showlegend=legend)
    fig.update_layout(height=400 * n_rows, title_text="Peak Shapes")

    if call_show:
        fig.show(config={"displaylogo": False})
        return None
    else:
        return fig

set_template()

Set a default template for plotly figures.

Creates a "draft" template with smaller font size and sets it as the default template for all plotly figures.

Source code in src/ms_mint/plotly_tools.py
def set_template() -> None:
    """Register and activate the "draft" plotly template.

    The template only overrides the layout font size (10). It is stored under
    the name "draft" in ``pio.templates`` and then made the default template,
    so the change applies process-wide.
    """
    draft_layout = dict(font={"size": 10})
    pio.templates["draft"] = go.layout.Template(layout=draft_layout)
    pio.templates.default = "draft"

options: show_root_heading: true show_root_full_path: true show_submodules: true members_order: source

PCA_Plotter

Class for visualizing PCA results from MS-MINT analysis.

This class provides methods to create various plots of PCA results, including cumulative variance plots, pairplots, and loading plots.

Attributes:

Name Type Description
pca

The PrincipalComponentsAnalyser instance containing results to visualize.

Source code in src/ms_mint/pca.py
class PCA_Plotter:
    """Class for visualizing PCA results from MS-MINT analysis.

    This class provides methods to create various plots of PCA results,
    including cumulative variance plots, pairplots, and loading plots.
    Every public plot has a static (Matplotlib/Seaborn) and an interactive
    (Plotly) variant, selected with the ``interactive`` flag.

    Attributes:
        pca: The PrincipalComponentsAnalyser instance containing results to visualize.
    """

    # Column added by `_prepare_data` for user-supplied labels. `pairplot`
    # must colour by exactly this name, so it is defined in one place.
    _LABEL_COLUMN = "Label"

    def __init__(self, pca: PrincipalComponentsAnalyser) -> None:
        """Initialize a PCA_Plotter instance.

        Args:
            pca: PrincipalComponentsAnalyser instance with results to visualize.
        """
        self.pca = pca

    def cumulative_variance(
        self, interactive: bool = False, **kwargs
    ) -> Union[Figure, PlotlyFigure]:
        """Plot the cumulative explained variance of principal components.

        Args:
            interactive: If True, returns a Plotly interactive figure.
                If False, returns a static Matplotlib figure.
            **kwargs: Additional keyword arguments passed to the underlying plotting functions.

        Returns:
            Either a Matplotlib figure or a Plotly figure depending on the interactive parameter.
        """
        if interactive:
            return self.cumulative_variance_px(**kwargs)
        return self.cumulative_variance_sns(**kwargs)

    def cumulative_variance_px(self, **kwargs) -> PlotlyFigure:
        """Create an interactive Plotly plot of cumulative explained variance.

        Args:
            **kwargs: Additional keyword arguments passed to px.bar.

        Returns:
            Plotly figure showing cumulative explained variance.
        """
        n_components = self.pca.results["n_components"]
        cum_expl_var = self.pca.results["cum_expl_var"]
        df = pd.DataFrame(
            {
                # Components are numbered from 1 on the x-axis.
                "Principal Component": np.arange(n_components) + 1,
                "Explained variance [%]": cum_expl_var,
            }
        )
        fig = px.bar(
            df,
            x="Principal Component",
            y="Explained variance [%]",
            title="Cumulative explained variance",
            labels={
                "Principal Component": "Principal Component",
                "Explained variance [%]": "Explained variance [%]",
            },
            **kwargs,
        )
        fig.update_layout(autosize=True, showlegend=False)
        return fig

    def cumulative_variance_sns(self, **kwargs) -> Figure:
        """Create a static Matplotlib plot of cumulative explained variance.

        Args:
            **kwargs: Additional keyword arguments for figure customization.
                'aspect': Width-to-height ratio of the figure (default: 1).
                'height': Height of the figure in inches (default: 5).
                Any other keyword is ignored.

        Returns:
            Matplotlib figure showing cumulative explained variance.
        """
        aspect = kwargs.get("aspect", 1)
        height = kwargs.get("height", 5)

        n_components = self.pca.results["n_components"]
        cum_expl_var = self.pca.results["cum_expl_var"]

        # The width follows from height and aspect ratio only.
        width = height * aspect

        fig, ax = plt.subplots(figsize=(width, height))
        ax.bar(
            np.arange(n_components) + 1,
            cum_expl_var,
            facecolor="grey",
            edgecolor="none",
        )
        ax.set_xlabel("Principal Component")
        ax.set_ylabel("Explained variance [%]")
        ax.set_title("Cumulative explained variance")
        ax.spines["top"].set_visible(False)
        ax.spines["right"].set_visible(False)
        # One tick per component, numbered from 1.
        ax.set_xticks(range(1, len(cum_expl_var) + 1))
        return fig

    def _prepare_data(
        self, n_components: int = 3, hue: Optional[Union[str, List[str]]] = None
    ) -> pd.DataFrame:
        """Prepare data for pairplot visualization.

        Takes the first ``n_components`` columns of the projected data and
        joins them (on the index) with the metadata columns of ``mint.meta``
        that are not entirely empty.

        Args:
            n_components: Number of principal components to include.
            hue: Labels used for coloring points. If a string, data is taken from
                the mint.meta DataFrame. If a non-empty sequence, its values are
                stored as strings in a "Label" column.

        Returns:
            DataFrame containing the prepared data for visualization.
        """
        df = self.pca.results["df_projected"].copy()
        cols = df.columns.to_list()[:n_components]
        df = df[cols]

        df = pd.merge(
            df, self.pca.mint.meta.dropna(axis=1, how="all"), left_index=True, right_index=True
        )

        # Explicit None/length checks instead of truthiness: `bool(array)` raises
        # for numpy arrays with more than one element.
        if hue is not None and not isinstance(hue, str) and len(hue) > 0:
            df[self._LABEL_COLUMN] = hue
            df[self._LABEL_COLUMN] = df[self._LABEL_COLUMN].astype(str)

        return df

    def pairplot(
        self,
        n_components: int = 3,
        hue: Optional[Union[str, List[str]]] = None,
        fig_kws: Optional[Dict[str, Any]] = None,
        interactive: bool = False,
        **kwargs,
    ) -> Union[sns.axisgrid.PairGrid, PlotlyFigure]:
        """Create a pairplot of principal components.

        Args:
            n_components: Number of principal components to include in the plot.
            hue: Labels used for coloring points. If a string, data is taken from
                the mint.meta DataFrame. If a list, values are used directly.
            fig_kws: Keyword arguments passed to plt.figure if using seaborn.
            interactive: If True, returns a Plotly interactive figure.
                If False, returns a static Seaborn PairGrid.
            **kwargs: Additional keyword arguments passed to the underlying plotting functions.

        Returns:
            Either a Seaborn PairGrid or a Plotly figure depending on the interactive parameter.
        """
        df = self._prepare_data(n_components=n_components, hue=hue)

        # Explicit labels were written to the label column by `_prepare_data`;
        # colour by that column (the name must match exactly, including case).
        # An empty sequence adds no column, so there is nothing to colour by.
        if hue is not None and not isinstance(hue, str):
            hue = self._LABEL_COLUMN if len(hue) > 0 else None

        if interactive:
            return self.pairplot_plotly(df, color_col=hue, **kwargs)
        return self.pairplot_sns(df, fig_kws=fig_kws, hue=hue, **kwargs)

    def pairplot_sns(
        self, df: pd.DataFrame, fig_kws: Optional[Dict[str, Any]] = None, **kwargs
    ) -> sns.axisgrid.PairGrid:
        """Create a static Seaborn pairplot of principal components.

        Args:
            df: DataFrame containing the data to visualize.
            fig_kws: Keyword arguments passed to plt.figure.
            **kwargs: Additional keyword arguments passed to sns.pairplot.

        Returns:
            Seaborn PairGrid object.
        """
        if fig_kws is None:
            fig_kws = {}
        plt.figure(**fig_kws)
        g = sns.pairplot(df, **kwargs)
        return g

    def pairplot_plotly(
        self, df: pd.DataFrame, color_col: Optional[str] = None, **kwargs
    ) -> PlotlyFigure:
        """Create an interactive Plotly pairplot of principal components.

        Args:
            df: DataFrame containing the data to visualize.
            color_col: Column name to use for coloring points.
            **kwargs: Additional keyword arguments passed to ff.create_scatterplotmatrix.

        Returns:
            Plotly figure object.
        """
        import re  # local import: only needed to escape the colour column name

        # Keep the component columns plus, if given, the colour column. The name
        # is escaped so that regex metacharacters in it are matched literally.
        if color_col is None:
            pattern = "PC"
        else:
            pattern = f"PC|^{re.escape(color_col)}$"
        columns = df.filter(regex=pattern).columns
        fig = ff.create_scatterplotmatrix(
            df[columns], index=color_col, hovertext=df.index, **kwargs
        )
        # set the legendgroup equal to the marker color
        for t in fig.data:
            t.legendgroup = t.marker.color
        return fig

    def loadings(
        self, interactive: bool = False, **kwargs
    ) -> Union[sns.axisgrid.FacetGrid, PlotlyFigure]:
        """Plot PCA loadings (feature contributions to principal components).

        Args:
            interactive: If True, returns a Plotly interactive figure.
                If False, returns a static Seaborn FacetGrid.
            **kwargs: Additional keyword arguments passed to the underlying plotting functions.

        Returns:
            Either a Seaborn FacetGrid or a Plotly figure depending on the interactive parameter.
        """
        if interactive:
            return self.loadings_plotly(**kwargs)
        return self.loadings_sns(**kwargs)

    def loadings_sns(self, **kwargs) -> sns.axisgrid.FacetGrid:
        """Create a static Seaborn plot of PCA loadings.

        Args:
            **kwargs: Additional keyword arguments passed to sns.catplot.
                If 'row' is not specified, it defaults to 'PC'.

        Returns:
            Seaborn FacetGrid object.
        """
        if "row" not in kwargs:
            kwargs["row"] = "PC"
        g = sns.catplot(
            data=self.pca.results["feature_contributions"],
            x="peak_label",
            y="Coefficient",
            kind="bar",
            **kwargs,
        )
        plt.tight_layout()
        return g

    def loadings_plotly(self, **kwargs) -> PlotlyFigure:
        """Create an interactive Plotly plot of PCA loadings.

        Args:
            **kwargs: Additional keyword arguments passed to px.bar.
                If 'facet_row' is not specified, it defaults to 'PC'.

        Returns:
            Plotly figure object.
        """
        if "facet_row" not in kwargs:
            kwargs["facet_row"] = "PC"
        fig = px.bar(
            self.pca.results["feature_contributions"],
            x="peak_label",
            y="Coefficient",
            barmode="group",
            **kwargs,
        )
        return fig

__init__(pca)

Initialize a PCA_Plotter instance.

Parameters:

Name Type Description Default
pca PrincipalComponentsAnalyser

PrincipalComponentsAnalyser instance with results to visualize.

required
Source code in src/ms_mint/pca.py
def __init__(self, pca: PrincipalComponentsAnalyser) -> None:
    """Initialize a PCA_Plotter instance.

    Args:
        pca: PrincipalComponentsAnalyser instance with results to visualize.
            Only the reference is stored; nothing is read from it here.
    """
    # Keep the analyser on the instance under the attribute name `pca`.
    self.pca = pca

cumulative_variance(interactive=False, **kwargs)

Plot the cumulative explained variance of principal components.

Parameters:

Name Type Description Default
interactive bool

If True, returns a Plotly interactive figure. If False, returns a static Matplotlib figure.

False
**kwargs

Additional keyword arguments passed to the underlying plotting functions.

{}

Returns:

Type Description
Union[Figure, PlotlyFigure]

Either a Matplotlib figure or a Plotly figure depending on the interactive parameter.

Source code in src/ms_mint/pca.py
def cumulative_variance(
    self, interactive: bool = False, **kwargs
) -> Union[Figure, PlotlyFigure]:
    """Dispatch the cumulative explained variance plot to the chosen backend.

    Args:
        interactive: When True the Plotly variant (`cumulative_variance_px`)
            is used, otherwise the Matplotlib variant
            (`cumulative_variance_sns`).
        **kwargs: Forwarded unchanged to the selected plotting method.

    Returns:
        Whatever the selected plotting method returns: a Plotly figure when
        interactive, a Matplotlib figure otherwise.
    """
    plot_method = (
        self.cumulative_variance_px if interactive else self.cumulative_variance_sns
    )
    return plot_method(**kwargs)

cumulative_variance_px(**kwargs)

Create an interactive Plotly plot of cumulative explained variance.

Parameters:

Name Type Description Default
**kwargs

Additional keyword arguments passed to px.bar.

{}

Returns:

Type Description
Figure

Plotly figure showing cumulative explained variance.

Source code in src/ms_mint/pca.py
def cumulative_variance_px(self, **kwargs) -> PlotlyFigure:
    """Draw the cumulative explained variance as an interactive bar chart.

    Reads "n_components" and "cum_expl_var" from ``self.pca.results`` and
    plots one bar per component, numbered from 1.

    Args:
        **kwargs: Additional keyword arguments passed to px.bar.

    Returns:
        Plotly figure showing cumulative explained variance.
    """
    results = self.pca.results
    x_name = "Principal Component"
    y_name = "Explained variance [%]"

    component_numbers = np.arange(results["n_components"]) + 1
    table = pd.DataFrame({x_name: component_numbers, y_name: results["cum_expl_var"]})

    fig = px.bar(
        table,
        x=x_name,
        y=y_name,
        title="Cumulative explained variance",
        labels={x_name: x_name, y_name: y_name},
        **kwargs,
    )
    fig.update_layout(autosize=True, showlegend=False)
    return fig

cumulative_variance_sns(**kwargs)

Create a static Matplotlib plot of cumulative explained variance.

Parameters:

Name Type Description Default
**kwargs

Additional keyword arguments for figure customization. 'aspect': Width-to-height ratio of the figure (default: 1). 'height': Height of the figure in inches (default: 5).

{}

Returns:

Type Description
Figure

Matplotlib figure showing cumulative explained variance.

Source code in src/ms_mint/pca.py
def cumulative_variance_sns(self, **kwargs) -> Figure:
    """Draw the cumulative explained variance as a static bar chart.

    Reads "n_components" and "cum_expl_var" from ``self.pca.results``.

    Args:
        **kwargs: Figure customization; only these keys are read:
            'aspect': Width-to-height ratio of the figure (default: 1).
            'height': Height of the figure in inches (default: 5).

    Returns:
        Matplotlib figure showing cumulative explained variance.
    """
    height = kwargs.get("height", 5)
    # Width is derived from the height and the aspect ratio.
    width = height * kwargs.get("aspect", 1)

    results = self.pca.results
    n_components = results["n_components"]
    cum_expl_var = results["cum_expl_var"]

    fig, ax = plt.subplots(figsize=(width, height))
    ax.bar(
        np.arange(n_components) + 1,
        cum_expl_var,
        facecolor="grey",
        edgecolor="none",
    )
    ax.set_xlabel("Principal Component")
    ax.set_ylabel("Explained variance [%]")
    ax.set_title("Cumulative explained variance")
    # Hide the top and right frame lines.
    for side in ("top", "right"):
        ax.spines[side].set_visible(False)
    # One tick per component, numbered from 1.
    ax.set_xticks(range(1, len(cum_expl_var) + 1))
    return fig

loadings(interactive=False, **kwargs)

Plot PCA loadings (feature contributions to principal components).

Parameters:

Name Type Description Default
interactive bool

If True, returns a Plotly interactive figure. If False, returns a static Seaborn FacetGrid.

False
**kwargs

Additional keyword arguments passed to the underlying plotting functions.

{}

Returns:

Type Description
Union[FacetGrid, Figure]

Either a Seaborn FacetGrid or a Plotly figure depending on the interactive parameter.

Source code in src/ms_mint/pca.py
def loadings(
    self, interactive: bool = False, **kwargs
) -> Union[sns.axisgrid.FacetGrid, PlotlyFigure]:
    """Dispatch the PCA loadings plot to the chosen backend.

    Args:
        interactive: When True the Plotly variant (`loadings_plotly`) is
            used, otherwise the Seaborn variant (`loadings_sns`).
        **kwargs: Forwarded unchanged to the selected plotting method.

    Returns:
        Whatever the selected plotting method returns: a Plotly figure when
        interactive, a Seaborn FacetGrid otherwise.
    """
    plot_method = self.loadings_plotly if interactive else self.loadings_sns
    return plot_method(**kwargs)

loadings_plotly(**kwargs)

Create an interactive Plotly plot of PCA loadings.

Parameters:

Name Type Description Default
**kwargs

Additional keyword arguments passed to px.bar. If 'facet_row' is not specified, it defaults to 'PC'.

{}

Returns:

Type Description
Figure

Plotly figure object.

Source code in src/ms_mint/pca.py
def loadings_plotly(self, **kwargs) -> PlotlyFigure:
    """Draw the PCA loadings as an interactive grouped bar chart.

    Plots "Coefficient" against "peak_label" from the
    "feature_contributions" entry of ``self.pca.results``.

    Args:
        **kwargs: Additional keyword arguments passed to px.bar.
            If 'facet_row' is not specified, it defaults to 'PC'.

    Returns:
        Plotly figure object.
    """
    # Default to one facet row per component unless the caller chose otherwise.
    kwargs.setdefault("facet_row", "PC")
    contributions = self.pca.results["feature_contributions"]
    return px.bar(
        contributions,
        x="peak_label",
        y="Coefficient",
        barmode="group",
        **kwargs,
    )

loadings_sns(**kwargs)

Create a static Seaborn plot of PCA loadings.

Parameters:

Name Type Description Default
**kwargs

Additional keyword arguments passed to sns.catplot. If 'row' is not specified, it defaults to 'PC'.

{}

Returns:

Type Description
FacetGrid

Seaborn FacetGrid object.

Source code in src/ms_mint/pca.py
def loadings_sns(self, **kwargs) -> sns.axisgrid.FacetGrid:
    """Draw the PCA loadings as a static bar chart.

    Plots "Coefficient" against "peak_label" from the
    "feature_contributions" entry of ``self.pca.results`` and then calls
    ``plt.tight_layout()``.

    Args:
        **kwargs: Additional keyword arguments passed to sns.catplot.
            If 'row' is not specified, it defaults to 'PC'.

    Returns:
        Seaborn FacetGrid object.
    """
    # Default to one facet row per component unless the caller chose otherwise.
    kwargs.setdefault("row", "PC")
    contributions = self.pca.results["feature_contributions"]
    grid = sns.catplot(
        data=contributions,
        x="peak_label",
        y="Coefficient",
        kind="bar",
        **kwargs,
    )
    plt.tight_layout()
    return grid

pairplot(n_components=3, hue=None, fig_kws=None, interactive=False, **kwargs)

Create a pairplot of principal components.

Parameters:

Name Type Description Default
n_components int

Number of principal components to include in the plot.

3
hue Optional[Union[str, List[str]]]

Labels used for coloring points. If a string, data is taken from the mint.meta DataFrame. If a list, values are used directly.

None
fig_kws Optional[Dict[str, Any]]

Keyword arguments passed to plt.figure if using seaborn.

None
interactive bool

If True, returns a Plotly interactive figure. If False, returns a static Seaborn PairGrid.

False
**kwargs

Additional keyword arguments passed to the underlying plotting functions.

{}

Returns:

Type Description
Union[PairGrid, Figure]

Either a Seaborn PairGrid or a Plotly figure depending on the interactive parameter.

Source code in src/ms_mint/pca.py
def pairplot(
    self,
    n_components: int = 3,
    hue: Optional[Union[str, List[str]]] = None,
    fig_kws: Optional[Dict[str, Any]] = None,
    interactive: bool = False,
    **kwargs,
) -> Union[sns.axisgrid.PairGrid, PlotlyFigure]:
    """Create a pairplot of principal components.

    Args:
        n_components: Number of principal components to include in the plot.
        hue: Labels used for coloring points. If a string, data is taken from
            the mint.meta DataFrame. If a list, values are used directly.
        fig_kws: Keyword arguments passed to plt.figure if using seaborn.
        interactive: If True, returns a Plotly interactive figure.
            If False, returns a static Seaborn PairGrid.
        **kwargs: Additional keyword arguments passed to the underlying plotting functions.

    Returns:
        Either a Seaborn PairGrid or a Plotly figure depending on the interactive parameter.
    """
    df = self._prepare_data(n_components=n_components, hue=hue)

    # `_prepare_data` stores explicit labels in a column named "Label"
    # (capital L); the colour column passed on must use exactly that name.
    # An empty sequence adds no column, so there is nothing to colour by.
    if hue is not None and not isinstance(hue, str):
        hue = "Label" if len(hue) > 0 else None

    if interactive:
        return self.pairplot_plotly(df, color_col=hue, **kwargs)
    return self.pairplot_sns(df, fig_kws=fig_kws, hue=hue, **kwargs)

pairplot_plotly(df, color_col=None, **kwargs)

Create an interactive Plotly pairplot of principal components.

Parameters:

Name Type Description Default
df DataFrame

DataFrame containing the data to visualize.

required
color_col Optional[str]

Column name to use for coloring points.

None
**kwargs

Additional keyword arguments passed to ff.create_scatterplotmatrix.

{}

Returns:

Type Description
Figure

Plotly figure object.

Source code in src/ms_mint/pca.py
def pairplot_plotly(
    self, df: pd.DataFrame, color_col: Optional[str] = None, **kwargs
) -> PlotlyFigure:
    """Create an interactive Plotly pairplot of principal components.

    Only columns whose name contains "PC", plus the colour column if one is
    given, are passed to the scatterplot matrix. The DataFrame index is used
    as hover text.

    Args:
        df: DataFrame containing the data to visualize.
        color_col: Column name to use for coloring points.
        **kwargs: Additional keyword arguments passed to ff.create_scatterplotmatrix.

    Returns:
        Plotly figure object.
    """
    import re  # local import: only needed to escape the colour column name

    # Escape the column name so regex metacharacters in it (e.g. parentheses)
    # are matched literally; without a colour column select components only.
    if color_col is None:
        pattern = "PC"
    else:
        pattern = f"PC|^{re.escape(color_col)}$"
    columns = df.filter(regex=pattern).columns
    fig = ff.create_scatterplotmatrix(
        df[columns], index=color_col, hovertext=df.index, **kwargs
    )
    # set the legendgroup equal to the marker color
    for t in fig.data:
        t.legendgroup = t.marker.color
    return fig

pairplot_sns(df, fig_kws=None, **kwargs)

Create a static Seaborn pairplot of principal components.

Parameters:

Name Type Description Default
df DataFrame

DataFrame containing the data to visualize.

required
fig_kws Optional[Dict[str, Any]]

Keyword arguments passed to plt.figure.

None
**kwargs

Additional keyword arguments passed to sns.pairplot.

{}

Returns:

Type Description
PairGrid

Seaborn PairGrid object.

Source code in src/ms_mint/pca.py
def pairplot_sns(
    self, df: pd.DataFrame, fig_kws: Optional[Dict[str, Any]] = None, **kwargs
) -> sns.axisgrid.PairGrid:
    """Draw a static Seaborn pairplot of the given data.

    Calls ``plt.figure`` with ``fig_kws`` first and then ``sns.pairplot``.

    Args:
        df: DataFrame containing the data to visualize.
        fig_kws: Keyword arguments passed to plt.figure; None means no arguments.
        **kwargs: Additional keyword arguments passed to sns.pairplot.

    Returns:
        Seaborn PairGrid object.
    """
    figure_options = fig_kws if fig_kws is not None else {}
    plt.figure(**figure_options)
    return sns.pairplot(df, **kwargs)

PrincipalComponentsAnalyser

Class for applying PCA to MS-MINT analysis results.

This class provides functionality to perform Principal Component Analysis on MS-MINT metabolomics data and store the results for visualization.

Attributes:

Name Type Description
mint

The Mint instance containing the data to analyze.

results Optional[Dict[str, Any]]

Dictionary containing PCA results after running the analysis.

plot

PCA_Plotter instance for visualizing the PCA results.

Source code in src/ms_mint/pca.py
class PrincipalComponentsAnalyser:
    """Class for applying PCA to MS-MINT analysis results.

    This class provides functionality to perform Principal Component Analysis on
    MS-MINT metabolomics data and store the results for visualization.

    Attributes:
        mint: The Mint instance containing the data to analyze.
        results: Dictionary containing PCA results after running the analysis;
            None until `run` has been called.
        plot: PCA_Plotter instance for visualizing the PCA results.
    """

    def __init__(self, mint: Optional["ms_mint.Mint.Mint"] = None) -> None:
        """Initialize a PrincipalComponentsAnalyser instance.

        Args:
            mint: Mint instance containing the data to analyze.
        """
        self.mint = mint
        self.results: Optional[Dict[str, Any]] = None
        self.plot = PCA_Plotter(self)

    def run(
        self,
        n_components: int = 3,
        on: Optional[str] = None,
        var_name: str = "peak_max",
        fillna: Union[str, float] = "median",
        apply: Optional[str] = None,
        groupby: Optional[Union[str, List[str]]] = None,
        scaler: str = "standard",
    ) -> None:
        """Run Principal Component Analysis on the current results.

        Performs PCA on the data and stores results in self.results under the
        keys "df_projected", "cum_expl_var", "n_components", "type",
        "feature_contributions" and "class" (the fitted PCA object).

        Args:
            n_components: Number of PCA components to calculate. Capped at the
                smaller dimension of the data table.
            on: Deprecated, use var_name instead.
            var_name: Column name from results to use for PCA.
            fillna: Method to fill missing values. One of "median", "mean", "zero",
                or a numeric value.
            apply: Transformation to apply to the data before PCA.
            groupby: Column(s) to group by before analysis.
            scaler: Method to scale the data. One of "standard", "robust", "minmax".

        Warns:
            DeprecationWarning: If the deprecated 'on' parameter is used.
        """
        if on is not None:
            # stacklevel=2 attributes the warning to the caller's line, so it is
            # not hidden by the default filter for warnings from library code.
            warnings.warn(
                "on is deprecated, use var_name instead",
                DeprecationWarning,
                stacklevel=2,
            )
            var_name = on

        df = self.mint.crosstab(var_name=var_name, apply=apply, scaler=scaler, groupby=groupby)

        # Translate the named strategies into fill values; only compare when
        # fillna is a string so non-scalar fill values are passed through as-is.
        if isinstance(fillna, str):
            if fillna == "median":
                fillna = df.median()
            elif fillna == "mean":
                fillna = df.mean()
            elif fillna == "zero":
                fillna = 0

        df = df.fillna(fillna)

        # Never request more components than the smaller table dimension.
        min_dim = min(df.shape)
        n_components = min(n_components, min_dim)
        pca = PCA(n_components)
        X_projected = pca.fit_transform(df)
        # Convert to dataframe
        df_projected = pd.DataFrame(X_projected, index=df.index.get_level_values(0))
        # Set columns to PC-1, PC-2, ...
        df_projected.columns = [f"PC-{int(i) + 1}" for i in df_projected.columns]

        # Calculate cumulative explained variance in percent
        explained_variance = pca.explained_variance_ratio_ * 100
        cum_expl_var = np.cumsum(explained_variance)

        # Create feature contributions: map each unit vector in component
        # space back to feature space via the fitted PCA.
        unit_vectors = np.eye(n_components, dtype=int)
        dfc = pd.DataFrame(pca.inverse_transform(unit_vectors))
        dfc.columns = df.columns
        dfc.index = [f"PC-{i + 1}" for i in range(n_components)]
        dfc.index.name = "PC"
        # convert to long format
        dfc = dfc.stack().reset_index().rename(columns={0: "Coefficient"})

        self.results = {
            "df_projected": df_projected,
            "cum_expl_var": cum_expl_var,
            "n_components": n_components,
            "type": "PCA",
            "feature_contributions": dfc,
            "class": pca,
        }

__init__(mint=None)

Initialize a PrincipalComponentsAnalyser instance.

Parameters:

Name Type Description Default
mint Optional['ms_mint.Mint.Mint']

Mint instance containing the data to analyze.

None
Source code in src/ms_mint/pca.py
def __init__(self, mint: Optional["ms_mint.Mint.Mint"] = None) -> None:
    """Initialize a PrincipalComponentsAnalyser instance.

    Args:
        mint: Mint instance containing the data to analyze. Defaults to None.
    """
    self.mint = mint
    # Starts out as None; no results are computed during construction.
    self.results: Optional[Dict[str, Any]] = None
    # The plotter is constructed with this analyser as its argument.
    self.plot = PCA_Plotter(self)

run(n_components=3, on=None, var_name='peak_max', fillna='median', apply=None, groupby=None, scaler='standard')

Run Principal Component Analysis on the current results.

Performs PCA on the data and stores results in self.results.

Parameters:

Name Type Description Default
n_components int

Number of PCA components to calculate.

3
on Optional[str]

Deprecated, use var_name instead.

None
var_name str

Column name from results to use for PCA.

'peak_max'
fillna Union[str, float]

Method to fill missing values. One of "median", "mean", "zero", or a numeric value.

'median'
apply Optional[str]

Transformation to apply to the data before PCA.

None
groupby Optional[Union[str, List[str]]]

Column(s) to group by before analysis.

None
scaler str

Method to scale the data. One of "standard", "robust", "minmax".

'standard'

Raises:

Type Description
DeprecationWarning

Issued as a warning (not raised as an exception) if the deprecated 'on' parameter is used.

Source code in src/ms_mint/pca.py
def run(
    self,
    n_components: int = 3,
    on: Optional[str] = None,
    var_name: str = "peak_max",
    fillna: Union[str, float] = "median",
    apply: Optional[str] = None,
    groupby: Optional[Union[str, List[str]]] = None,
    scaler: str = "standard",
) -> None:
    """Run Principal Component Analysis on the current results.

    Performs PCA on the data and stores results in self.results under the
    keys "df_projected", "cum_expl_var", "n_components", "type",
    "feature_contributions" and "class" (the fitted PCA object).

    Args:
        n_components: Number of PCA components to calculate. Capped at the
            smaller dimension of the data table.
        on: Deprecated, use var_name instead.
        var_name: Column name from results to use for PCA.
        fillna: Method to fill missing values. One of "median", "mean", "zero",
            or a numeric value.
        apply: Transformation to apply to the data before PCA.
        groupby: Column(s) to group by before analysis.
        scaler: Method to scale the data. One of "standard", "robust", "minmax".

    Warns:
        DeprecationWarning: If the deprecated 'on' parameter is used.
    """
    if on is not None:
        # stacklevel=2 attributes the warning to the caller's line, so it is
        # not hidden by the default filter for warnings from library code.
        warnings.warn(
            "on is deprecated, use var_name instead",
            DeprecationWarning,
            stacklevel=2,
        )
        var_name = on

    df = self.mint.crosstab(var_name=var_name, apply=apply, scaler=scaler, groupby=groupby)

    # Translate the named strategies into fill values; only compare when
    # fillna is a string so non-scalar fill values are passed through as-is.
    if isinstance(fillna, str):
        if fillna == "median":
            fillna = df.median()
        elif fillna == "mean":
            fillna = df.mean()
        elif fillna == "zero":
            fillna = 0

    df = df.fillna(fillna)

    # Never request more components than the smaller table dimension.
    min_dim = min(df.shape)
    n_components = min(n_components, min_dim)
    pca = PCA(n_components)
    X_projected = pca.fit_transform(df)
    # Convert to dataframe
    df_projected = pd.DataFrame(X_projected, index=df.index.get_level_values(0))
    # Set columns to PC-1, PC-2, ...
    df_projected.columns = [f"PC-{int(i) + 1}" for i in df_projected.columns]

    # Calculate cumulative explained variance in percent
    explained_variance = pca.explained_variance_ratio_ * 100
    cum_expl_var = np.cumsum(explained_variance)

    # Create feature contributions: map each unit vector in component
    # space back to feature space via the fitted PCA.
    unit_vectors = np.eye(n_components, dtype=int)
    dfc = pd.DataFrame(pca.inverse_transform(unit_vectors))
    dfc.columns = df.columns
    dfc.index = [f"PC-{i + 1}" for i in range(n_components)]
    dfc.index.name = "PC"
    # convert to long format
    dfc = dfc.stack().reset_index().rename(columns={0: "Coefficient"})

    self.results = {
        "df_projected": df_projected,
        "cum_expl_var": cum_expl_var,
        "n_components": n_components,
        "type": "PCA",
        "feature_contributions": dfc,
        "class": pca,
    }

options: show_root_heading: true show_root_full_path: true show_submodules: true members_order: source