# Source code for peptacular.sequence.digestion

from collections.abc import Sequence
from typing import overload

from ..annotation import ProFormaAnnotation
from ..constants import parallelMethod, parallelMethodLiteral
from ..digestion.core import generate_regex
from ..spans import Span
from .parallel import parallel_apply_internal
from .util import get_annotation_input


def _left_semi_digest(
    sequence: str | ProFormaAnnotation,
    min_len: int | None = None,
    max_len: int | None = None,
) -> list[tuple[str, Span]]:
    """
    Serialize every span produced by ``left_semi_spans`` for a single input.

    The input is first normalized with ``get_annotation_input`` (called with
    ``copy=False``); ``min_len`` and ``max_len`` are forwarded unchanged to the
    annotation's ``left_semi_spans`` method. Each resulting span is used to
    slice the annotation, and the slice is serialized.

    Returns a list of ``(serialized sub-sequence, span)`` tuples in the order
    the spans are produced.
    """
    parsed = get_annotation_input(sequence, copy=False)
    windows = parsed.left_semi_spans(min_len=min_len, max_len=max_len)
    return [(parsed[window].serialize(), window) for window in windows]


# Typing-only overload for a single input: a lone ``str`` or
# ``ProFormaAnnotation`` yields a flat list of (serialized sub-sequence, span)
# tuples. ``n_workers`` and ``chunksize`` are typed as ``None`` in this form.
@overload
def left_semi_digest(
    sequence: str | ProFormaAnnotation,
    min_len: int | None = None,
    max_len: int | None = None,
    n_workers: None = None,
    chunksize: None = None,
    method: parallelMethod | parallelMethodLiteral | None = None,
) -> list[tuple[str, Span]]: ...


# Typing-only overload for batch input: a ``Sequence`` of inputs yields one
# list of (serialized sub-sequence, span) tuples per input. ``n_workers`` and
# ``chunksize`` may be integers in this form.
@overload
def left_semi_digest(
    sequence: Sequence[str | ProFormaAnnotation],
    min_len: int | None = None,
    max_len: int | None = None,
    n_workers: int | None = None,
    chunksize: int | None = None,
    method: parallelMethod | parallelMethodLiteral | None = None,
) -> list[list[tuple[str, Span]]]: ...


def left_semi_digest(
    sequence: str | ProFormaAnnotation | Sequence[str | ProFormaAnnotation],
    min_len: int | None = None,
    max_len: int | None = None,
    n_workers: int | None = None,
    chunksize: int | None = None,
    method: parallelMethod | parallelMethodLiteral | None = None,
) -> list[tuple[str, Span]] | list[list[tuple[str, Span]]]:
    """
    Builds the left semi-enzymatic sequences from the given input `sequence`.

    A single ``str`` or ``ProFormaAnnotation`` is processed directly and yields
    a flat list of ``(serialized sub-sequence, span)`` tuples. Any other
    ``Sequence`` is treated as a batch and handed to
    ``parallel_apply_internal`` together with ``n_workers``, ``chunksize`` and
    ``method``; per the return annotation this gives one such list per input.

    ``min_len`` and ``max_len`` are forwarded unchanged in both cases.
    ``n_workers``, ``chunksize`` and ``method`` are not used for a single input.
    """
    # str (and, judging by the explicit exclusion, ProFormaAnnotation) can
    # satisfy the Sequence check, so both are ruled out before batching.
    if isinstance(sequence, Sequence) and not isinstance(
        sequence, (str, ProFormaAnnotation)
    ):
        return parallel_apply_internal(
            _left_semi_digest,
            sequence,
            n_workers=n_workers,
            chunksize=chunksize,
            method=method,
            min_len=min_len,
            max_len=max_len,
        )

    return _left_semi_digest(
        sequence=sequence,
        min_len=min_len,
        max_len=max_len,
    )


def _right_semi_digest(
    sequence: str | ProFormaAnnotation,
    min_len: int | None = None,
    max_len: int | None = None,
) -> list[tuple[str, Span]]:
    """
    Serialize every span produced by ``right_semi_spans`` for a single input.

    The input is normalized with ``get_annotation_input`` (``copy=False``);
    ``min_len`` and ``max_len`` are forwarded unchanged to the annotation's
    ``right_semi_spans`` method.

    Returns a list of ``(serialized sub-sequence, span)`` tuples.
    """
    annot = get_annotation_input(sequence, copy=False)
    return [
        (annot[span].serialize(), span)
        for span in annot.right_semi_spans(
            min_len=min_len,
            max_len=max_len,
        )
    ]


# Typing-only overload: single input -> flat list of (sub-sequence, span).
@overload
def right_semi_digest(
    sequence: str | ProFormaAnnotation,
    min_len: int | None = None,
    max_len: int | None = None,
    n_workers: None = None,
    chunksize: None = None,
    method: parallelMethod | parallelMethodLiteral | None = None,
) -> list[tuple[str, Span]]: ...


# Typing-only overload: batch input -> one list of (sub-sequence, span) per input.
@overload
def right_semi_digest(
    sequence: Sequence[str | ProFormaAnnotation],
    min_len: int | None = None,
    max_len: int | None = None,
    n_workers: int | None = None,
    chunksize: int | None = None,
    method: parallelMethod | parallelMethodLiteral | None = None,
) -> list[list[tuple[str, Span]]]: ...


def right_semi_digest(
    sequence: str | ProFormaAnnotation | Sequence[str | ProFormaAnnotation],
    min_len: int | None = None,
    max_len: int | None = None,
    n_workers: int | None = None,
    chunksize: int | None = None,
    method: parallelMethod | parallelMethodLiteral | None = None,
) -> list[tuple[str, Span]] | list[list[tuple[str, Span]]]:
    """
    Builds the right semi-enzymatic sequences from the given input `sequence`.

    A single ``str`` or ``ProFormaAnnotation`` is processed directly and yields
    a flat list of ``(serialized sub-sequence, span)`` tuples. Any other
    ``Sequence`` is treated as a batch and handed to
    ``parallel_apply_internal`` together with ``n_workers``, ``chunksize`` and
    ``method``; per the return annotation this gives one such list per input.

    ``min_len`` and ``max_len`` are forwarded unchanged in both cases.
    ``n_workers``, ``chunksize`` and ``method`` are not used for a single input.
    """
    # str and ProFormaAnnotation are explicitly excluded from the batch path.
    if isinstance(sequence, Sequence) and not isinstance(
        sequence, (str, ProFormaAnnotation)
    ):
        return parallel_apply_internal(
            _right_semi_digest,
            sequence,
            n_workers=n_workers,
            chunksize=chunksize,
            method=method,
            min_len=min_len,
            max_len=max_len,
        )

    return _right_semi_digest(
        sequence=sequence,
        min_len=min_len,
        max_len=max_len,
    )


def _semi_digest(
    sequence: str | ProFormaAnnotation,
    min_len: int | None = None,
    max_len: int | None = None,
) -> list[tuple[str, Span]]:
    """
    Serialize every span produced by ``semi_spans`` for a single input.

    The input is normalized with ``get_annotation_input`` (``copy=False``);
    ``min_len`` and ``max_len`` are forwarded unchanged to the annotation's
    ``semi_spans`` method.

    Returns a list of ``(serialized sub-sequence, span)`` tuples.
    """
    annot = get_annotation_input(sequence, copy=False)
    return [
        (annot[span].serialize(), span)
        for span in annot.semi_spans(
            min_len=min_len,
            max_len=max_len,
        )
    ]


# Typing-only overload: single input -> flat list of (sub-sequence, span).
@overload
def semi_digest(
    sequence: str | ProFormaAnnotation,
    min_len: int | None = None,
    max_len: int | None = None,
    n_workers: None = None,
    chunksize: None = None,
    method: parallelMethod | parallelMethodLiteral | None = None,
) -> list[tuple[str, Span]]: ...


# Typing-only overload: batch input -> one list of (sub-sequence, span) per input.
@overload
def semi_digest(
    sequence: Sequence[str | ProFormaAnnotation],
    min_len: int | None = None,
    max_len: int | None = None,
    n_workers: int | None = None,
    chunksize: int | None = None,
    method: parallelMethod | parallelMethodLiteral | None = None,
) -> list[list[tuple[str, Span]]]: ...


def semi_digest(
    sequence: str | ProFormaAnnotation | Sequence[str | ProFormaAnnotation],
    min_len: int | None = None,
    max_len: int | None = None,
    n_workers: int | None = None,
    chunksize: int | None = None,
    method: parallelMethod | parallelMethodLiteral | None = None,
) -> list[tuple[str, Span]] | list[list[tuple[str, Span]]]:
    """
    Builds all semi-enzymatic sequences from the given input `sequence`.
    Equivalent to combining left and right semi-enzymatic sequences.

    A single ``str`` or ``ProFormaAnnotation`` is processed directly and yields
    a flat list of ``(serialized sub-sequence, span)`` tuples. Any other
    ``Sequence`` is treated as a batch and handed to
    ``parallel_apply_internal`` together with ``n_workers``, ``chunksize`` and
    ``method``; per the return annotation this gives one such list per input.

    ``min_len`` and ``max_len`` are forwarded unchanged in both cases.
    ``n_workers``, ``chunksize`` and ``method`` are not used for a single input.
    """
    # str and ProFormaAnnotation are explicitly excluded from the batch path.
    if isinstance(sequence, Sequence) and not isinstance(
        sequence, (str, ProFormaAnnotation)
    ):
        return parallel_apply_internal(
            _semi_digest,
            sequence,
            n_workers=n_workers,
            chunksize=chunksize,
            method=method,
            min_len=min_len,
            max_len=max_len,
        )

    return _semi_digest(
        sequence=sequence,
        min_len=min_len,
        max_len=max_len,
    )


def _nonspecific_digest(
    sequence: str | ProFormaAnnotation,
    min_len: int | None = None,
    max_len: int | None = None,
) -> list[tuple[str, Span]]:
    """
    Serialize every span produced by ``nonspecific_spans`` for a single input.

    The input is normalized with ``get_annotation_input`` (``copy=False``);
    ``min_len`` and ``max_len`` are forwarded unchanged to the annotation's
    ``nonspecific_spans`` method.

    Returns a list of ``(serialized sub-sequence, span)`` tuples.
    """
    annot = get_annotation_input(sequence, copy=False)
    return [
        (annot[span].serialize(), span)
        for span in annot.nonspecific_spans(
            min_len=min_len,
            max_len=max_len,
        )
    ]


# Typing-only overload: single input -> flat list of (sub-sequence, span).
@overload
def nonspecific_digest(
    sequence: str | ProFormaAnnotation,
    min_len: int | None = None,
    max_len: int | None = None,
    n_workers: None = None,
    chunksize: None = None,
    method: parallelMethod | parallelMethodLiteral | None = None,
) -> list[tuple[str, Span]]: ...


# Typing-only overload: batch input -> one list of (sub-sequence, span) per input.
@overload
def nonspecific_digest(
    sequence: Sequence[str | ProFormaAnnotation],
    min_len: int | None = None,
    max_len: int | None = None,
    n_workers: int | None = None,
    chunksize: int | None = None,
    method: parallelMethod | parallelMethodLiteral | None = None,
) -> list[list[tuple[str, Span]]]: ...


def nonspecific_digest(
    sequence: str | ProFormaAnnotation | Sequence[str | ProFormaAnnotation],
    min_len: int | None = None,
    max_len: int | None = None,
    n_workers: int | None = None,
    chunksize: int | None = None,
    method: parallelMethod | parallelMethodLiteral | None = None,
) -> list[tuple[str, Span]] | list[list[tuple[str, Span]]]:
    """
    Builds all non-enzymatic sequences from the given input `sequence`.

    A single ``str`` or ``ProFormaAnnotation`` is processed directly and yields
    a flat list of ``(serialized sub-sequence, span)`` tuples. Any other
    ``Sequence`` is treated as a batch and handed to
    ``parallel_apply_internal`` together with ``n_workers``, ``chunksize`` and
    ``method``; per the return annotation this gives one such list per input.

    ``min_len`` and ``max_len`` are forwarded unchanged in both cases.
    ``n_workers``, ``chunksize`` and ``method`` are not used for a single input.
    """
    # str and ProFormaAnnotation are explicitly excluded from the batch path.
    if isinstance(sequence, Sequence) and not isinstance(
        sequence, (str, ProFormaAnnotation)
    ):
        return parallel_apply_internal(
            _nonspecific_digest,
            sequence,
            n_workers=n_workers,
            chunksize=chunksize,
            method=method,
            min_len=min_len,
            max_len=max_len,
        )

    return _nonspecific_digest(
        sequence=sequence,
        min_len=min_len,
        max_len=max_len,
    )


def _cleavage_sites(sequence: str | ProFormaAnnotation, enzyme_regex: str) -> list[int]:
    """
    Return the cleavage sites of a single input as a list of integers.

    The input is normalized with ``get_annotation_input`` (``copy=False``) and
    ``enzyme_regex`` is passed as the ``enzyme`` argument of the annotation's
    ``cleavage_sites`` method; the result is materialized into a list.
    """
    annot = get_annotation_input(sequence, copy=False)
    return list(annot.cleavage_sites(enzyme=enzyme_regex))


# Typing-only overload: single input -> flat list of site positions.
@overload
def cleavage_sites(
    sequence: str | ProFormaAnnotation,
    enzyme_regex: str,
    n_workers: None = None,
    chunksize: None = None,
    method: parallelMethod | parallelMethodLiteral | None = None,
) -> list[int]: ...


# Typing-only overload: batch input -> one list of site positions per input.
@overload
def cleavage_sites(
    sequence: Sequence[str | ProFormaAnnotation],
    enzyme_regex: str,
    n_workers: int | None = None,
    chunksize: int | None = None,
    method: parallelMethod | parallelMethodLiteral | None = None,
) -> list[list[int]]: ...


def cleavage_sites(
    sequence: str | ProFormaAnnotation | Sequence[str | ProFormaAnnotation],
    enzyme_regex: str,
    n_workers: int | None = None,
    chunksize: int | None = None,
    method: parallelMethod | parallelMethodLiteral | None = None,
) -> list[int] | list[list[int]]:
    """
    Return positions where cleavage occurs in input `sequence` based on the provided enzyme regex.

    A single ``str`` or ``ProFormaAnnotation`` is processed directly and yields
    a flat ``list[int]``. Any other ``Sequence`` is treated as a batch and
    handed to ``parallel_apply_internal`` together with ``n_workers``,
    ``chunksize`` and ``method``; per the return annotation this gives one
    list of positions per input.

    ``n_workers``, ``chunksize`` and ``method`` are not used for a single input.
    """
    # str and ProFormaAnnotation are explicitly excluded from the batch path.
    if isinstance(sequence, Sequence) and not isinstance(
        sequence, (str, ProFormaAnnotation)
    ):
        return parallel_apply_internal(
            _cleavage_sites,
            sequence,
            n_workers=n_workers,
            chunksize=chunksize,
            method=method,
            enzyme_regex=enzyme_regex,
        )

    return _cleavage_sites(
        sequence=sequence,
        enzyme_regex=enzyme_regex,
    )


def _simple_cleavage_sites(
    sequence: str | ProFormaAnnotation,
    cleave_on: str,
    restrict_before: str = "",
    restrict_after: str = "",
    cterminal: bool = True,
) -> list[int]:
    """
    Return the cleavage sites of a single input using a generated enzyme regex.

    ``cleave_on``, ``restrict_before``, ``restrict_after`` and ``cterminal``
    are passed to ``generate_regex``; the resulting pattern is then used as the
    ``enzyme`` argument of the annotation's ``cleavage_sites`` method. The
    input is normalized with ``get_annotation_input`` (``copy=False``) and the
    result is materialized into a list.
    """
    enzyme_regex = generate_regex(
        cleave_on=cleave_on,
        restrict_before=restrict_before,
        restrict_after=restrict_after,
        cterminal=cterminal,
    )
    annot = get_annotation_input(sequence, copy=False)
    return list(annot.cleavage_sites(enzyme=enzyme_regex))


# Typing-only overload: single input -> flat list of site positions.
@overload
def simple_cleavage_sites(
    sequence: str | ProFormaAnnotation,
    cleave_on: str,
    restrict_before: str = "",
    restrict_after: str = "",
    cterminal: bool = True,
    n_workers: None = None,
    chunksize: None = None,
    method: parallelMethod | parallelMethodLiteral | None = None,
) -> list[int]: ...


# Typing-only overload: batch input -> one list of site positions per input.
@overload
def simple_cleavage_sites(
    sequence: Sequence[str | ProFormaAnnotation],
    cleave_on: str,
    restrict_before: str = "",
    restrict_after: str = "",
    cterminal: bool = True,
    n_workers: int | None = None,
    chunksize: int | None = None,
    method: parallelMethod | parallelMethodLiteral | None = None,
) -> list[list[int]]: ...


def simple_cleavage_sites(
    sequence: str | ProFormaAnnotation | Sequence[str | ProFormaAnnotation],
    cleave_on: str,
    restrict_before: str = "",
    restrict_after: str = "",
    cterminal: bool = True,
    n_workers: int | None = None,
    chunksize: int | None = None,
    method: parallelMethod | parallelMethodLiteral | None = None,
) -> list[int] | list[list[int]]:
    """
    Get cleavage sites using simple amino acid rules.

    ``cleave_on``, ``restrict_before``, ``restrict_after`` and ``cterminal``
    are forwarded unchanged to the per-sequence helper, which turns them into
    an enzyme regex via ``generate_regex``.

    A single ``str`` or ``ProFormaAnnotation`` is processed directly and yields
    a flat ``list[int]``. Any other ``Sequence`` is treated as a batch and
    handed to ``parallel_apply_internal`` together with ``n_workers``,
    ``chunksize`` and ``method``; per the return annotation this gives one
    list of positions per input.

    ``n_workers``, ``chunksize`` and ``method`` are not used for a single input.
    """
    # str and ProFormaAnnotation are explicitly excluded from the batch path.
    if isinstance(sequence, Sequence) and not isinstance(
        sequence, (str, ProFormaAnnotation)
    ):
        return parallel_apply_internal(
            _simple_cleavage_sites,
            sequence,
            n_workers=n_workers,
            chunksize=chunksize,
            method=method,
            cleave_on=cleave_on,
            restrict_before=restrict_before,
            restrict_after=restrict_after,
            cterminal=cterminal,
        )

    return _simple_cleavage_sites(
        sequence=sequence,
        cleave_on=cleave_on,
        restrict_before=restrict_before,
        restrict_after=restrict_after,
        cterminal=cterminal,
    )


def _digest(
    sequence: str | ProFormaAnnotation,
    enzyme_regex: str,
    missed_cleavages: int = 0,
    semi: bool = False,
    min_len: int | None = None,
    max_len: int | None = None,
) -> list[tuple[str, Span]]:
    """
    Serialize every span produced by the annotation's ``digest`` method.

    The input is normalized with ``get_annotation_input`` (``copy=False``).
    ``enzyme_regex`` is passed as the ``enzyme`` argument; ``missed_cleavages``,
    ``semi``, ``min_len`` and ``max_len`` are forwarded unchanged.

    Returns a list of ``(serialized sub-sequence, span)`` tuples.
    """
    annot = get_annotation_input(sequence, copy=False)
    return [
        (annot[span].serialize(), span)
        for span in annot.digest(
            enzyme=enzyme_regex,
            missed_cleavages=missed_cleavages,
            semi=semi,
            min_len=min_len,
            max_len=max_len,
        )
    ]


# Typing-only overload: single input -> flat list of (sub-sequence, span).
@overload
def digest(
    sequence: str | ProFormaAnnotation,
    enzyme_regex: str,
    missed_cleavages: int = 0,
    semi: bool = False,
    min_len: int | None = None,
    max_len: int | None = None,
    *,
    n_workers: None = None,
    chunksize: None = None,
    method: parallelMethod | parallelMethodLiteral | None = None,
) -> list[tuple[str, Span]]: ...


# Typing-only overload: batch input -> one list of (sub-sequence, span) per input.
@overload
def digest(
    sequence: Sequence[str | ProFormaAnnotation],
    enzyme_regex: str,
    missed_cleavages: int = 0,
    semi: bool = False,
    min_len: int | None = None,
    max_len: int | None = None,
    *,
    n_workers: int | None = None,
    chunksize: int | None = None,
    method: parallelMethod | parallelMethodLiteral | None = None,
) -> list[list[tuple[str, Span]]]: ...


def digest(
    sequence: str | ProFormaAnnotation | Sequence[str | ProFormaAnnotation],
    enzyme_regex: str,
    missed_cleavages: int = 0,
    semi: bool = False,
    min_len: int | None = None,
    max_len: int | None = None,
    *,
    n_workers: int | None = None,
    chunksize: int | None = None,
    method: parallelMethod | parallelMethodLiteral | None = None,
) -> list[tuple[str, Span]] | list[list[tuple[str, Span]]]:
    """
    Returns digested sequences using a regular expression to define cleavage sites.

    ``enzyme_regex``, ``missed_cleavages``, ``semi``, ``min_len`` and
    ``max_len`` are forwarded unchanged to the per-sequence helper.

    A single ``str`` or ``ProFormaAnnotation`` is processed directly and yields
    a flat list of ``(serialized sub-sequence, span)`` tuples. Any other
    ``Sequence`` is treated as a batch and handed to
    ``parallel_apply_internal`` together with the keyword-only ``n_workers``,
    ``chunksize`` and ``method``; per the return annotation this gives one
    such list per input. Those three options are not used for a single input.
    """
    # str and ProFormaAnnotation are explicitly excluded from the batch path.
    if isinstance(sequence, Sequence) and not isinstance(
        sequence, (str, ProFormaAnnotation)
    ):
        return parallel_apply_internal(
            _digest,
            sequence,
            n_workers=n_workers,
            chunksize=chunksize,
            method=method,
            enzyme_regex=enzyme_regex,
            missed_cleavages=missed_cleavages,
            semi=semi,
            min_len=min_len,
            max_len=max_len,
        )

    return _digest(
        sequence=sequence,
        enzyme_regex=enzyme_regex,
        missed_cleavages=missed_cleavages,
        semi=semi,
        min_len=min_len,
        max_len=max_len,
    )


def _digest_single(
    sequence: str | ProFormaAnnotation,
    cleave_on: str,
    restrict_before: str = "",
    restrict_after: str = "",
    cterminal: bool = True,
    missed_cleavages: int = 0,
    semi: bool = False,
    min_len: int | None = None,
    max_len: int | None = None,
) -> list[tuple[str, Span]]:
    """
    Serialize every span produced by the annotation's ``simple_digest`` method.

    This is the per-sequence worker behind the public ``simple_digest``. The
    input is normalized with ``get_annotation_input`` (``copy=False``) and all
    remaining arguments are forwarded unchanged to ``simple_digest`` on the
    annotation.

    Returns a list of ``(serialized sub-sequence, span)`` tuples.
    """
    annot = get_annotation_input(sequence, copy=False)
    return [
        (annot[span].serialize(), span)
        for span in annot.simple_digest(
            cleave_on=cleave_on,
            restrict_before=restrict_before,
            restrict_after=restrict_after,
            cterminal=cterminal,
            missed_cleavages=missed_cleavages,
            semi=semi,
            min_len=min_len,
            max_len=max_len,
        )
    ]


# Typing-only overload: single input -> flat list of (sub-sequence, span).
@overload
def simple_digest(
    sequence: str | ProFormaAnnotation,
    cleave_on: str,
    restrict_before: str = "",
    restrict_after: str = "",
    cterminal: bool = True,
    missed_cleavages: int = 0,
    semi: bool = False,
    min_len: int | None = None,
    max_len: int | None = None,
    *,
    n_workers: None = None,
    chunksize: None = None,
    method: parallelMethod | parallelMethodLiteral | None = None,
) -> list[tuple[str, Span]]: ...


# Typing-only overload: batch input -> one list of (sub-sequence, span) per input.
@overload
def simple_digest(
    sequence: Sequence[str | ProFormaAnnotation],
    cleave_on: str,
    restrict_before: str = "",
    restrict_after: str = "",
    cterminal: bool = True,
    missed_cleavages: int = 0,
    semi: bool = False,
    min_len: int | None = None,
    max_len: int | None = None,
    *,
    n_workers: int | None = None,
    chunksize: int | None = None,
    method: parallelMethod | parallelMethodLiteral | None = None,
) -> list[list[tuple[str, Span]]]: ...


def simple_digest(
    sequence: str | ProFormaAnnotation | Sequence[str | ProFormaAnnotation],
    cleave_on: str,
    restrict_before: str = "",
    restrict_after: str = "",
    cterminal: bool = True,
    missed_cleavages: int = 0,
    semi: bool = False,
    min_len: int | None = None,
    max_len: int | None = None,
    *,
    n_workers: int | None = None,
    chunksize: int | None = None,
    method: parallelMethod | parallelMethodLiteral | None = None,
) -> list[tuple[str, Span]] | list[list[tuple[str, Span]]]:
    """
    Returns digested sequences using amino acid specifications with optional restrictions.

    ``cleave_on``, ``restrict_before``, ``restrict_after``, ``cterminal``,
    ``missed_cleavages``, ``semi``, ``min_len`` and ``max_len`` are forwarded
    unchanged to the per-sequence helper.

    A single ``str`` or ``ProFormaAnnotation`` is processed directly and yields
    a flat list of ``(serialized sub-sequence, span)`` tuples. Any other
    ``Sequence`` is treated as a batch and handed to
    ``parallel_apply_internal`` together with the keyword-only ``n_workers``,
    ``chunksize`` and ``method``; per the return annotation this gives one
    such list per input. Those three options are not used for a single input.
    """
    # str and ProFormaAnnotation are explicitly excluded from the batch path.
    if isinstance(sequence, Sequence) and not isinstance(
        sequence, (str, ProFormaAnnotation)
    ):
        return parallel_apply_internal(
            _digest_single,
            sequence,
            n_workers=n_workers,
            chunksize=chunksize,
            method=method,
            cleave_on=cleave_on,
            restrict_before=restrict_before,
            restrict_after=restrict_after,
            cterminal=cterminal,
            missed_cleavages=missed_cleavages,
            semi=semi,
            min_len=min_len,
            max_len=max_len,
        )

    return _digest_single(
        sequence=sequence,
        cleave_on=cleave_on,
        restrict_before=restrict_before,
        restrict_after=restrict_after,
        cterminal=cterminal,
        missed_cleavages=missed_cleavages,
        semi=semi,
        min_len=min_len,
        max_len=max_len,
    )