Examples

This section provides practical examples demonstrating key features of Peptacular.

ProForma Notation

Basic usage of ProForma notation for representing modified peptides.

"""
ProForma Notation Examples
===========================
Comprehensive examples of supported ProForma 2.1 notation in peptacular.
Demonstrates parsing and serialization of various modification types and features.
"""

import peptacular as pt


def run() -> None:
    """Parse and re-serialize one example of each supported ProForma feature.

    Every example follows the same pattern: a ProForma string is parsed with
    ``pt.parse`` and the resulting annotation is printed via ``serialize()``.
    The function takes no arguments, returns nothing, and writes to stdout.
    """
    # ============================================================================
    # BASIC SEQUENCES
    # ============================================================================

    print("=" * 60)
    print("BASIC SEQUENCES")
    print("=" * 60)

    # Simple unmodified peptide
    simple = pt.parse("PEPTIDE")
    print(f"Simple sequence: {simple.serialize()}")

    # ============================================================================
    # TERMINAL MODIFICATIONS
    # ============================================================================

    print("\n" + "=" * 60)
    print("TERMINAL MODIFICATIONS")
    print("=" * 60)

    # Both terminals modified
    both = pt.parse("[Acetyl]-PEPTIDE-[Amidated]")
    print(f"Both terminals: {both.serialize()}")

    # Multiple N-terminal modifications
    multi_nterm = pt.parse("[Acetyl][Formyl]-PEPTIDE")
    print(f"Multiple N-term mods: {multi_nterm.serialize()}")

    # ============================================================================
    # INTERNAL MODIFICATIONS
    # ============================================================================

    print("\n" + "=" * 60)
    print("INTERNAL MODIFICATIONS")
    print("=" * 60)

    # Multiple different modifications
    multi_internal = pt.parse("PEM[Oxidation]TIS[Phospho]DE")
    print(f"Multiple modifications: {multi_internal.serialize()}")

    # Multiple modifications on same residue
    same_residue = pt.parse("PEM[Oxidation][Dioxidation]TIDE")
    print(f"Multiple mods on M: {same_residue.serialize()}")

    # ============================================================================
    # MODIFICATION NOTATION TYPES
    # ============================================================================

    print("\n" + "=" * 60)
    print("MODIFICATION NOTATION TYPES")
    print("=" * 60)

    # By name (Unimod/PSI-MOD)
    by_name = pt.parse("PEM[Oxidation]TIDE")
    print(f"By name: {by_name.serialize()}")

    # By accession number (requires the UNIMOD: or MOD: prefix for
    # Unimod/PSI-MOD respectively)
    by_accession = pt.parse("PEM[UNIMOD:35]TIDE")
    print(f"By Unimod accession: {by_accession.serialize()}")

    # By mass (delta mass); requires an explicit sign (+/-)
    by_mass = pt.parse("PEM[+15.995]TIDE")
    print(f"By mass shift: {by_mass.serialize()}")
    neg_mass = pt.parse("PEPTIDE[-18.011]")
    print(f"Negative mass shift: {neg_mass.serialize()}")

    # By formula (requires Formula: prefix)
    by_formula = pt.parse("PEM[Formula:O]TIDE")
    print(f"By formula: {by_formula.serialize()}")

    # By glycan composition (requires Glycan: prefix)
    by_glycan = pt.parse("NEEYN[Glycan:Hex5HexNAc4]K")
    print(f"By glycan composition: {by_glycan.serialize()}")

    # ============================================================================
    # CHARGE STATES
    # ============================================================================

    print("\n" + "=" * 60)
    print("CHARGE STATES")
    print("=" * 60)

    # Positive charge
    charged_pos = pt.parse("PEPTIDE/2")
    print(f"Charge +2: {charged_pos.serialize()}")

    # Negative charge
    charged_neg = pt.parse("PEPTIDE/-2")
    print(f"Charge -2: {charged_neg.serialize()}")

    # ============================================================================
    # CHARGE ADDUCTS
    # ============================================================================

    print("\n" + "=" * 60)
    print("CHARGE ADDUCTS")
    print("=" * 60)

    # Single adduct (Total charge = +1)
    na_adduct = pt.parse("PEPTIDE/[Na:z+1]")
    print(f"Sodium adduct: {na_adduct.serialize()}")

    # Multiple copies of same adduct (Total charge = +2)
    multi_adduct = pt.parse("PEPTIDE/[Na:z+1^2]")
    print(f"Two sodium adducts: {multi_adduct.serialize()}")

    # Multiple different adducts (separated by commas) (Total charge = +3)
    mixed_adducts = pt.parse("PEPTIDE/[Na:z+1^2,H:z+1]")
    print(f"Mixed adducts: {mixed_adducts.serialize()}")

    # Metal adduct with charge (Total charge = +2)
    zn_adduct = pt.parse("PEPTIDE/[Zn:z+2]")
    print(f"Zinc adduct (+2): {zn_adduct.serialize()}")

    # ============================================================================
    # LABILE MODIFICATIONS
    # ============================================================================

    print("\n" + "=" * 60)
    print("LABILE MODIFICATIONS")
    print("=" * 60)

    # Single labile modification
    labile = pt.parse("{Glycan:Hex}PEPTIDE")
    print(f"Labile glycan: {labile.serialize()}")

    # Several labile modifications are written as consecutive {...} groups
    multi_labile = pt.parse("{Glycan:Hex}{Phospho}PEPTIDE")
    print(f"Multiple labile: {multi_labile.serialize()}")

    # ============================================================================
    # GLYCAN NOTATION
    # ============================================================================

    print("\n" + "=" * 60)
    print("GLYCAN NOTATION")
    print("=" * 60)

    # Simple glycan
    simple_glycan = pt.parse("NEEYN[Glycan:Hex5HexNAc4]K")
    print(f"N-glycan: {simple_glycan.serialize()}")

    # ============================================================================
    # FIXED/STATIC MODIFICATIONS
    # ============================================================================

    print("\n" + "=" * 60)
    print("FIXED/STATIC MODIFICATIONS")
    print("=" * 60)

    # Fixed modification applied to all matching residues (M and T on all positions)
    fixed_mod = pt.parse("<[Oxidation]@M,T>MEMTIMDE")
    print(f"Fixed oxidation on all M and T: {fixed_mod.serialize()}")

    # Multiple fixed modifications
    multi_fixed = pt.parse("<[Oxidation]@M><[Phospho]@S>MSPETIDE")
    print(f"Multiple fixed mods: {multi_fixed.serialize()}")

    # Fixed modification with position rules (N-term Proline)
    fixed_nterm = pt.parse("<[Acetyl]@N-term:P>PEPTIDE")
    print(f"Fixed N-term mod: {fixed_nterm.serialize()}")

    # Fixed modification with position rules (Any C-term)
    fixed_cterm = pt.parse("<[Amidated]@C-term>PEPTIDE")
    print(f"Fixed C-term mod: {fixed_cterm.serialize()}")

    # ============================================================================
    # ISOTOPE LABELING
    # ============================================================================

    print("\n" + "=" * 60)
    print("ISOTOPE LABELING")
    print("=" * 60)

    # C13 labeling (all carbons)
    c13 = pt.parse("<13C>PEPTIDE")
    print(f"C13 labeled: {c13.serialize()}")

    # N15 labeling
    n15 = pt.parse("<15N>PEPTIDE")
    print(f"N15 labeled: {n15.serialize()}")

    # Multiple isotope labels
    multi_isotope = pt.parse("<13C><15N>PEPTIDE")
    print(f"C13 and N15 labeled: {multi_isotope.serialize()}")

    # Deuterium labeling
    deuterium = pt.parse("<2H>PEP[Oxidation]TIDE")
    print(f"Deuterium labeled: {deuterium.serialize()}")

    # ============================================================================
    # AMBIGUOUS MODIFICATIONS (UNKNOWN POSITION)
    # ============================================================================

    print("\n" + "=" * 60)
    print("AMBIGUOUS MODIFICATIONS (UNKNOWN POSITION)")
    print("=" * 60)

    # Unknown position
    unknown_pos = pt.parse("[Phospho]?PEPTIDE")
    print(f"Phospho somewhere: {unknown_pos.serialize()}")

    # Multiple unknown modifications (a caret specifies multiple occurrences)
    multi_unknown = pt.parse("[Phospho]^2[Acetyl]?PEPTIDE")
    print(f"Multiple unknown: {multi_unknown.serialize()}")

    # ============================================================================
    # INTERVAL NOTATION (AMBIGUOUS LOCALIZATION)
    # ============================================================================

    print("\n" + "=" * 60)
    print("INTERVAL NOTATION (LOCALIZATION RANGES)")
    print("=" * 60)

    # Modification in a range (1-indexed, inclusive): the parentheses enclose
    # E and P, i.e. residues 2 and 3 of PEPTIDE.
    interval = pt.parse("P(EP)[Phospho]TIDE")
    print(f"Phospho in positions 2-3: {interval.serialize()}")

    # Ambiguous interval (EP or PT or something with similar mass)
    ambiguous_interval = pt.parse("P(?EP)[Phospho]TIDE")
    print(f"Ambiguous intervals: {ambiguous_interval.serialize()}")

    # ============================================================================
    # INFO TAGS
    # ============================================================================

    print("\n" + "=" * 60)
    print("INFO TAGS (NON-MODIFICATION ANNOTATIONS)")
    print("=" * 60)

    # Info tag (no mass contribution)
    info_tag = pt.parse("PEPT[INFO:test]IDE")
    print(f"Info tag: {info_tag.serialize()}")

    # ============================================================================
    # PEPTIDE NAMING
    # ============================================================================

    print("\n" + "=" * 60)
    print("PEPTIDE NAMING")
    print("=" * 60)

    # Peptidoform name
    peptide_name = pt.parse("(>MyPeptide)PEPTIDE")
    print(f"Peptide name: {peptide_name.serialize()}")

    # ============================================================================
    # MULTIPLE FEATURES COMBINED
    # ============================================================================

    print("\n" + "=" * 60)
    print("MULTIPLE FEATURES COMBINED")
    print("=" * 60)

    # Combined info tag and modification
    multi_info = pt.parse("PEPT[Phospho|INFO:quality=high]IDE")
    print(f"Info + modification: {multi_info.serialize()}")

    # Technically this is valid but there is no reason to do this. Peptacular
    # only looks at the first modification in such cases.
    multi_annot2 = pt.parse("PEPT[Phospho|Oxidation|+76.0]IDE")
    print(f"Multiple pipe-separated mods: {multi_annot2.serialize()}")


# Run the ProForma notation examples only when executed as a script.
if __name__ == "__main__":
    run()

Mass, m/z, and Composition

Calculate masses, m/z ratios, and elemental compositions.

"""
Mass and Composition Calculations
==================================
Examples of calculating mass, m/z, and elemental composition from ProForma annotations.
"""

import peptacular as pt


def run() -> None:
    """Print mass, m/z, and composition calculations for example peptides.

    Most examples operate on the unmodified peptide ``PEPTIDE``; the last
    section parses annotations carrying global isotope labels. The function
    takes no arguments, returns nothing, and writes to stdout.
    """
    # Local import: only needed for the floating-point comparison below.
    import math

    # Parse a simple peptide sequence
    annot = pt.parse("PEPTIDE")

    # ============================================================================
    # MASS CALCULATIONS
    # ============================================================================

    print("=" * 60)
    print("MASS CALCULATIONS")
    print("=" * 60)

    # --- Basic Mass Calculation ---
    # Default is monoisotopic precursor mass (includes terminal groups H and OH)
    mass = annot.mass()
    print(f"Default mass: {mass:.4f} Da")  # 799.3600

    # Explicitly specify precursor ion type
    mass_p = annot.mass(ion_type="p")
    print(f"Precursor mass: {mass_p:.4f} Da")

    # --- Neutral Mass (no terminal groups) ---
    neutral = annot.mass(ion_type=pt.IonType.NEUTRAL)
    print(f"Neutral mass: {neutral:.4f} Da")

    # --- m/z Calculation ---
    # mz() divides mass by charge
    mz_2plus = annot.mz(charge=2)
    # Compare with a tolerance: the two values are computed independently, so
    # exact float equality is not guaranteed.
    assert math.isclose(mz_2plus, annot.mass(charge=2) / 2)
    print(f"m/z at charge +2: {mz_2plus:.4f}")

    # --- Monoisotopic vs Average Mass ---
    mono_mass = annot.mass(monoisotopic=True)  # default
    avg_mass = annot.mass(monoisotopic=False)
    print(f"Monoisotopic: {mono_mass:.4f} Da")
    print(f"Average: {avg_mass:.4f} Da")

    # --- Charge States ---
    # Integer charge assumes protonation/deprotonation
    mass_2plus = annot.mass(charge=2)
    mass_2minus = annot.mass(charge=-2)
    print(f"Mass at +2 charge: {mass_2plus:.4f} Da")
    print(f"Mass at -2 charge: {mass_2minus:.4f} Da")

    # Adduct charges (overrides annotation charge)
    mass_na = annot.mass(charge="Na:z+1")
    mass_multi_adduct = annot.mass(charge=("Na:z+1^2", "H:z+1"))
    print(f"Mass with Na+ adduct: {mass_na:.4f} Da")
    print(f"Mass with multiple adducts: {mass_multi_adduct:.4f} Da")

    # --- Isotopes ---
    # Integer assumes C13 isotopes
    mass_c13 = annot.mass(isotopes=1)
    print(f"Mass with 1x 13C: {mass_c13:.4f} Da")

    # Custom isotope specification
    mass_custom_iso = annot.mass(isotopes={"17O": 2, "13C": 1})
    print(f"Mass with 2x 17O and 1x 13C: {mass_custom_iso:.4f} Da")

    # --- Neutral Losses ---
    # Single loss
    mass_water_loss = annot.mass(deltas={"H2O": 1})
    print(f"Mass with H2O loss: {mass_water_loss:.4f} Da")

    # Multiple losses
    mass_multi_loss = annot.mass(
        deltas={pt.NeutralDelta.WATER: 1, pt.NeutralDelta.AMMONIA: 2}
    )
    print(f"Mass with H2O + 2×NH3 loss: {mass_multi_loss:.4f} Da")

    # ============================================================================
    # COMPOSITION CALCULATIONS
    # ============================================================================

    print("\n" + "=" * 60)
    print("COMPOSITION CALCULATIONS")
    print("=" * 60)

    # --- Basic Composition ---
    # Returns a Counter of ElementInfo objects
    comp = annot.comp()
    print("\nFull composition (ElementInfo objects):")
    for elem, count in comp.items():
        print(f"  {elem.symbol}: {count}")

    # Convert to simple string representation
    comp_str = {str(elem): count for elem, count in comp.items()}
    print(f"\nSimple composition: {comp_str}")

    # --- Composition with Modifications ---
    # Apply charge and isotopes
    comp_modified = annot.comp(charge="Na:z+1", isotopes={"17O": 2, "13C": 1})
    comp_modified_str = {str(elem): count for elem, count in comp_modified.items()}
    print(f"\nModified composition: {comp_modified_str}")

    # ============================================================================
    # WITH GLOBAL ISOTOPE MODIFICATIONS
    # ============================================================================

    print("\n" + "=" * 60)
    print("GLOBAL ISOTOPE MODIFICATIONS")
    print("=" * 60)

    # Applies the global isotope to all residues
    iso_annot = pt.parse("<13C>PEPTIDE")
    iso_comp = iso_annot.comp()
    iso_comp_str = {str(elem): count for elem, count in iso_comp.items()}
    print(f"Composition with global 13C: {iso_comp_str}")

    # Will also apply isotopes to modifications where available.
    # Charge is applied after isotopes so will reflect in composition.
    mod_iso_annot = pt.parse("<2H>PEPT[Phospho]IDE/2")
    mod_iso_comp = mod_iso_annot.comp()
    mod_iso_comp_str = {str(elem): count for elem, count in mod_iso_comp.items()}
    # The label names 2H to match the <2H> global isotope parsed above.
    print(f"Composition with global 2H and Phospho mod: {mod_iso_comp_str}")


# Run the mass/composition examples only when executed as a script.
if __name__ == "__main__":
    run()

Annotation

Parse and work with ProForma annotations.

"""
ProForma Annotation Examples
=============================
Basic examples of parsing, serializing, and manipulating ProForma annotations.
"""

import peptacular as pt


def run() -> None:
    """Walk through creating, inspecting, and editing ProForma annotations.

    Each section builds or parses an annotation, applies one family of
    operations (set, append, extend, remove, pop, slice, copy, validate, ...)
    and prints the serialized result after every step. The function takes no
    arguments, returns nothing, and writes to stdout.
    """
    # ============================================================================
    # PARSING ANNOTATIONS
    # ============================================================================

    # Simple sequence
    simple: pt.ProFormaAnnotation = pt.parse("PEPTIDE")
    print(f"Simple: {simple.serialize()}")

    # Chimeric sequence
    chimeric: list[pt.ProFormaAnnotation] = pt.parse_chimeric("PEPTIDE+PEPTIDE")
    print(f"Chimeric: {pt.serialize_chimeric(chimeric)}")

    # ============================================================================
    # CREATING ANNOTATIONS PROGRAMMATICALLY
    # ============================================================================

    # Create from scratch
    annot = pt.ProFormaAnnotation(sequence="PEPTIDE", charge=2)
    print(f"New annotation: {annot.serialize()}")

    # Set internal mods: takes a dict of position -> {mod: count}
    annot = pt.ProFormaAnnotation(
        sequence="PEPTIDE", charge=2, internal_mods={2: {"Oxidation": 1}}
    )
    print(f"New annotation: {annot.serialize()}")

    # Other modifications are just {mod: count}
    annot = pt.ProFormaAnnotation(
        sequence="PEPTIDE",
        nterm_mods={"Acetyl": 1},
        internal_mods={2: {"Oxidation": 1, "Phospho": 1}},
        charge=2,
    )
    print(f"New annotation: {annot.serialize()}")

    # ============================================================================
    # ACCESSING PROPERTIES
    # ============================================================================

    print("\n" + "=" * 60)
    print("ACCESSING PROPERTIES")
    print("=" * 60)

    annot = pt.parse("[Acetyl]-PEM[Oxidation]TIS[Phospho]DE/2")
    print(f"Annotation: {annot.serialize()}\n")

    print(f"Sequence: {annot.sequence}")
    print(f"Length: {len(annot)}")
    print(f"Charge state: {annot.charge_state}")
    print(f"Has N-term mods: {annot.has_nterm_mods}")
    print(f"Has internal mods: {annot.has_internal_mods}")
    print(f"Has charge: {annot.has_charge}")

    # ============================================================================
    # SETTING MODIFICATIONS
    # ============================================================================

    print("\n" + "=" * 60)
    print("SETTING MODIFICATIONS")
    print("=" * 60)

    # Start fresh
    annot = pt.ProFormaAnnotation(sequence="PEPTIDE")

    # Set N-terminal modification
    annot.set_nterm_mods({"Acetyl": 1})
    print(f"After N-term: {annot.serialize()}")

    # Set internal modification at specific position
    annot.set_internal_mods_at_index(2, {"Oxidation": 1})
    print(f"After internal: {annot.serialize()}")

    # Set charge
    annot.set_charge(2)
    print(f"After charge: {annot.serialize()}")

    # ============================================================================
    # APPENDING MODIFICATIONS
    # ============================================================================

    print("\n" + "=" * 60)
    print("APPENDING MODIFICATIONS")
    print("=" * 60)

    annot = pt.ProFormaAnnotation(sequence="PEPTIDE")

    # Append N-terminal mod
    annot.append_nterm_mod("Acetyl")
    print(f"Append N-term: {annot.serialize()}")

    # Append internal mod
    annot.append_internal_mod_at_index(2, "Oxidation")
    print(f"Append internal: {annot.serialize()}")

    # Append another internal mod at same position
    annot.append_internal_mod_at_index(2, "Phospho")
    print(f"Append another: {annot.serialize()}")

    # ============================================================================
    # EXTENDING MODIFICATIONS
    # ============================================================================

    print("\n" + "=" * 60)
    print("EXTENDING MODIFICATIONS")
    print("=" * 60)

    annot = pt.ProFormaAnnotation(sequence="PEPTIDE")

    # Extend with multiple mods
    annot.extend_nterm_mods(["Acetyl", "Formyl"])
    print(f"Extend N-term: {annot.serialize()}")

    # Extend internal mods at position
    annot.extend_internal_mods_at_index(2, ["Oxidation", "Phospho"])
    print(f"Extend internal: {annot.serialize()}")

    # ============================================================================
    # REMOVING MODIFICATIONS
    # ============================================================================

    print("\n" + "=" * 60)
    print("REMOVING MODIFICATIONS")
    print("=" * 60)

    annot = pt.parse("[Acetyl]-PEM[Oxidation]TIS[Phospho]DE/2")
    print(f"Original: {annot.serialize()}")

    # Clear specific mod type (removes all)
    annot.clear_nterm_mods()
    print(f"Clear N-term: {annot.serialize()}")

    # Clear internal mod at position (removes all at that position)
    annot.clear_internal_mod_at_index(2)
    print(f"Clear position 2: {annot.serialize()}")

    # Clear all mods
    annot.clear_mods()
    print(f"Clear all: {annot.serialize()}")

    # ============================================================================
    # DECREMENTING MODIFICATIONS
    # ============================================================================

    print("\n" + "=" * 60)
    print("DECREMENTING MODIFICATIONS")
    print("=" * 60)

    # When you have multiple copies of a mod, remove() decrements the count
    annot = pt.ProFormaAnnotation(sequence="PEPTIDE")
    annot.extend_nterm_mods(["Acetyl", "Acetyl", "Formyl"])
    print(f"Original: {annot.serialize()}")

    # Remove one Acetyl (decrements count)
    annot.remove_nterm_mod("Acetyl")
    print(f"After removing 1 Acetyl: {annot.serialize()}")

    # Remove another Acetyl
    annot.remove_nterm_mod("Acetyl")
    print(f"After removing another Acetyl: {annot.serialize()}")

    # Remove Formyl
    annot.remove_nterm_mod("Formyl")
    print(f"After removing Formyl: {annot.serialize()}")

    # Works with internal mods too
    annot = pt.ProFormaAnnotation(sequence="PEPTIDE")
    annot.extend_internal_mods_at_index(2, ["Oxidation", "Oxidation", "Phospho"])
    print(f"\nWith internal mods: {annot.serialize()}")

    annot.remove_internal_mod_at_index(2, "Oxidation")
    print(f"Remove 1 Oxidation: {annot.serialize()}")

    annot.remove_internal_mod_at_index(2, "Oxidation")
    print(f"Remove another Oxidation: {annot.serialize()}")

    # ============================================================================
    # POPPING MODIFICATIONS
    # ============================================================================

    print("\n" + "=" * 60)
    print("POPPING MODIFICATIONS")
    print("=" * 60)

    annot = pt.parse("[Acetyl]-PEM[Oxidation]TIDE/2")
    print(f"Original: {annot.serialize()}")

    # Pop N-term mods (returns the mods)
    nterm = annot.pop_nterm_mods()
    print(f"Popped N-term: {nterm}")
    print(f"After pop: {annot.serialize()}")

    # Pop charge
    charge = annot.pop_charge()
    print(f"Popped charge: {charge}")
    print(f"After pop charge: {annot.serialize()}")

    # ============================================================================
    # WORKING WITH STATIC MODS
    # ============================================================================

    print("\n" + "=" * 60)
    print("STATIC MODIFICATIONS")
    print("=" * 60)

    # Add static mod by residue
    annot = pt.ProFormaAnnotation(sequence="PEPTIDE")
    annot.add_static_mod_by_residue("E", "Oxidation")
    print(f"Static mod on E: {annot.serialize()}")

    # Condense to internal mods
    annot.condense_static_mods()
    print(f"Condensed: {annot.serialize()}")

    # ============================================================================
    # SLICING
    # ============================================================================

    print("\n" + "=" * 60)
    print("SLICING")
    print("=" * 60)

    annot = pt.parse("[Acetyl]-PEM[Oxidation]TIDE")
    print(f"Original: {annot.serialize()}")

    # Slice using indices
    sub = annot[2:5]
    print(f"Slice [2:5]: {sub.serialize()}")

    # Slice preserves modifications
    sub_with_mod = annot[1:4]
    print(f"Slice [1:4]: {sub_with_mod.serialize()}")

    # ============================================================================
    # COPYING
    # ============================================================================

    print("\n" + "=" * 60)
    print("COPYING")
    print("=" * 60)

    annot = pt.parse("PEM[Oxidation]TIDE")
    print(f"Original: {annot.serialize()}")

    # Make a copy (named annot_copy to avoid shadowing the stdlib `copy` module)
    annot_copy = annot.copy()
    annot_copy.append_nterm_mod("Acetyl")
    print(f"Copy modified: {annot_copy.serialize()}")
    print(f"Original unchanged: {annot.serialize()}")

    # ============================================================================
    # CHECKING MODIFICATIONS
    # ============================================================================

    print("\n" + "=" * 60)
    print("CHECKING FOR MODIFICATIONS")
    print("=" * 60)

    annot = pt.parse("[Acetyl]-PEM[Oxidation]TIDE/2")
    print(f"Annotation: {annot.serialize()}\n")

    # Check for specific mod types
    print(f"Has N-term mods: {annot.has_nterm_mods}")
    print(f"Has C-term mods: {annot.has_cterm_mods}")
    print(f"Has internal mods: {annot.has_internal_mods}")
    print(f"Has charge: {annot.has_charge}")

    # Check if has any mods; mod types may be given as enum members or strings
    print(f"Has any mods: {annot.has_mods()}")
    print(
        f"Has internal/charge: {annot.has_mods([pt.ModType.INTERNAL, pt.ModType.CHARGE])}"
    )
    print(f"Has internal/charge: {annot.has_mods(['internal', 'charge'])}")

    # ============================================================================
    # SERIALIZATION OPTIONS
    # ============================================================================

    print("\n" + "=" * 60)
    print("SERIALIZATION")
    print("=" * 60)

    annot = pt.parse("[Acetyl]-PEPTIDE/2")

    # Full serialization
    print(f"Full: {annot.serialize()}")

    # Strip mods before serializing
    stripped = annot.copy().strip_mods()
    print(f"Stripped: {stripped.serialize()}")

    # ============================================================================
    # VALIDATION
    # ============================================================================

    print("\n" + "=" * 60)
    print("VALIDATION")
    print("=" * 60)

    # By default, validation is OFF for performance
    annot_no_val = pt.ProFormaAnnotation(sequence="PEPTIDE", validate=False)
    print(f"No validation (default): {annot_no_val.serialize()}")

    # Enable validation when creating (and for methods that modify the annotation)
    annot_with_val = pt.ProFormaAnnotation(sequence="PEPTIDE", validate=True)
    print(f"With validation: {annot_with_val.serialize()}")

    # Validation checks modification syntax, so an invalid mod should raise
    print("\nAttempting to add invalid modification with validation ON:")
    try:
        annot_with_val.append_internal_mod_at_index(0, "InvalidMod123")
        print(f"  Error: Modification added unexpectedly: {annot_with_val.serialize()}")
    except Exception as e:
        # successfully raises error
        print(f"Successfully caught error: {e}")

    # Validation can be disabled per method call, even on a validating annotation
    print("\nAttempting to add invalid modification with validation OFF:")
    try:
        annot_with_val.append_internal_mod_at_index(0, "InvalidMod123", validate=False)
        # Serialize the annotation that was just modified so the added mod is shown.
        print(f"  Success (no validation): {annot_with_val.serialize()}")
    except Exception as e:
        print(f"  Error: {e}")


# Run the annotation examples only when executed as a script.
if __name__ == "__main__":
    run()

Annotation with Modification Objects

Advanced annotation examples using modification objects.

"""
ProForma Mod Objects Examples
==============================
Demonstrates working with Mods and Mod objects returned from ProForma annotations.
"""

import peptacular as pt


def run():
    # ============================================================================
    # ACCESSING MODS OBJECTS
    # ============================================================================

    # Parse a ProForma annotation with various modifications
    annot = pt.parse("[Acetyl][Formyl]-PEM[Oxidation][Phospho]TIS[Phospho]DE/2")
    print(f"Annotation: {annot.serialize()}\n")

    # Access different mod collections - these return Mods objects
    print("N-terminal mods:", annot.nterm_mods)
    print("Internal mods at pos 2:", annot.get_internal_mods_at_index(2))
    print("Internal mods at pos 5:", annot.get_internal_mods_at_index(5))

    # ============================================================================
    # ITERATING OVER MODS
    # ============================================================================

    print("\n" + "=" * 60)
    print("ITERATING OVER MODS")
    print("=" * 60)

    annot = pt.parse("[Acetyl][Acetyl][Formyl]-PEPTIDE")
    nterm = annot.nterm_mods

    print(f"N-terminal mods: {nterm}\n")

    # Iterate through Mod objects
    print("Individual Mod objects:")
    for mod in nterm:
        print(f"  {mod.value} (count: {mod.count})")

    # ============================================================================
    # WORKING WITH MOD OBJECTS
    # ============================================================================

    print("\n" + "=" * 60)
    print("MOD OBJECT PROPERTIES")
    print("=" * 60)

    annot = pt.parse("PEM[Oxidation][Oxidation][Phospho]TIDE")
    internal_mods = annot.get_internal_mods_at_index(2)

    print(f"Internal mods at position 2: {internal_mods}\n")

    for mod in internal_mods:
        print(f"Modification: {mod.value}")
        print(f"  Count: {mod.count}")
        print(f"  Mass (mono): {mod.get_mass(monoisotopic=True):.4f}")
        print(f"  Mass (avg): {mod.get_mass(monoisotopic=False):.4f}")
        print(f"  Composition: {mod.get_composition()}")
        print(f"  Charge: {mod.get_charge()}")
        print()

    # ============================================================================
    # ACCESSING PARSED MOD VALUES
    # ============================================================================

    print("\n" + "=" * 60)
    print("PARSED MOD VALUES")
    print("=" * 60)

    annot = pt.parse("PEM[Oxidation]TIS[Phospho]DE")

    # Get mods at position 2 (M with Oxidation)
    mods_at_2 = annot.get_internal_mods_at_index(2)
    print(f"Mods at position 2: {mods_at_2}\n")

    # Access parsed items as (modification, count) tuples
    print("Parsed items:")
    for mod_value, count in mods_at_2.parse_items():
        print(f"  {mod_value} × {count}")
        print(f"    Type: {type(mod_value)}")

    # ============================================================================
    # CHECKING MOD PRESENCE
    # ============================================================================

    print("\n" + "=" * 60)
    print("CHECKING MOD PRESENCE")
    print("=" * 60)

    annot = pt.parse("[Acetyl][Formyl]-PEM[Oxidation]TIDE")
    nterm = annot.nterm_mods

    print(f"N-terminal mods: {nterm}\n")
    print(f"Contains 'Acetyl': {'Acetyl' in nterm}")
    print(f"Contains 'Phospho': {'Phospho' in nterm}")
    print(f"Contains 'Formyl': {'Formyl' in nterm}")

    # ============================================================================
    # WORKING WITH DIFFERENT MOD TYPES
    # ============================================================================

    print("\n" + "=" * 60)
    print("DIFFERENT MOD TYPES")
    print("=" * 60)

    # Isotope modifications
    annot = pt.parse("<15N>PEPTIDE")
    isotope = annot.isotope_mods
    print(f"Isotope mods: {isotope}")
    print(f"  Type: {isotope.mod_type}")
    print(f"  Serialized: {isotope.serialize()}\n")

    # Static modifications
    annot = pt.parse("<[Carbamidomethyl]@C>PEPTCDE")
    static = annot.static_mods
    print(f"Static mods: {static}")
    print(f"  Type: {static.mod_type}")
    print(f"  Serialized: {static.serialize()}\n")

    # Labile modifications
    annot = pt.parse("{Glycan:Hex}PEPTIDE")
    labile = annot.labile_mods
    print(f"Labile mods: {labile}")
    print(f"  Type: {labile.mod_type}")
    print(f"  Serialized: {labile.serialize()}\n")

    # Charge adducts
    annot = pt.parse("PEPTIDE/[Na:z+1]")
    charge = annot.charge_adducts
    print(f"Charge adducts: {charge}")
    print(f"  Type: {charge.mod_type}")
    print(f"  Serialized: {charge.serialize()}")

    # ============================================================================
    # MOD COPYING
    # ============================================================================

    print("\n" + "=" * 60)
    print("COPYING MOD OBJECTS")
    print("=" * 60)

    annot = pt.parse("[Acetyl]-PEPTIDE")
    nterm = annot.nterm_mods

    print(f"Original: {nterm}")

    # Copy a Mods collection
    nterm_copy = nterm.copy()
    print(f"Copy: {nterm_copy}")
    print(f"Are they equal: {nterm._mods == nterm_copy._mods}")
    print(f"Are they the same object: {nterm is nterm_copy}")

    # ============================================================================
    # WORKING WITH MULTIPLE MODS AT SAME POSITION
    # ============================================================================

    print("\n" + "=" * 60)
    print("MULTIPLE MODS AT SAME POSITION")
    print("=" * 60)

    annot = pt.parse("PEM[Oxidation][Oxidation][Phospho][Acetyl]TIDE")
    mods = annot.get_internal_mods_at_index(2)

    print(f"Mods at position 2: {mods}\n")

    print("Individual modifications:")
    for mod in mods:
        print(f"  {mod.value} × {mod.count}")

    print(f"\nTotal mass contribution: {mods.get_mass():.4f}")
    print(f"Total composition: {mods.get_composition()}")

    # ============================================================================
    # ACCESSING UNDERLYING PROFORMA COMPONENTS
    # ============================================================================

    print("\n" + "=" * 60)
    print("UNDERLYING PROFORMA COMPONENTS")
    print("=" * 60)

    # Different modification formats
    annot1 = pt.parse("PEM[Oxidation]TIDE")
    annot2 = pt.parse("PEM[UNIMOD:35]TIDE")
    annot3 = pt.parse("PEM[+15.995]TIDE")
    annot4 = pt.parse("PEM[Formula:O]TIDE")

    print("Different representations of Oxidation:\n")

    for i, annot in enumerate([annot1, annot2, annot3, annot4], 1):
        mods = annot.get_internal_mods_at_index(2)
        for mod in mods:
            print(f"{i}. {annot.serialize()}")
            print(f"   Parsed value: {mod.value}")
            print(f"   Type: {type(mod.value).__name__}")
            print(f"   Mass: {mod.get_mass():.4f}\n")

    # ============================================================================
    # VALIDATING MODS
    # ============================================================================

    print("=" * 60)
    print("VALIDATING MODS")
    print("=" * 60)

    annot = pt.parse("[Acetyl]-PEM[Oxidation]TIDE")
    nterm = annot.nterm_mods
    internal = annot.get_internal_mods_at_index(2)

    print(f"N-terminal mods valid: {nterm.is_valid}")
    print(f"Validation result: {nterm.validate()}")

    print(f"\nInternal mods valid: {internal.is_valid}")
    print(f"Validation result: {internal.validate()}")

    # ============================================================================
    # WORKING WITH INTERVALS
    # ============================================================================

    print("\n" + "=" * 60)
    print("INTERVAL MODIFICATIONS")
    print("=" * 60)

    annot = pt.parse("PEP(TIS)[Phospho]DE")

    print(f"Annotation: {annot.serialize()}\n")

    if annot.has_intervals:
        print("Intervals:")
        for interval in annot.intervals:
            print(f"  Range: {interval.start}-{interval.end}")
            print(f"  Ambiguous: {interval.ambiguous}")
            print(f"  Has mods: {interval.has_mods}")
            if interval.has_mods:
                print(f"  Mods: {interval.mods}")
                for mod in interval.mods:
                    print(f"    {mod.value} × {mod.count}")

    # ============================================================================
    # SERIALIZATION
    # ============================================================================

    print("\n" + "=" * 60)
    print("SERIALIZATION")
    print("=" * 60)

    annot = pt.parse("[Acetyl][Formyl]-PEM[Oxidation][Phospho]TIDE/2")

    print("Full annotation:", annot.serialize())
    print("\nIndividual mod serialization:")
    print(f"  N-term: {annot.nterm_mods.serialize()}")
    print(f"  Position 2: {annot.get_internal_mods_at_index(2).serialize()}")

    # Show how mods serialize differently based on type
    print("\nMod type serialization patterns:")
    examples = [
        ("[Acetyl]-PEPTIDE", "nterm_mods"),
        ("PEPTIDE-[Amidated]", "cterm_mods"),
        ("{Glycan:Hex}PEPTIDE", "labile_mods"),
        ("[Phospho]?PEPTIDE", "unknown_mods"),
        ("<15N>PEPTIDE", "isotope_mods"),
        ("PEPTIDE/[Na:z+1]", "charge_adducts"),
    ]

    for proforma, attr in examples:
        annot = pt.parse(proforma)
        mods = getattr(annot, attr)
        print(f"  {proforma:30s} -> {mods.serialize()}")


# Run the examples only when this file is executed as a script, not on import.
if __name__ == "__main__":
    run()

Digestion

Digest protein sequences with various proteases.

"""
Protein Digestion Examples
===========================
Simple examples of in-silico enzymatic digestion using ProForma.
All digestion methods return Span objects (start, end, missed_cleavages).
Use annotation[span] to get the actual peptide.
"""

import peptacular as pt


def run():
    """Print worked examples of in-silico digestion on one modified protein.

    Sections, in order: residue-based digestion, regex/enzyme digestion,
    cleavage-site listing, missed cleavages, length filtering and
    semi-enzymatic digestion. Everything is written to stdout.
    """
    rule = "=" * 60

    def banner(title, lead="\n"):
        # Section header: opening rule (optionally preceded by ``lead``),
        # the title, then a closing rule.
        print(lead + rule)
        print(title)
        print(rule)

    def show(spans):
        # Slice the protein with each span and print the serialized peptide.
        for sp in spans:
            print(f"  {protein[sp].serialize()}")

    # ------------------------------------------------------------------
    # Simple digestion (AA based)
    # ------------------------------------------------------------------
    protein = pt.parse("[Amidated]-PEPTIDEKPEPTIDERPEPT[Phospho]IDER-[+57]")

    banner("SIMPLE DIGESTION (AA BASED)", lead="")
    print(f"Protein: {protein}\n")

    # Basic trypsin-like digestion; the span itself is printed too.
    print("Trypsin-like (cleave after K/R):")
    for sp in protein.simple_digest(cleave_on="KR"):
        piece = protein[sp]
        print(f"  {piece.serialize()} - span: {sp}")

    # Same cleavage residues, with restrictions on the neighbouring residues.
    print("\nWith restrictions (cleave after K/R, but not before N or after P):")
    show(
        protein.simple_digest(
            cleave_on="KR", restrict_before="N", restrict_after="P", cterminal=True
        )
    )

    # ------------------------------------------------------------------
    # Digestion (regex based)
    # ------------------------------------------------------------------
    banner("DIGESTION (REGEX BASED)")

    # Enzyme given as a member of the Proteases enum
    print("\nUsing Proteases enum:")
    show(protein.digest(pt.Proteases.TRYPSIN))

    # Enzyme given by name
    print("\nUsing enzyme string 'trypsin':")
    show(protein.digest("trypsin"))

    # Enzyme given as a custom regular expression
    print("\nCustom regex (cleave after A or E):")
    show(protein.digest("(?<=[AE])"))

    # ------------------------------------------------------------------
    # Cleavage sites
    # ------------------------------------------------------------------
    banner("CLEAVAGE SITES")

    print("\nCleavage positions for trypsin (after K/R):")
    site_iter = protein.simple_cleavage_sites(
        cleave_on="KR",
        restrict_after="P",
        restrict_before="N",
        cterminal=True,
    )
    sites = list(site_iter)
    print(f"  Sites: {sites}")
    print(f"  Sequence: {protein.sequence}")
    # One marker character per sequence index: '^' where the index is a site.
    carets = "".join(
        "^" if pos in sites else " " for pos in range(len(protein.sequence))
    )
    print(f"            {carets}")

    print("\nCleavage positions for included trypsin regex:")
    # Proteases.TRYPSIN or a custom regex can be passed here as well.
    sites_regex = list(protein.cleavage_sites("trypsin"))
    print(f"  Sites: {sites_regex}")

    # ------------------------------------------------------------------
    # Missed cleavages
    # ------------------------------------------------------------------
    banner("MISSED CLEAVAGES")

    print("\nWith 1 missed cleavage:")
    show(protein.digest("trypsin", missed_cleavages=1))

    # ------------------------------------------------------------------
    # Length filtering
    # ------------------------------------------------------------------
    banner("LENGTH FILTERING")

    print("\nPeptides between 7-15 amino acids:")
    for sp in protein.digest("trypsin", min_len=7, max_len=15):
        piece = protein[sp]
        print(f"  {piece.serialize()} (length: {len(piece)})")

    # ------------------------------------------------------------------
    # Semi-enzymatic digestion
    # ------------------------------------------------------------------
    banner("SEMI-ENZYMATIC")

    print("\nSemi-enzymatic (one end must be enzymatic):")
    show(protein.digest("trypsin", semi=True, min_len=5, max_len=10))


# Run the examples only when this file is executed as a script, not on import.
if __name__ == "__main__":
    run()

FASTA Digestion

Parse protein sequences from a FASTA file and digest each one.

"""
FASTA Digestion Example
========================
Parse protein sequences from a FASTA file and digest each one with trypsin.
"""

import os
import tempfile

import peptacular as pt

# Two small example records in FASTA format (a ">" header line followed by the
# sequence line); run() writes this text to a temporary file before parsing it.
FASTA_TEXT = """\
>sp|P1|EXAMPLE1 Example protein one
MPEPTIDEKAGVSEQR
>sp|P2|EXAMPLE2 Example protein two
MSEQKGARVTDEPTIDER
"""


def run():
    """Write the sample FASTA to disk, parse it, and digest every record.

    For each record the header and sequence are printed, followed by every
    tryptic peptide (up to one missed cleavage, minimum length 4) with its
    mass. All output goes to stdout.
    """
    # Write the FASTA text to a real file so pt.parse_fasta can read it from disk. Use a
    # temporary directory so the file is cleaned up automatically when the block exits.
    with tempfile.TemporaryDirectory() as tmp_dir:
        fasta_path = os.path.join(tmp_dir, "example.fasta")
        # Explicit encoding so the written bytes do not depend on the locale default.
        with open(fasta_path, "w", encoding="utf-8") as f:
            f.write(FASTA_TEXT)

        records = pt.parse_fasta(fasta_path)

        # The records are consumed inside the ``with`` block, i.e. while the
        # temporary file still exists.
        for record in records:
            protein = pt.parse(record.sequence)
            print(f"\n{record.header} ({record.sequence})")
            for span in protein.digest(pt.Proteases.TRYPSIN, missed_cleavages=1, min_len=4):
                peptide = protein[span]
                print(f"  {peptide.serialize()}  mass={peptide.mass():.4f}")


# Run the example only when this file is executed as a script, not on import.
if __name__ == "__main__":
    run()

Fragmentation

Generate theoretical fragment ions for peptides.

"""
Fragment Generation Examples
=============================
Examples of generating fragment ions from ProForma annotations.
All fragment methods return Fragment objects with mass, m/z, and composition.
"""

import peptacular as pt


def run():
    """Print worked examples of fragment-ion generation for one peptide.

    Sections, in order: basic b/y ions, other ion types, charged fragments,
    user deltas, neutral deltas, isotopes, internal fragments, immonium ions,
    the precursor ion, combined options, fragment properties, mzPAF output
    and ``fast_fragment``. Everything is written to stdout.
    """
    # ============================================================================
    # BASIC FRAGMENTATION
    # ============================================================================

    peptide = pt.parse("PEPT[Phospho]IDE-[Acetyl]")

    print("=" * 60)
    print("BASIC FRAGMENTATION")
    print("=" * 60)
    print(f"Peptide: {peptide}\n")

    # --- b-ions (N-terminal fragments) ---
    print("b-ions (N-terminal):")
    for frag in peptide.fragment(ion_types=["b"]):
        print(f"  {frag}")

    # --- y-ions (C-terminal fragments) ---
    print("\ny-ions (C-terminal):")
    for frag in peptide.fragment(ion_types=["y"]):
        print(f"  {frag}")

    # ============================================================================
    # FRAGMENT ION TYPES
    # ============================================================================

    print("\n" + "=" * 60)
    print("DIFFERENT ION TYPES")
    print("=" * 60)

    # Generate multiple ion types at once
    print("\na, b, c ions:")
    for frag in peptide.fragment(ion_types=["a", "b", "c"]):
        print(f"  {frag}")

    print("\nx, y, z ions:")
    for frag in peptide.fragment(ion_types=["x", "y", "z"]):
        print(f"  {frag}")

    # ============================================================================
    # CHARGED FRAGMENTS
    # ============================================================================

    print("\n" + "=" * 60)
    print("CHARGED FRAGMENTS")
    print("=" * 60)

    # Charge state
    print("\nb-ions at +2 charge:")
    for frag in peptide.fragment(ion_types=["b"], charges=[2]):
        print(f"  {frag}")

    # Adduct charges
    print("\ny-ions with Na+ adduct:")
    for frag in peptide.fragment(ion_types=["y"], charges=["Na:z+1"]):
        print(f"  {frag}")

    # ============================================================================
    # DELTAS (User Specified)
    # ============================================================================

    print("\n" + "=" * 60)
    print("DELTAS")
    print("=" * 60)

    # -18 loss applied to all ions
    print("\ny-ions with -18 loss:")
    for frag in peptide.fragment(
        ion_types=["y"],
        deltas=[-18.0],  # Custom delta of -18.0 Da
    ):
        print(f"  {frag}")

    # By default deltas is (None,), so to also generate fragments with no loss
    # you must include None explicitly.
    print("\ny-ions with -18 and No loss:")
    for frag in peptide.fragment(
        ion_types=["y"],
        deltas=[-18.0, None],  # Custom delta of -18.0 Da and no loss
    ):
        print(f"  {frag}")

    # ============================================================================
    # NEUTRAL DELTAS
    # ============================================================================

    # In addition, neutral deltas can be specified to apply common losses like
    # H2O or NH3 to appropriate fragments. These work in addition to any custom
    # deltas specified above.

    print("\n" + "=" * 60)
    print("NEUTRAL DELTAS")
    print("=" * 60)

    # Water loss (selectively applied to fragments that can lose H2O, i.e. those
    # containing "S", "T", "D" or "E")
    print("\ny-ions with H2O loss:")
    for frag in peptide.fragment(
        ion_types=["y"],
        neutral_deltas=["H2O"],
        max_ndeltas=2,
    ):
        print(f"  {frag}")

    # Multiple losses. Can also specify neutral deltas as their enum types
    print("\nb-ions with H2O and NH3 loss:")
    for frag in peptide.fragment(
        ion_types=["b"],
        neutral_deltas=[pt.NeutralDelta.WATER, pt.NeutralDelta.AMMONIA],
        max_ndeltas=2,
    ):
        print(f"  {frag}")

    # ============================================================================
    # ISOTOPES
    # ============================================================================

    print("\n" + "=" * 60)
    print("ISOTOPIC FRAGMENTS")
    print("=" * 60)

    # C13 isotopes
    print("\ny-ions with 1x 13C:")
    for frag in peptide.fragment(ion_types=["y"], isotopes=[1]):
        print(f"  {frag}")

    # Custom isotopes
    print("\nb-ions with 2x 17O:")
    for frag in peptide.fragment(ion_types=["b"], isotopes=[{"17O": 2}]):
        print(f"  {frag}")

    # ============================================================================
    # INTERNAL FRAGMENTS
    # ============================================================================

    print("\n" + "=" * 60)
    print("INTERNAL FRAGMENTS")
    print("=" * 60)

    print("\nInternal fragments (min_len=3, max_len=5):")
    for frag in peptide.fragment(ion_types=["ax"]):
        # Only fragments whose position is a (start, end) tuple are filtered by
        # length here; the length filter is applied on end - start.
        if frag.position and isinstance(frag.position, tuple):
            start, end = frag.position
            if 3 <= (end - start) <= 5:
                print(f"  {frag}")

    # ============================================================================
    # IMMONIUM IONS
    # ============================================================================

    print("\n" + "=" * 60)
    print("IMMONIUM IONS")
    print("=" * 60)

    print("\nImmonium ions:")
    for frag in peptide.fragment(ion_types=["i"]):
        print(f"  {frag}")

    # ============================================================================
    # PRECURSOR ION
    # ============================================================================

    print("\n" + "=" * 60)
    print("PRECURSOR ION")
    print("=" * 60)

    print("\nPrecursor ion at +2 charge:")
    for frag in peptide.fragment(ion_types=["p"], charges=[2]):
        print(f"  {frag}")

    # ============================================================================
    # COMBINING OPTIONS
    # ============================================================================

    print("\n" + "=" * 60)
    print("COMBINING OPTIONS")
    print("=" * 60)

    print("\ny-ions: +2 charge, H2O loss, 1x 13C:")
    for frag in peptide.fragment(ion_types=["y"], charges=[2], neutral_deltas=["H2O"], isotopes=[1]):
        print(f"  {frag}")

    # ============================================================================
    # ACCESSING FRAGMENT PROPERTIES
    # ============================================================================

    print("\n" + "=" * 60)
    print("FRAGMENT PROPERTIES")
    print("=" * 60)

    # Unless otherwise specified, fragments do not include sequence or composition data.
    # This can be enabled with the `include_sequence` and `calculate_composition` flags.
    b_ions: list[pt.Fragment] = peptide.fragment(ion_types=["b"], charges=[2], calculate_composition=True)
    if len(b_ions) > 0:
        frag = b_ions[0]
        print(f"\nExample fragment: {frag}")
        print(f"  Ion type: {frag.ion_type}")
        print(f"  Position: {frag.position}")
        print(f"  Mass: {frag.mass:.4f} Da")
        print(f"  m/z: {frag.mz:.4f}")
        print(f"  Charge: {frag.charge_state}")
        print(f"  Neutral mass: {frag.neutral_mass:.4f} Da")
        if frag.composition:
            # Stringify the element keys so the dict prints compactly.
            comp_str = {str(elem): count for elem, count in frag.composition.items()}
            print(f"  Composition: {comp_str}")

    # ============================================================================
    # MZPAF OUTPUT
    # ============================================================================

    print("\n" + "=" * 60)
    print("MZPAF OUTPUT")
    print("=" * 60)

    # See paftacular documentation for details on mzPAF format

    print("\nFragment annotations in mzPAF format:")
    fragments: list[pt.Fragment] = peptide.fragment(ion_types=["b", "y"], charges=[2])
    for frag in fragments[:8]:  # Show first 8 fragments
        mzpaf = frag.to_mzpaf()
        print(f"  {mzpaf}")

    # serialize() with format parameter also works
    print("\nUsing serialize(format='mzpaf'):")
    for frag in fragments[:4]:
        print(f"  {frag.serialize(format='mzpaf')}")

    # ============================================================================
    # FAST FRAGMENT
    # ============================================================================

    # No extra rule is printed before this banner: the banner itself opens with
    # one, so an additional rule would show up as a doubled separator.
    print("\n" + "=" * 60)
    print("FAST FRAGMENT")
    print("=" * 60)

    # fast_fragment() uses a prefix/suffix-sum algorithm to compute fragment m/z
    # values without constructing Fragment objects. It is faster than fragment()
    # for high-throughput use cases.
    #
    # Return type: dict[(IonType, charge)] -> list[float]
    # Each list has length == len(peptide), ordered fragment position 1 to N.
    #
    # Limitations vs fragment():
    # - No neutral losses (H2O, NH3, custom deltas)
    # - No isotope shifts
    # - No adduct charges (integer charges only)
    # - No internal / immonium ions
    # - Raises if the annotation has unknown or interval modifications

    peptide = pt.parse("PEPT[Phospho]IDE")

    # --- OOP method ---
    mz_map = peptide.fast_fragment(ion_types=["b", "y"], charges=[1, 2])
    print(f"\nPeptide: {peptide}")
    for (ion_type, charge), mzs in mz_map.items():
        print(f"  ({ion_type}, z={charge}): {[round(v, 4) for v in mzs]}")

    # --- Functional API (identical result) ---
    print("\nFunctional API (pt.fast_fragment):")
    mz_map2 = pt.fast_fragment("PEPTIDE", ion_types=["b", "y"], charges=[1])
    for (ion_type, charge), mzs in mz_map2.items():
        print(f"  ({ion_type}, z={charge}): {[round(v, 4) for v in mzs]}")

    # --- Batch / parallel: pass a list of sequences ---
    print("\nBatch fast_fragment (list input):")
    sequences = ["PEPTIDE", "ACDEFGHIK", "LMNPQRST"]
    results = pt.fast_fragment(sequences, ion_types=["y"], charges=[1])
    for seq, mz_map3 in zip(sequences, results):
        # Only one (ion type, charge) pair was requested, so take the first
        # m/z list; the key itself is not needed.
        mzs = next(iter(mz_map3.values()))
        print(f"  {seq}: {[round(v, 4) for v in mzs]}")

    print("\n" + "=" * 60)


# Run the examples only when this file is executed as a script, not on import.
if __name__ == "__main__":
    run()

Isotope Calculations

Calculate isotopic distributions for peptides.

"""
Isotopic Distribution Calculations
===================================
Examples of calculating isotopic distributions from ProForma annotations.
"""

import peptacular as pt


def run():
    """Print isotopic-distribution examples for the unmodified peptide ``PEPTIDE``.

    Each section calls ``isotopic_distribution()`` with different keyword
    arguments and prints the returned peaks to stdout.
    """
    bar = "=" * 60

    def section(title, lead="\n"):
        # Banner: opening rule (with ``lead`` prepended), title, closing rule.
        print(lead + bar)
        print(title)
        print(bar)

    def show_mass_abundance(peaks):
        # Two-column layout shared by the "limited" and "filtered" examples.
        for peak in peaks:
            print(f"  mass: {peak.mass:>8.3f} Da, abundance: {peak.abundance:>6.3f}")

    # Parse a simple peptide sequence
    peptide = pt.parse("PEPTIDE")

    # ------------------------------------------------------------------
    # Basic isotopic distribution
    # ------------------------------------------------------------------
    section("BASIC ISOTOPIC DISTRIBUTION", lead="")

    # Default call: each entry carries mass, neutron_count and abundance;
    # abundances are normalized so the max peak is 1.0.
    default_peaks = peptide.isotopic_distribution()
    print(f"\nPeptide: {peptide.serialize()}")
    print(f"Monoisotopic mass: {peptide.mass():.3f} Da")
    print("Default isotopic distribution:")
    for peak in default_peaks:
        print(
            f"  mass: {peak.mass:>8.3f} Da, abundance: {peak.abundance:>6.3f}, neutrons: {peak.neutron_count}"
        )

    # Cap the number of isotopes returned
    top_three = peptide.isotopic_distribution(max_isotopes=3)
    print("\nLimited to 3 most abundant isotopes:")
    show_mass_abundance(top_three)

    # Keep only isotopes with abundance >= threshold (relative to max peak)
    above_threshold = peptide.isotopic_distribution(min_abundance_threshold=0.05)
    print("\nFiltered (≥5% of max peak):")
    show_mass_abundance(above_threshold)

    # Neutron-count mode: offsets instead of absolute masses
    # (useful for matching patterns)
    by_neutron = peptide.isotopic_distribution(use_neutron_count=True)
    print("\nNeutron offset mode:")
    for peak in by_neutron:
        print(f"  neutron offset: {peak.mass:>3.0f}, abundance: {peak.abundance:>6.3f}")

    # ------------------------------------------------------------------
    # Distribution resolution
    # ------------------------------------------------------------------
    section("DISTRIBUTION RESOLUTION")

    # High resolution: more decimal places for precise mass calculations
    fine = peptide.isotopic_distribution(distribution_resolution=5)
    print("\nHigh resolution (5 decimals):")
    for peak in fine[:3]:
        print(f"  mass: {peak.mass:.5f} Da, abundance: {peak.abundance:>6.3f}")

    # Low resolution: simulates lower instrument precision, combines nearby masses
    coarse = peptide.isotopic_distribution(distribution_resolution=2)
    print("\nLow resolution (2 decimals):")
    for peak in coarse[:3]:
        print(f"  mass: {peak.mass:.2f} Da, abundance: {peak.abundance:>6.3f}")

    # ------------------------------------------------------------------
    # Combining with comp parameters
    # ------------------------------------------------------------------
    section("COMBINING WITH COMP PARAMETERS")

    # isotopic_distribution() accepts the same parameters as comp():
    # charge, isotopes, losses and ion type can be combined in one call.
    combined = peptide.isotopic_distribution(
        ion_type="y", charge=2, isotopes=1, deltas={"H2O": 1}
    )
    print("\ny-ion, +2 charge, +1 13C, -H2O:")
    for peak in combined[:4]:
        print(f"  m/z: {peak.mass:>8.3f}, abundance: {peak.abundance:>6.3f}")


# Run the examples only when this file is executed as a script, not on import.
if __name__ == "__main__":
    run()

Physicochemical Properties

Calculate various properties like pI, hydrophobicity, etc.

"""
Sequence Property Calculations
===============================
Examples of calculating physicochemical and structural properties of peptides.
Note: These calculations use only the amino acid sequence; modifications are not considered.
"""

import peptacular as pt

def run():
    """Print worked examples of the sequence-property API exposed via ``annot.prop``.

    Sections, in order: single-value physicochemical properties,
    secondary-structure percentages, composition and charge, ``calc_property``
    with enum / string / dict scales, sliding windows, partitions, and two
    practical profiles. Everything is written to stdout.
    """
    # Parse a test peptide
    annot = pt.parse("PEPTIDE")

    # ============================================================================
    # SIMPLE PHYSICOCHEMICAL PROPERTIES
    # ============================================================================

    print("=" * 60)
    print("PHYSICOCHEMICAL PROPERTIES")
    print("=" * 60)

    # These properties return single float values
    print(f"Sequence: {annot}")
    print(f"Hydrophobicity: {annot.prop.hydrophobicity:.3f}")
    print(f"Flexibility: {annot.prop.flexibility:.3f}")
    print(f"Hydrophilicity: {annot.prop.hydrophilicity:.3f}")
    print(f"Surface accessibility: {annot.prop.surface_accessibility:.3f}")
    print(f"Polarity: {annot.prop.polarity:.3f}")
    print(f"Aromaticity: {annot.prop.aromaticity:.3f}")
    print(f"Isoelectric point (pI): {annot.prop.pi:.2f}")
    print(f"HPLC retention: {annot.prop.hplc:.3f}")
    print(f"Refractivity: {annot.prop.refractivity:.3f}")

    # ============================================================================
    # STRUCTURAL PROPERTIES
    # ============================================================================

    print("\n" + "=" * 60)
    print("STRUCTURAL PROPERTIES")
    print("=" * 60)

    # Secondary structure percentages
    print(f"Alpha helix: {annot.prop.alpha_helix_percent:.1f}%")
    print(f"Beta sheet: {annot.prop.beta_sheet_percent:.1f}%")
    print(f"Beta turn: {annot.prop.beta_turn_percent:.1f}%")
    print(f"Coil: {annot.prop.coil_percent:.1f}%")

    # Predicted secondary structure using different methods
    ss_dr = annot.prop.secondary_structure(pt.SecondaryStructureMethod.DELEAGE_ROUX)
    print("\nSecondary structure (Deleage-Roux method):")
    print(f"  Alpha helix: {ss_dr['alpha_helix']:.1f}%")
    print(f"  Beta sheet: {ss_dr['beta_sheet']:.1f}%")
    print(f"  Beta turn: {ss_dr['beta_turn']:.1f}%")
    print(f"  Coil: {ss_dr['coil']:.1f}%")

    # ============================================================================
    # COMPOSITION-BASED PROPERTIES
    # ============================================================================

    print("\n" + "=" * 60)
    print("COMPOSITION PROPERTIES")
    print("=" * 60)

    # Amino acid composition
    proline_pct = annot.prop.aa_property_percentage("P")
    acidic_pct = annot.prop.aa_property_percentage("DE")  # D and E
    basic_pct = annot.prop.aa_property_percentage("KR")  # K and R
    print(f"Proline content: {proline_pct:.1f}%")
    print(f"Acidic residues (D, E): {acidic_pct:.1f}%")
    print(f"Basic residues (K, R): {basic_pct:.1f}%")

    # Charge at different pH values
    print(f"\nNet charge at pH 7.0: {annot.prop.charge_at_ph(7.0):.2f}")
    print(f"Net charge at pH 3.0: {annot.prop.charge_at_ph(3.0):.2f}")
    print(f"Net charge at pH 11.0: {annot.prop.charge_at_ph(11.0):.2f}")

    # ============================================================================
    # CUSTOM PROPERTY CALCULATIONS
    # ============================================================================

    print("\n" + "=" * 60)
    print("CUSTOM PROPERTY CALCULATIONS")
    print("=" * 60)

    # --- Basic calculation with default options ---
    prop = annot.prop.calc_property(
        scale=pt.HydrophobicityScale.ABRAHAM_LEO,
        missing_aa_handling=pt.MissingAAHandling.ERROR,  # default
        aggregation_method=pt.AggregationMethod.SUM,  # default
        normalize=False,  # default
        weighting_scheme=pt.WeightingMethods.UNIFORM,  # default
        min_weight=0.0,  # default
        max_weight=1.0,  # default
    )
    print(f"Abraham-Leo hydrophobicity (sum): {prop:.2f}")

    # --- Using string identifiers ---
    prop_avg = annot.prop.calc_property(
        scale="deleage_roux_alpha_helix",
        missing_aa_handling="avg",
        aggregation_method="avg",
    )
    print(f"Alpha helix propensity (avg): {prop_avg:.3f}")

    # --- Custom scale dictionary ---
    custom_scale = {
        "A": 1.0, "C": 2.0, "D": 3.0, "E": 4.0,
        "F": 5.0, "G": 6.0, "H": 7.0, "I": 8.0,
        "K": 9.0, "L": 10.0, "M": 11.0, "N": 12.0,
        "P": 13.0, "Q": 14.0, "R": 15.0, "S": 16.0,
        "T": 17.0, "V": 18.0, "W": 19.0, "Y": 20.0,
    }
    custom_prop = annot.prop.calc_property(scale=custom_scale, missing_aa_handling="avg")
    print(f"Custom scale (sum): {custom_prop:.2f}")

    # ============================================================================
    # AVAILABLE OPTIONS FOR calc_property()
    # ============================================================================

    print("\n" + "=" * 60)
    print("CALC_PROPERTY OPTIONS")
    print("=" * 60)

    # Reference notes only; nothing in this section is printed besides the banner.
    #
    # [Scale]
    # - Use built-in scale enums (e.g., HydrophobicityScale.ABRAHAM_LEO)
    # - Use scale name as string (e.g., "abraham_leo")
    # - Provide custom dict (e.g., {'A': 1.0, 'C': 2.0, ...})
    # - ~50 built-in scales available
    #
    # [missing_aa_handling]
    # - 'avg': Use average of known values
    # - 'min': Use minimum of known values
    # - 'max': Use maximum of known values
    # - 'median': Use median of known values
    # - 'error': Raise error (default)
    # - 'zero': Use 0.0
    # - 'skip': Skip missing amino acids
    #
    # [aggregation_method]
    # - 'sum': Sum of amino acid values (default)
    # - 'avg': Average of amino acid values
    #
    # [normalize]
    # - True: Normalize each AA's property value to [0, 1] before aggregation
    # - False: Use raw values (default)
    #
    # [weighting_scheme]
    # - 'uniform': All positions weighted equally (default)
    # - 'linear': Linear weighting across sequence
    # - 'exponential': Exponential weighting
    # - 'gaussian': Gaussian weighting
    # - 'sigmoid': Sigmoid weighting
    # - 'cosine': Cosine weighting
    # - 'sinusoidal': Sinusoidal weighting
    #
    # [min_weight, max_weight]
    # - Define weight range (default: 0.0 to 1.0)

    # ============================================================================
    # SLIDING WINDOW CALCULATIONS
    # ============================================================================

    # Leading blank line so this banner is separated from the previous closing
    # rule, as every other section banner is.
    print("\n" + "=" * 60)
    print("SLIDING WINDOW CALCULATIONS")
    print("=" * 60)

    # Calculate property over sliding windows
    windows = annot.prop.property_windows(
        scale=pt.HydrophobicityScale.ABRAHAM_LEO,
        window_size=4,
        missing_aa_handling=pt.MissingAAHandling.ERROR,
        aggregation_method=pt.AggregationMethod.SUM,
        normalize=False,
        weighting_scheme=pt.WeightingMethods.UNIFORM,
        min_weight=0.0,
        max_weight=1.0,
    )
    print("\nWindow size 4 (overlapping):")
    print(f"  Values: {[f'{v:.2f}' for v in windows]}")
    print(f"  Number of windows: {len(windows)}")

    # Different (smaller) window size
    windows_small = annot.prop.property_windows(
        scale=pt.HydrophobicityScale.ABRAHAM_LEO,
        window_size=3,
    )
    print("\nWindow size 3:")
    print(f"  Values: {[f'{v:.2f}' for v in windows_small]}")

    # ============================================================================
    # PARTITIONED WINDOW CALCULATIONS
    # ============================================================================

    print("\n" + "=" * 60)
    print("PARTITIONED WINDOW CALCULATIONS")
    print("=" * 60)

    # Divide sequence into fixed number of non-overlapping partitions
    partitions = annot.prop.property_partitions(
        scale=pt.HydrophobicityScale.ABRAHAM_LEO,
        num_windows=3,
        aa_overlap=0,
        missing_aa_handling=pt.MissingAAHandling.ERROR,
        aggregation_method=pt.AggregationMethod.SUM,
        normalize=False,
        weighting_scheme=pt.WeightingMethods.UNIFORM,
        min_weight=0.0,
        max_weight=1.0,
    )
    print("\n3 partitions (no overlap):")
    print(f"  Values: {[f'{v:.2f}' for v in partitions]}")

    # With overlap between partitions
    partitions_overlap = annot.prop.property_partitions(
        scale=pt.HydrophobicityScale.ABRAHAM_LEO,
        num_windows=3,
        aa_overlap=1,
    )
    print("\n3 partitions (1 AA overlap):")
    print(f"  Values: {[f'{v:.2f}' for v in partitions_overlap]}")

    # ============================================================================
    # PRACTICAL EXAMPLES
    # ============================================================================

    print("\n" + "=" * 60)
    print("PRACTICAL EXAMPLES")
    print("=" * 60)

    # Example: Hydrophobicity profile for transmembrane prediction
    tm_peptide = pt.parse("LFGAIAGFIENGWEGMIDG")
    tm_windows = tm_peptide.prop.property_windows(
        scale=pt.HydrophobicityScale.KYTE_DOOLITTLE,
        window_size=9,
    )
    print(f"\nTransmembrane peptide: {tm_peptide}")
    print("Kyte-Doolittle hydrophobicity profile (window=9):")
    # Number the windows from 1 for display.
    for pos, val in enumerate(tm_windows, 1):
        print(f"  Position {pos}: {val:.2f}")

    # Example: Charge distribution analysis
    charged_peptide = pt.parse("PKDEPKDE")
    charge_partitions = charged_peptide.prop.property_partitions(
        scale={"K": 1, "R": 1, "D": -1, "E": -1},  # Simple charge scale
        num_windows=4,
        aa_overlap=0,
        missing_aa_handling="zero",  # residues absent from the scale count as 0
    )
    print(f"\nCharged peptide: {charged_peptide}")
    print("Charge distribution (4 regions):")
    for region, val in enumerate(charge_partitions, 1):
        print(f"  Region {region}: {val:+.1f}")

    print("\n" + "=" * 60)


# Run the examples only when this file is executed as a script, not on import.
if __name__ == "__main__":
    run()

Converters

Convert sequences from other tools (IP2, DIANN, Casanovo, MS2PIP) to ProForma format.

"""
Sequence Format Conversion Examples
====================================
Examples of converting peptide sequences from other tools (IP2, DIANN, Casanovo,
MS2PIP) to ProForma 2.1 format. All conversion functions support parallel execution.
"""

import peptacular as pt


def run():
    """Print worked examples of converting tool-specific sequences to ProForma.

    Walks through four sections in order -- IP2, DIANN, Casanovo and MS2PIP --
    printing each input next to the value returned by the matching ``pt``
    conversion call. Takes no arguments and returns nothing; all results go
    to stdout.
    """
    rule = "=" * 60

    def banner(title, lead=""):
        # Section header: rule / title / rule. ``lead`` is prepended to the
        # first rule so later sections can start with a blank line.
        print(lead + rule)
        print(title)
        print(rule)

    # ---- IP2 ---------------------------------------------------------------
    banner("IP2 SEQUENCE CONVERSION")

    # Basic IP2 format: K.SEQUENCE.K
    ip2_input = "K.PEPTIDE.K"
    ip2_result = pt.convert_ip2_sequence(ip2_input)
    print(f"IP2: {ip2_input}")
    print(f"ProForma: {ip2_result}\n")

    # ---- DIANN -------------------------------------------------------------
    banner("DIANN SEQUENCE CONVERSION", lead="\n")

    # Sequence carrying a bracketed modification
    diann_input = "_YMGTLRGC[Carbamidomethyl]LLRLYHD_"
    diann_result = pt.convert_diann_sequence(diann_input)
    print(f"DIANN with mod: {diann_input}")
    print(f"ProForma: {diann_result}\n")

    # ---- Casanovo ----------------------------------------------------------
    banner("CASANOVO SEQUENCE CONVERSION", lead="\n")

    # Sequence with two inline mass shifts
    casanovo_input = "+43.006P+100EPTIDE"
    casanovo_result = pt.convert_casanovo_sequence(casanovo_input)
    print(f"Casanovo complex: {casanovo_input}")
    print(f"ProForma: {casanovo_result}")

    # Same input family, but built through the annotation constructor
    from_casanovo = pt.ProFormaAnnotation.from_casanovo("+43.006PEPTIDE")
    print(f"\nCasanovo (annotation method): {from_casanovo.serialize()}")
    print(f"  Mass: {from_casanovo.mass():.4f} Da")

    # ---- MS2PIP ------------------------------------------------------------
    banner("MS2PIP FORMAT CONVERSION", lead="\n")

    # ProForma -> MS2PIP: the call yields a (sequence, modification string) pair
    source_annot = pt.parse("[Acetyl]-PEM[Oxidation]TIDE")
    bare_sequence, ms2pip_mods = source_annot.to_ms2_pip()
    print(f"\nProForma: {source_annot.serialize()}")
    print(f"MS2PIP sequence: {bare_sequence}")
    print(f"MS2PIP mods: {ms2pip_mods}")

    # MS2PIP -> ProForma
    round_trip = pt.ProFormaAnnotation.from_ms2_pip(
        sequence="PEPTIDE",
        mod_str="0|Acetyl|3|Oxidation",
    )
    print(f"\nMS2PIP -> ProForma: {round_trip.serialize()}")

    # MS2PIP -> ProForma, additionally supplying static modifications
    with_static = pt.ProFormaAnnotation.from_ms2_pip(
        sequence="PEPTIDE",
        mod_str="0|Acetyl",
        static_mods={"C": "Carbamidomethyl"},
    )
    print(f"MS2PIP with static mods: {with_static.serialize()}")


# Run the conversion examples only when this file is executed as a script.
if __name__ == "__main__":
    run()

Parallel Processing

Batch-process peptides for mass, composition, and property calculations; list inputs are parallelized automatically.

"""
Parallel Execution Examples
============================
Examples of using parallel processing with peptacular sequence functions.
Most functions in the sequence module accept lists and automatically use
multiprocessing for better performance on large datasets.
"""

import peptacular as pt
import time

# ============================================================================
# BASIC PARALLEL EXECUTION (must be run through __main__)
# ============================================================================

def run():
    """Run the parallel-execution examples and print their results.

    Each section passes a list (sequence strings or parsed annotations) to a
    ``pt`` function and prints one line per input. The final section times a
    single batched ``pt.mass`` call against a per-peptide loop over the same
    40,000-sequence dataset. Takes no arguments and returns nothing; all
    results go to stdout.
    """
    print("=" * 60)
    print("BASIC PARALLEL EXECUTION")
    print("=" * 60)

    # Create a list of peptide sequences
    peptides = [
        'PEPTIDE',
        'PATRICK',
        'TYLER',
        'GARRETT',
    ]

    # Parse multiple sequences in parallel
    # Pass a list of strings - automatically uses multiprocessing
    annotations = pt.parse(peptides)
    print(f"\nParsed {len(annotations)} peptides:")
    for annot in annotations:
        print(f"  {annot.serialize()}")

    # ============================================================================
    # MASS CALCULATIONS IN PARALLEL
    # ============================================================================

    print("\n" + "=" * 60)
    print("MASS CALCULATIONS")
    print("=" * 60)

    # Calculate masses for multiple peptides
    masses = pt.mass(peptides)
    print("\nMasses:")
    for pep, mass in zip(peptides, masses):
        print(f"  {pep}: {mass:.4f} Da")

    # With charge states
    masses_charged = pt.mass(peptides, charge=2)
    print("\nMasses at +2 charge:")
    for pep, mass in zip(peptides, masses_charged):
        print(f"  {pep}: {mass:.4f} Da")

    # m/z calculations
    mz_values = pt.mz(peptides, charge=2)
    print("\nm/z at +2 charge:")
    for pep, mz in zip(peptides, mz_values):
        print(f"  {pep}: {mz:.4f}")

    # ============================================================================
    # COMPOSITION IN PARALLEL
    # ============================================================================

    print("\n" + "=" * 60)
    print("COMPOSITION CALCULATIONS")
    print("=" * 60)

    # Get compositions for multiple peptides
    compositions = pt.comp(peptides)
    print("\nCompositions:")
    for pep, comp in zip(peptides, compositions):
        # Stringify the element keys so the dict prints compactly
        comp_str = {str(elem): count for elem, count in comp.items()}
        print(f"  {pep}: {comp_str}")

    # ============================================================================
    # PHYSICOCHEMICAL PROPERTIES
    # ============================================================================

    print("\n" + "=" * 60)
    print("PHYSICOCHEMICAL PROPERTIES")
    print("=" * 60)

    # Calculate hydrophobicity for all peptides
    hydrophobicity = pt.hydrophobicity(peptides)
    print("\nHydrophobicity:")
    for pep, hydro in zip(peptides, hydrophobicity):
        print(f"  {pep}: {hydro:.3f}")

    # Calculate pI values
    pi_values = pt.pi(peptides)
    print("\nIsoelectric points:")
    for pep, pi in zip(peptides, pi_values):
        print(f"  {pep}: {pi:.2f}")

    # Calculate aromaticity
    aromaticity = pt.aromaticity(peptides)
    print("\nAromaticity:")
    for pep, arom in zip(peptides, aromaticity):
        print(f"  {pep}: {arom:.3f}")

    # ============================================================================
    # USING ANNOTATION OBJECTS
    # ============================================================================

    print("\n" + "=" * 60)
    print("USING ANNOTATION OBJECTS")
    print("=" * 60)

    # Can also pass lists of ProFormaAnnotation objects
    modified_peptides = [
        pt.parse('[Acetyl]-PEPTIDE'),
        pt.parse('PEM[Oxidation]TIDE'),
        pt.parse('SEQS[Phospho]UENCE/2'),
    ]

    # Calculate masses from annotations
    annot_masses = pt.mass(modified_peptides)
    print("\nModified peptide masses:")
    for annot, mass in zip(modified_peptides, annot_masses):
        print(f"  {annot.serialize()}: {mass:.4f} Da")

    # ============================================================================
    # CUSTOM PROPERTY CALCULATIONS
    # ============================================================================

    print("\n" + "=" * 60)
    print("CUSTOM PROPERTY CALCULATIONS")
    print("=" * 60)

    # Calculate custom properties in parallel
    custom_props = pt.calc_property(
        peptides,
        scale=pt.HydrophobicityScale.KYTE_DOOLITTLE,
        aggregation_method='avg',
    )
    print("\nKyte-Doolittle hydrophobicity (average):")
    for pep, prop in zip(peptides, custom_props):
        print(f"  {pep}: {prop:.3f}")

    # ============================================================================
    # PERFORMANCE COMPARISON
    # ============================================================================

    print("\n" + "=" * 60)
    print("PERFORMANCE COMPARISON")
    print("=" * 60)

    # Create larger dataset for timing
    large_dataset = peptides * 10000  # 40,000 peptides

    # Time the batched (list-input) call.
    # perf_counter() is a monotonic, high-resolution clock intended for
    # measuring elapsed intervals; time.time() differences can be skewed by
    # system clock adjustments and coarse resolution.
    start = time.perf_counter()
    pt.mass(large_dataset)
    parallel_time = time.perf_counter() - start

    print(f"\nProcessed {len(large_dataset)} peptides:")
    print(f"  Parallel execution: {parallel_time:.3f} seconds")
    print(f"  Average per peptide: {parallel_time/len(large_dataset)*1000:.2f} ms")

    # Time one call per peptide for comparison; the results are collected in a
    # list so both variants materialize the same amount of output.
    start = time.perf_counter()
    _ = [pt.mass(pep) for pep in large_dataset]
    serial_time = time.perf_counter() - start
    print(f"  Serial execution: {serial_time:.3f} seconds")
    print(f"  Average per peptide: {serial_time/len(large_dataset)*1000:.2f} ms")

    print("\n" + "=" * 60)

# Script entry point: run the examples only when executed directly. The section
# header for this example states that it must be run through __main__.
if __name__ == "__main__":
    run()