Examples

This section provides practical examples demonstrating key features of Peptacular.

ProForma Notation

Basic usage of ProForma notation for representing modified peptides.

  1"""
  2ProForma Notation Examples
  3===========================
  4Comprehensive examples of supported ProForma 2.0 notation in peptacular.
  5Demonstrates parsing and serialization of various modification types and features.
  6"""
  7
  8import peptacular as pt
  9
 10
 11def run():
 12    # ============================================================================
 13    # BASIC SEQUENCES
 14    # ============================================================================
 15
 16    print("=" * 60)
 17    print("BASIC SEQUENCES")
 18    print("=" * 60)
 19
 20    # Simple unmodified peptide
 21    simple = pt.parse("PEPTIDE")
 22    print(f"Simple sequence: {simple.serialize()}")
 23
 24    # ============================================================================
 25    # TERMINAL MODIFICATIONS
 26    # ============================================================================
 27
 28    print("\n" + "=" * 60)
 29    print("TERMINAL MODIFICATIONS")
 30    print("=" * 60)
 31
 32    # Both terminals modified
 33    both = pt.parse("[Acetyl]-PEPTIDE-[Amidated]")
 34    print(f"Both terminals: {both.serialize()}")
 35
 36    # Multiple N-terminal modifications
 37    multi_nterm = pt.parse("[Acetyl][Formyl]-PEPTIDE")
 38    print(f"Multiple N-term mods: {multi_nterm.serialize()}")
 39
 40    # ============================================================================
 41    # INTERNAL MODIFICATIONS
 42    # ============================================================================
 43
 44    print("\n" + "=" * 60)
 45    print("INTERNAL MODIFICATIONS")
 46    print("=" * 60)
 47
 48    # Multiple different modifications
 49    multi_internal = pt.parse("PEM[Oxidation]TIS[Phospho]DE")
 50    print(f"Multiple modifications: {multi_internal.serialize()}")
 51
 52    # Multiple modifications on same residue
 53    same_residue = pt.parse("PEM[Oxidation][Dioxidation]TIDE")
 54    print(f"Multiple mods on M: {same_residue.serialize()}")
 55
 56    # ============================================================================
 57    # MODIFICATION NOTATION TYPES
 58    # ============================================================================
 59
 60    print("\n" + "=" * 60)
 61    print("MODIFICATION NOTATION TYPES")
 62    print("=" * 60)
 63
 64    # By name (Unimod/PSI-MOD)
 65    by_name = pt.parse("PEM[Oxidation]TIDE")
 66    print(f"By name: {by_name.serialize()}")
 67
 68    # By accession number requires the UNIMOD: or MOD: prefix for Unimod/PSI-MOD respectively
 69    by_accession = pt.parse("PEM[UNIMOD:35]TIDE")
 70    print(f"By Unimod accession: {by_accession.serialize()}")
 71
 72    # By mass (delta mass). requires sign (+/-)
 73    by_mass = pt.parse("PEM[+15.995]TIDE")
 74    print(f"By mass shift: {by_mass.serialize()}")
 75    neg_mass = pt.parse("PEPTIDE[-18.011]")
 76    print(f"Negative mass shift: {neg_mass.serialize()}")
 77
 78
 79    # By formula (requires Formula: prefix)
 80    by_formula = pt.parse("PEM[Formula:O]TIDE")
 81    print(f"By formula: {by_formula.serialize()}")
 82
 83    # by glycan composition (requires Glycan: prefix)
 84    by_glycan = pt.parse("NEEYN[Glycan:Hex5HexNAc4]K")
 85    print(f"By glycan composition: {by_glycan.serialize()}")
 86
 87
 88    # ============================================================================
 89    # CHARGE STATES
 90    # ============================================================================
 91
 92    print("\n" + "=" * 60)
 93    print("CHARGE STATES")
 94    print("=" * 60)
 95
 96    # Positive charge
 97    charged_pos = pt.parse("PEPTIDE/2")
 98    print(f"Charge +2: {charged_pos.serialize()}")
 99
100    # Negative charge
101    charged_neg = pt.parse("PEPTIDE/-2")
102    print(f"Charge -2: {charged_neg.serialize()}")
103
104    # ============================================================================
105    # CHARGE ADDUCTS
106    # ============================================================================
107
108    print("\n" + "=" * 60)
109    print("CHARGE ADDUCTS")
110    print("=" * 60)
111
112    # Single adduct (Total charge = +1)
113    na_adduct = pt.parse("PEPTIDE/[Na:z+1]")
114    print(f"Sodium adduct: {na_adduct.serialize()}")
115
116    # Multiple copies of same adduct (Total charge = +2)
117    multi_adduct = pt.parse("PEPTIDE/[Na:z+1^2]")
118    print(f"Two sodium adducts: {multi_adduct.serialize()}")
119
120    # Multiple different adducts (separated by commas) (Total charge = +3)
121    mixed_adducts = pt.parse("PEPTIDE/[Na:z+1^2,H:z+1]")
122    print(f"Mixed adducts: {mixed_adducts.serialize()}")
123
124    # Metal adduct with charge (Total charge = +2)
125    zn_adduct = pt.parse("PEPTIDE/[Zn:z+2]")
126    print(f"Zinc adduct (+2): {zn_adduct.serialize()}")
127
128    # ============================================================================
129    # LABILE MODIFICATIONS
130    # ============================================================================
131
132    print("\n" + "=" * 60)
133    print("LABILE MODIFICATIONS")
134    print("=" * 60)
135
136    labile = pt.parse("{Glycan:Hex}PEPTIDE")
137    print(f"Labile glycan: {labile.serialize()}")
138
139    multi_labile = pt.parse("{Phospho}PEPTIDE")
140    print(f"Multiple labile: {multi_labile.serialize()}")
141
142    # ============================================================================
143    # GLYCAN NOTATION
144    # ============================================================================
145
146    print("\n" + "=" * 60)
147    print("GLYCAN NOTATION")
148    print("=" * 60)
149
150    # Simple glycan
151    simple_glycan = pt.parse("NEEYN[Glycan:Hex5HexNAc4]K")
152    print(f"N-glycan: {simple_glycan.serialize()}")
153
154    # ============================================================================
155    # FIXED/STATIC MODIFICATIONS
156    # ============================================================================
157
158    print("\n" + "=" * 60)
159    print("FIXED/STATIC MODIFICATIONS")
160    print("=" * 60)
161
162    # Fixed modification applied to all matching residues (M and T on all positions)
163    fixed_mod = pt.parse("<[Oxidation]@M,T>MEMTIMDE")
164    print(f"Fixed oxidation on all M and T: {fixed_mod.serialize()}")
165
166    # Multiple fixed modifications
167    multi_fixed = pt.parse("<[Oxidation]@M><[Phospho]@S>MSPETIDE")
168    print(f"Multiple fixed mods: {multi_fixed.serialize()}")
169
170    # Fixed modification with position rules (N-term Proline)
171    fixed_nterm = pt.parse("<[Acetyl]@N-term:P>PEPTIDE")
172    print(f"Fixed N-term mod: {fixed_nterm.serialize()}")
173
174    # Fixed modification with position rules (Any C-term)
175    fixed_cterm = pt.parse("<[Amidated]@C-term>PEPTIDE")
176    print(f"Fixed C-term mod: {fixed_cterm.serialize()}")
177
178    # ============================================================================
179    # ISOTOPE LABELING
180    # ============================================================================
181
182    print("\n" + "=" * 60)
183    print("ISOTOPE LABELING")
184    print("=" * 60)
185
186    # C13 labeling (all carbons)
187    c13 = pt.parse("<13C>PEPTIDE")
188    print(f"C13 labeled: {c13.serialize()}")
189
190    # N15 labeling
191    n15 = pt.parse("<15N>PEPTIDE")
192    print(f"N15 labeled: {n15.serialize()}")
193
194    # Multiple isotope labels
195    multi_isotope = pt.parse("<13C><15N>PEPTIDE")
196    print(f"C13 and N15 labeled: {multi_isotope.serialize()}")
197
198    # Deuterium labeling
199    deuterium = pt.parse("<2H>PEP[Oxidation]TIDE") 
200    print(f"Deuterium labeled: {deuterium.serialize()}")
201
202    # ============================================================================
203    # AMBIGUOUS MODIFICATIONS (UNKNOWN POSITION)
204    # ============================================================================
205
206    print("\n" + "=" * 60)
207    print("AMBIGUOUS MODIFICATIONS (UNKNOWN POSITION)")
208    print("=" * 60)
209
210    # Unknown position
211    unknown_pos = pt.parse("[Phospho]?PEPTIDE")
212    print(f"Phospho somewhere: {unknown_pos.serialize()}")
213
214    # Multiple unknown modifications (Support caret for specifying multiple occurrences)
215    multi_unknown = pt.parse("[Phospho]^2[Acetyl]?PEPTIDE")
216    print(f"Multiple unknown: {multi_unknown.serialize()}")
217
218    # ============================================================================
219    # INTERVAL NOTATION (AMBIGUOUS LOCALIZATION)
220    # ============================================================================
221
222    print("\n" + "=" * 60)
223    print("INTERVAL NOTATION (LOCALIZATION RANGES)")
224    print("=" * 60)
225
226    # Modification in a range (1-indexed, inclusive)
227    interval = pt.parse("P(EP)[Phospho]TIDE")
228    print(f"Phospho in positions 1-3: {interval.serialize()}")
229
230    # Ambiguous interval (EP or PT or something with similar mass)
231    ambiguous_interval = pt.parse("P(?EP)[Phospho]TIDE")
232    print(f"Ambiguous intervals: {ambiguous_interval.serialize()}")
233
234
235    # ============================================================================
236    # INFO TAGS
237    # ============================================================================
238
239    print("\n" + "=" * 60)
240    print("INFO TAGS (NON-MODIFICATION ANNOTATIONS)")
241    print("=" * 60)
242
243    # Info tag (no mass contribution)
244    info_tag = pt.parse("PEPT[INFO:test]IDE")
245    print(f"Info tag: {info_tag.serialize()}")
246
247
248    # ============================================================================
249    # PEPTIDE NAMING
250    # ============================================================================
251
252    print("\n" + "=" * 60)
253    print("PEPTIDE NAMING")
254    print("=" * 60)
255
256    # Peptidoform name
257    peptide_name = pt.parse("(>MyPeptide)PEPTIDE")
258    print(f"Peptide name: {peptide_name.serialize()}")
259
260
261    # ============================================================================
262    # Multiple FEATURES COMBINED
263    # ============================================================================
264
265
266    print("\n" + "=" * 60)
267    print("MULTIPLE FEATURES COMBINED")
268    print("=" * 60)
269
270    # Combined info tag and modification
271    multi_info = pt.parse("PEPT[Phospho|INFO:quality=high]IDE")
272    print(f"Info + modification: {multi_info.serialize()}")
273
274    # Technically this is valid but no reason to do this. Peptacular only looks at the first modification in such cases.
275    multi_annot2 = pt.parse("PEPT[Phospho|Oxidation|+76.0]IDE")
276    print(f"Info + modification: {multi_annot2.serialize()}")
277
278
279if __name__ == "__main__":
280    run()

Mass, m/z, and Composition

Calculate masses, m/z ratios, and elemental compositions.

  1"""
  2Mass and Composition Calculations
  3==================================
  4Examples of calculating mass, m/z, and elemental composition from ProForma annotations.
  5"""
  6
  7import peptacular as pt
  8
  9
 10def run():
 11    # Parse a simple peptide sequence
 12    annot = pt.parse("PEPTIDE")
 13
 14    # ============================================================================
 15    # MASS CALCULATIONS
 16    # ============================================================================
 17
 18    print("=" * 60)
 19    print("MASS CALCULATIONS")
 20    print("=" * 60)
 21
 22    # --- Basic Mass Calculation ---
 23    # Default is monoisotopic precursor mass (includes terminal groups H and OH)
 24    mass = annot.mass()
 25    print(f"Default mass: {mass:.4f} Da")  # 799.3600
 26
 27    # Explicitly specify precursor ion type
 28    mass_p = annot.mass(ion_type="p")
 29    print(f"Precursor mass: {mass_p:.4f} Da")
 30
 31    # --- Neutral Mass (no terminal groups) ---
 32    neutral = annot.mass(ion_type=pt.IonType.NEUTRAL)
 33    print(f"Neutral mass: {neutral:.4f} Da")
 34
 35    # --- m/z Calculation ---
 36    # mz() divides mass by charge
 37    mz_2plus = annot.mz(charge=2)
 38    assert mz_2plus == annot.mass(charge=2) / 2
 39    print(f"m/z at charge +2: {mz_2plus:.4f}")
 40
 41    # --- Monoisotopic vs Average Mass ---
 42    mono_mass = annot.mass(monoisotopic=True)  # default
 43    avg_mass = annot.mass(monoisotopic=False)
 44    print(f"Monoisotopic: {mono_mass:.4f} Da")
 45    print(f"Average: {avg_mass:.4f} Da")
 46
 47    # --- Charge States ---
 48    # Integer charge assumes protonation/deprotonation
 49    mass_2plus = annot.mass(charge=2)
 50    mass_2minus = annot.mass(charge=-2)
 51    print(f"Mass at +2 charge: {mass_2plus:.4f} Da")
 52    print(f"Mass at -2 charge: {mass_2minus:.4f} Da")
 53
 54    # Adduct charges (overrides annotation charge)
 55    mass_na = annot.mass(charge="Na:z+1")
 56    mass_multi_adduct = annot.mass(charge=("Na:z+1^2", "H:z+1"))
 57    print(f"Mass with Na+ adduct: {mass_na:.4f} Da")
 58    print(f"Mass with multiple adducts: {mass_multi_adduct:.4f} Da")
 59
 60    # --- Isotopes ---
 61    # Integer assumes C13 isotopes
 62    mass_c13 = annot.mass(isotopes=1)
 63    print(f"Mass with 1x 13C: {mass_c13:.4f} Da")
 64
 65    # Custom isotope specification
 66    mass_custom_iso = annot.mass(isotopes={"17O": 2, "13C": 1})
 67    print(f"Mass with 2x 17O and 1x 13C: {mass_custom_iso:.4f} Da")
 68
 69    # --- Neutral Losses ---
 70    # Single loss
 71    mass_water_loss = annot.mass(deltas={"H2O": 1})
 72    print(f"Mass with H2O loss: {mass_water_loss:.4f} Da")
 73
 74    # Multiple losses
 75    mass_multi_loss = annot.mass(
 76        deltas={pt.NeutralDelta.WATER: 1, pt.NeutralDelta.AMMONIA: 2}
 77    )
 78    print(f"Mass with H2O + 2×NH3 loss: {mass_multi_loss:.4f} Da")
 79
 80    # ============================================================================
 81    # COMPOSITION CALCULATIONS
 82    # ============================================================================
 83
 84    print("\n" + "=" * 60)
 85    print("COMPOSITION CALCULATIONS")
 86    print("=" * 60)
 87
 88    # --- Basic Composition ---
 89    # Returns a Counter of ElementInfo objects
 90    comp = annot.comp()
 91    print("\nFull composition (ElementInfo objects):")
 92    for elem, count in comp.items():
 93        print(f"  {elem.symbol}: {count}")
 94
 95    # Convert to simple string representation
 96    comp_str = {str(elem): count for elem, count in comp.items()}
 97    print(f"\nSimple composition: {comp_str}")
 98
 99    # --- Composition with Modifications ---
100    # Apply charge and isotopes
101    comp_modified = annot.comp(charge="Na:z+1", isotopes={"17O": 2, "13C": 1})
102    comp_modified_str = {str(elem): count for elem, count in comp_modified.items()}
103    print(f"\nModified composition: {comp_modified_str}")
104
105    # ============================================================================
106    # WITH GLOBAL ISOTOPE MODIFICATIONS
107    # ============================================================================
108
109    print("\n" + "=" * 60)
110    print("GLOBAL ISOTOPE MODIFICATIONS")
111    print("=" * 60)
112
113    # Applies the global isotope to all residues
114    iso_annot = pt.parse("<13C>PEPTIDE")
115    iso_comp = iso_annot.comp()
116    iso_comp_str = {str(elem): count for elem, count in iso_comp.items()}
117    print(f"Composition with global 13C: {iso_comp_str}")
118
119    # will also apply isotopes to modifications where available
120    # Charge is applied after isotopes so will reflect in composition
121    mod_iso_annot = pt.parse("<2H>PEPT[Phospho]IDE/2")
122    mod_iso_comp = mod_iso_annot.comp()
123    mod_iso_comp_str = {str(elem): count for elem, count in mod_iso_comp.items()}
124    print(f"Composition with global 13C and Phospho mod: {mod_iso_comp_str}")
125
126
127if __name__ == "__main__":
128    run()

Annotation

Parse and work with ProForma annotations.

  1"""
  2ProForma Annotation Examples
  3=============================
  4Basic examples of parsing, serializing, and manipulating ProForma annotations.
  5"""
  6
  7import peptacular as pt
  8
  9
 10def run():
 11    # ============================================================================
 12    # PARSING ANNOTATIONS
 13    # ============================================================================
 14
 15    # Simple sequence
 16    simple: pt.ProFormaAnnotation = pt.parse("PEPTIDE")
 17    print(f"Simple: {simple.serialize()}")
 18
 19    # Chimeric sequence
 20    chimeric: list[pt.ProFormaAnnotation] = pt.parse_chimeric("PEPTIDE+PEPTIDE")
 21    print(f"Chimeric: {pt.serialize_chimeric(chimeric)}")
 22
 23    # ============================================================================
 24    # CREATING ANNOTATIONS PROGRAMMATICALLY
 25    # ============================================================================
 26
 27    # Create from scratch
 28    annot = pt.ProFormaAnnotation(sequence="PEPTIDE", charge=2)
 29    print(f"New annotation: {annot.serialize()}")
 30
 31    # Set internal Mods... it takes a dict of position -> {mod: count}
 32    annot = pt.ProFormaAnnotation(
 33        sequence="PEPTIDE", charge=2, internal_mods={2: {"Oxidation": 1}}
 34    )
 35    print(f"New annotation: {annot.serialize()}")
 36
 37    # Other modications are just {mod: count}
 38    annot = pt.ProFormaAnnotation(
 39        sequence="PEPTIDE",
 40        nterm_mods={"Acetyl": 1},
 41        internal_mods={2: {"Oxidation": 1, "Phospho": 1}},
 42        charge=2,
 43    )
 44    print(f"New annotation: {annot.serialize()}")
 45
 46    # ============================================================================
 47    # ACCESSING PROPERTIES
 48    # ============================================================================
 49
 50    print("\n" + "=" * 60)
 51    print("ACCESSING PROPERTIES")
 52    print("=" * 60)
 53
 54    annot = pt.parse("[Acetyl]-PEM[Oxidation]TIS[Phospho]DE/2")
 55    print(f"Annotation: {annot.serialize()}\n")
 56
 57    print(f"Sequence: {annot.sequence}")
 58    print(f"Length: {len(annot)}")
 59    print(f"Charge state: {annot.charge_state}")
 60    print(f"Has N-term mods: {annot.has_nterm_mods}")
 61    print(f"Has internal mods: {annot.has_internal_mods}")
 62    print(f"Has charge: {annot.has_charge}")
 63
 64    # ============================================================================
 65    # SETTING MODIFICATIONS
 66    # ============================================================================
 67
 68    print("\n" + "=" * 60)
 69    print("SETTING MODIFICATIONS")
 70    print("=" * 60)
 71
 72    # Start fresh
 73    annot = pt.ProFormaAnnotation(sequence="PEPTIDE")
 74
 75    # Set N-terminal modification
 76    annot.set_nterm_mods({"Acetyl": 1})
 77    print(f"After N-term: {annot.serialize()}")
 78
 79    # Set internal modification at specific position
 80    annot.set_internal_mods_at_index(2, {"Oxidation": 1})
 81    print(f"After internal: {annot.serialize()}")
 82
 83    # Set charge
 84    annot.set_charge(2)
 85    print(f"After charge: {annot.serialize()}")
 86
 87    # ============================================================================
 88    # APPENDING MODIFICATIONS
 89    # ============================================================================
 90
 91    print("\n" + "=" * 60)
 92    print("APPENDING MODIFICATIONS")
 93    print("=" * 60)
 94
 95    annot = pt.ProFormaAnnotation(sequence="PEPTIDE")
 96
 97    # Append N-terminal mod
 98    annot.append_nterm_mod("Acetyl")
 99    print(f"Append N-term: {annot.serialize()}")
100
101    # Append internal mod
102    annot.append_internal_mod_at_index(2, "Oxidation")
103    print(f"Append internal: {annot.serialize()}")
104
105    # Append another internal mod at same position
106    annot.append_internal_mod_at_index(2, "Phospho")
107    print(f"Append another: {annot.serialize()}")
108
109    # ============================================================================
110    # EXTENDING MODIFICATIONS
111    # ============================================================================
112
113    print("\n" + "=" * 60)
114    print("EXTENDING MODIFICATIONS")
115    print("=" * 60)
116
117    annot = pt.ProFormaAnnotation(sequence="PEPTIDE")
118
119    # Extend with multiple mods
120    annot.extend_nterm_mods(["Acetyl", "Formyl"])
121    print(f"Extend N-term: {annot.serialize()}")
122
123    # Extend internal mods at position
124    annot.extend_internal_mods_at_index(2, ["Oxidation", "Phospho"])
125    print(f"Extend internal: {annot.serialize()}")
126
127    # ============================================================================
128    # REMOVING MODIFICATIONS
129    # ============================================================================
130
131    print("\n" + "=" * 60)
132    print("REMOVING MODIFICATIONS")
133    print("=" * 60)
134
135    annot = pt.parse("[Acetyl]-PEM[Oxidation]TIS[Phospho]DE/2")
136    print(f"Original: {annot.serialize()}")
137
138    # Clear specific mod type (removes all)
139    annot.clear_nterm_mods()
140    print(f"Clear N-term: {annot.serialize()}")
141
142    # Clear internal mod at position (removes all at that position)
143    annot.clear_internal_mod_at_index(2)
144    print(f"Clear position 2: {annot.serialize()}")
145
146    # Clear all mods
147    annot.clear_mods()
148    print(f"Clear all: {annot.serialize()}")
149
150    # ============================================================================
151    # DECREMENTING MODIFICATIONS
152    # ============================================================================
153
154    print("\n" + "=" * 60)
155    print("DECREMENTING MODIFICATIONS")
156    print("=" * 60)
157
158    # When you have multiple copies of a mod, remove() decrements the count
159    annot = pt.ProFormaAnnotation(sequence="PEPTIDE")
160    annot.extend_nterm_mods(["Acetyl", "Acetyl", "Formyl"])
161    print(f"Original: {annot.serialize()}")
162
163    # Remove one Acetyl (decrements count)
164    annot.remove_nterm_mod("Acetyl")
165    print(f"After removing 1 Acetyl: {annot.serialize()}")
166
167    # Remove another Acetyl
168    annot.remove_nterm_mod("Acetyl")
169    print(f"After removing another Acetyl: {annot.serialize()}")
170
171    # Remove Formyl
172    annot.remove_nterm_mod("Formyl")
173    print(f"After removing Formyl: {annot.serialize()}")
174
175    # Works with internal mods too
176    annot = pt.ProFormaAnnotation(sequence="PEPTIDE")
177    annot.extend_internal_mods_at_index(2, ["Oxidation", "Oxidation", "Phospho"])
178    print(f"\nWith internal mods: {annot.serialize()}")
179
180    annot.remove_internal_mod_at_index(2, "Oxidation")
181    print(f"Remove 1 Oxidation: {annot.serialize()}")
182
183    annot.remove_internal_mod_at_index(2, "Oxidation")
184    print(f"Remove another Oxidation: {annot.serialize()}")
185
186    # ============================================================================
187    # POPPING MODIFICATIONS
188    # ============================================================================
189
190    print("\n" + "=" * 60)
191    print("POPPING MODIFICATIONS")
192    print("=" * 60)
193
194    annot = pt.parse("[Acetyl]-PEM[Oxidation]TIDE/2")
195    print(f"Original: {annot.serialize()}")
196
197    # Pop N-term mods (returns the mods)
198    nterm = annot.pop_nterm_mods()
199    print(f"Popped N-term: {nterm}")
200    print(f"After pop: {annot.serialize()}")
201
202    # Pop charge
203    charge = annot.pop_charge()
204    print(f"Popped charge: {charge}")
205    print(f"After pop charge: {annot.serialize()}")
206
207    # ============================================================================
208    # WORKING WITH STATIC MODS
209    # ============================================================================
210
211    print("\n" + "=" * 60)
212    print("STATIC MODIFICATIONS")
213    print("=" * 60)
214
215    # Add static mod by residue
216    annot = pt.ProFormaAnnotation(sequence="PEPTIDE")
217    annot.add_static_mod_by_residue("E", "Oxidation")
218    print(f"Static mod on E: {annot.serialize()}")
219
220    # Condense to internal mods
221    annot.condense_static_mods()
222    print(f"Condensed: {annot.serialize()}")
223
224    # ============================================================================
225    # SLICING
226    # ============================================================================
227
228    print("\n" + "=" * 60)
229    print("SLICING")
230    print("=" * 60)
231
232    annot = pt.parse("[Acetyl]-PEM[Oxidation]TIDE")
233    print(f"Original: {annot.serialize()}")
234
235    # Slice using indices
236    sub = annot[2:5]
237    print(f"Slice [2:5]: {sub.serialize()}")
238
239    # Slice preserves modifications
240    sub_with_mod = annot[1:4]
241    print(f"Slice [1:4]: {sub_with_mod.serialize()}")
242
243    # ============================================================================
244    # COPYING
245    # ============================================================================
246
247    print("\n" + "=" * 60)
248    print("COPYING")
249    print("=" * 60)
250
251    annot = pt.parse("PEM[Oxidation]TIDE")
252    print(f"Original: {annot.serialize()}")
253
254    # Make a copy
255    copy = annot.copy()
256    copy.append_nterm_mod("Acetyl")
257    print(f"Copy modified: {copy.serialize()}")
258    print(f"Original unchanged: {annot.serialize()}")
259
260    # ============================================================================
261    # CHECKING MODIFICATIONS
262    # ============================================================================
263
264    print("\n" + "=" * 60)
265    print("CHECKING FOR MODIFICATIONS")
266    print("=" * 60)
267
268    annot = pt.parse("[Acetyl]-PEM[Oxidation]TIDE/2")
269    print(f"Annotation: {annot.serialize()}\n")
270
271    # Check for specific mod types
272    print(f"Has N-term mods: {annot.has_nterm_mods}")
273    print(f"Has C-term mods: {annot.has_cterm_mods}")
274    print(f"Has internal mods: {annot.has_internal_mods}")
275    print(f"Has charge: {annot.has_charge}")
276
277    # Check if has any mods
278    print(f"Has any mods: {annot.has_mods()}")
279    print(
280        f"Has internal/charge: {annot.has_mods([pt.ModType.INTERNAL, pt.ModType.CHARGE])}"
281    )
282    print(f"Has internal/charge: {annot.has_mods(['internal', 'charge'])}")
283
284    # ============================================================================
285    # SERIALIZATION OPTIONS
286    # ============================================================================
287
288    print("\n" + "=" * 60)
289    print("SERIALIZATION")
290    print("=" * 60)
291
292    annot = pt.parse("[Acetyl]-PEPTIDE/2")
293
294    # Full serialization
295    print(f"Full: {annot.serialize()}")
296
297    # Strip mods before serializing
298    stripped = annot.copy().strip_mods()
299    print(f"Stripped: {stripped.serialize()}")
300
301    # ============================================================================
302    # VALIDATION
303    # ============================================================================
304
305    print("\n" + "=" * 60)
306    print("VALIDATION")
307    print("=" * 60)
308
309    # By default, validation is OFF for performance
310    annot_no_val = pt.ProFormaAnnotation(sequence="PEPTIDE", validate=False)
311    print(f"No validation (default): {annot_no_val.serialize()}")
312
313    # Enable validation when creating (and for methods that modify the annotation)
314    annot_with_val = pt.ProFormaAnnotation(sequence="PEPTIDE", validate=True)
315    print(f"With validation: {annot_with_val.serialize()}")
316
317    # Validation checks modification syntax (can be disabled per method)
318    print("\nAttempting to add invalid modification with validation ON:")
319    try:
320        annot_with_val.append_internal_mod_at_index(0, "InvalidMod123")
321        print(f"  Error: Modification added unexpectedly: {annot_with_val.serialize()}")
322    except Exception as e:
323        # successfully raises error
324        print(f"Successfully caught error: {e}")
325
326    # Validation checks modification syntax (can be disabled per method)
327    print("\nAttempting to add invalid modification with validation OFF:")
328    try:
329        annot_with_val.append_internal_mod_at_index(0, "InvalidMod123", validate=False)
330        print(f"  Success (no validation): {annot_no_val.serialize()}")
331    except Exception as e:
332        print(f"  Error: {e}")
333
334
335if __name__ == "__main__":
336    run()

Annotation with Modification Objects

Advanced annotation examples using modification objects.

  1"""
  2ProForma Mod Objects Examples
  3==============================
  4Demonstrates working with Mods and Mod objects returned from ProForma annotations.
  5"""
  6
  7import peptacular as pt
  8
  9
 10def run():
 11    # ============================================================================
 12    # ACCESSING MODS OBJECTS
 13    # ============================================================================
 14
 15    # Parse a ProForma annotation with various modifications
 16    annot = pt.parse("[Acetyl][Formyl]-PEM[Oxidation][Phospho]TIS[Phospho]DE/2")
 17    print(f"Annotation: {annot.serialize()}\n")
 18
 19    # Access different mod collections - these return Mods objects
 20    print("N-terminal mods:", annot.nterm_mods)
 21    print("Internal mods at pos 2:", annot.get_internal_mods_at_index(2))
 22    print("Internal mods at pos 5:", annot.get_internal_mods_at_index(5))
 23
 24    # ============================================================================
 25    # ITERATING OVER MODS
 26    # ============================================================================
 27
 28    print("\n" + "=" * 60)
 29    print("ITERATING OVER MODS")
 30    print("=" * 60)
 31
 32    annot = pt.parse("[Acetyl][Acetyl][Formyl]-PEPTIDE")
 33    nterm = annot.nterm_mods
 34
 35    print(f"N-terminal mods: {nterm}\n")
 36
 37    # Iterate through Mod objects
 38    print("Individual Mod objects:")
 39    for mod in nterm:
 40        print(f"  {mod.value} (count: {mod.count})")
 41
 42    # ============================================================================
 43    # WORKING WITH MOD OBJECTS
 44    # ============================================================================
 45
 46    print("\n" + "=" * 60)
 47    print("MOD OBJECT PROPERTIES")
 48    print("=" * 60)
 49
 50    annot = pt.parse("PEM[Oxidation][Oxidation][Phospho]TIDE")
 51    internal_mods = annot.get_internal_mods_at_index(2)
 52
 53    print(f"Internal mods at position 2: {internal_mods}\n")
 54
 55    for mod in internal_mods:
 56        print(f"Modification: {mod.value}")
 57        print(f"  Count: {mod.count}")
 58        print(f"  Mass (mono): {mod.get_mass(monoisotopic=True):.4f}")
 59        print(f"  Mass (avg): {mod.get_mass(monoisotopic=False):.4f}")
 60        print(f"  Composition: {mod.get_composition()}")
 61        print(f"  Charge: {mod.get_charge()}")
 62        print()
 63
 64    # ============================================================================
 65    # ACCESSING PARSED MOD VALUES
 66    # ============================================================================
 67
 68    print("\n" + "=" * 60)
 69    print("PARSED MOD VALUES")
 70    print("=" * 60)
 71
 72    annot = pt.parse("PEM[Oxidation]TIS[Phospho]DE")
 73
 74    # Get mods at position 2 (M with Oxidation)
 75    mods_at_2 = annot.get_internal_mods_at_index(2)
 76    print(f"Mods at position 2: {mods_at_2}\n")
 77
 78    # Access parsed items as (modification, count) tuples
 79    print("Parsed items:")
 80    for mod_value, count in mods_at_2.parse_items():
 81        print(f"  {mod_value} × {count}")
 82        print(f"    Type: {type(mod_value)}")
 83
 84    # ============================================================================
 85    # CHECKING MOD PRESENCE
 86    # ============================================================================
 87
 88    print("\n" + "=" * 60)
 89    print("CHECKING MOD PRESENCE")
 90    print("=" * 60)
 91
 92    annot = pt.parse("[Acetyl][Formyl]-PEM[Oxidation]TIDE")
 93    nterm = annot.nterm_mods
 94
 95    print(f"N-terminal mods: {nterm}\n")
 96    print(f"Contains 'Acetyl': {'Acetyl' in nterm}")
 97    print(f"Contains 'Phospho': {'Phospho' in nterm}")
 98    print(f"Contains 'Formyl': {'Formyl' in nterm}")
 99
100    # ============================================================================
101    # WORKING WITH DIFFERENT MOD TYPES
102    # ============================================================================
103
104    print("\n" + "=" * 60)
105    print("DIFFERENT MOD TYPES")
106    print("=" * 60)
107
108    # Isotope modifications
109    annot = pt.parse("<15N>PEPTIDE")
110    isotope = annot.isotope_mods
111    print(f"Isotope mods: {isotope}")
112    print(f"  Type: {isotope.mod_type}")
113    print(f"  Serialized: {isotope.serialize()}\n")
114
115    # Static modifications
116    annot = pt.parse("<[Carbamidomethyl]@C>PEPTCDE")
117    static = annot.static_mods
118    print(f"Static mods: {static}")
119    print(f"  Type: {static.mod_type}")
120    print(f"  Serialized: {static.serialize()}\n")
121
122    # Labile modifications
123    annot = pt.parse("{Glycan:Hex}PEPTIDE")
124    labile = annot.labile_mods
125    print(f"Labile mods: {labile}")
126    print(f"  Type: {labile.mod_type}")
127    print(f"  Serialized: {labile.serialize()}\n")
128
129    # Charge adducts
130    annot = pt.parse("PEPTIDE/[Na:z+1]")
131    charge = annot.charge_adducts
132    print(f"Charge adducts: {charge}")
133    print(f"  Type: {charge.mod_type}")
134    print(f"  Serialized: {charge.serialize()}")
135
136    # ============================================================================
137    # MOD COPYING
138    # ============================================================================
139
140    print("\n" + "=" * 60)
141    print("COPYING MOD OBJECTS")
142    print("=" * 60)
143
144    annot = pt.parse("[Acetyl]-PEPTIDE")
145    nterm = annot.nterm_mods
146
147    print(f"Original: {nterm}")
148
149    # Copy a Mods collection
150    nterm_copy = nterm.copy()
151    print(f"Copy: {nterm_copy}")
152    print(f"Are they equal: {nterm._mods == nterm_copy._mods}")
153    print(f"Are they the same object: {nterm is nterm_copy}")
154
155    # ============================================================================
156    # WORKING WITH MULTIPLE MODS AT SAME POSITION
157    # ============================================================================
158
159    print("\n" + "=" * 60)
160    print("MULTIPLE MODS AT SAME POSITION")
161    print("=" * 60)
162
163    annot = pt.parse("PEM[Oxidation][Oxidation][Phospho][Acetyl]TIDE")
164    mods = annot.get_internal_mods_at_index(2)
165
166    print(f"Mods at position 2: {mods}\n")
167
168    print("Individual modifications:")
169    for mod in mods:
170        print(f"  {mod.value} × {mod.count}")
171
172    print(f"\nTotal mass contribution: {mods.get_mass():.4f}")
173    print(f"Total composition: {mods.get_composition()}")
174
175    # ============================================================================
176    # ACCESSING UNDERLYING PROFORMA COMPONENTS
177    # ============================================================================
178
179    print("\n" + "=" * 60)
180    print("UNDERLYING PROFORMA COMPONENTS")
181    print("=" * 60)
182
183    # Different modification formats
184    annot1 = pt.parse("PEM[Oxidation]TIDE")
185    annot2 = pt.parse("PEM[UNIMOD:35]TIDE")
186    annot3 = pt.parse("PEM[+15.995]TIDE")
187    annot4 = pt.parse("PEM[Formula:O]TIDE")
188
189    print("Different representations of Oxidation:\n")
190
191    for i, annot in enumerate([annot1, annot2, annot3, annot4], 1):
192        mods = annot.get_internal_mods_at_index(2)
193        for mod in mods:
194            print(f"{i}. {annot.serialize()}")
195            print(f"   Parsed value: {mod.value}")
196            print(f"   Type: {type(mod.value).__name__}")
197            print(f"   Mass: {mod.get_mass():.4f}\n")
198
199    # ============================================================================
200    # VALIDATING MODS
201    # ============================================================================
202
203    print("=" * 60)
204    print("VALIDATING MODS")
205    print("=" * 60)
206
207    annot = pt.parse("[Acetyl]-PEM[Oxidation]TIDE")
208    nterm = annot.nterm_mods
209    internal = annot.get_internal_mods_at_index(2)
210
211    print(f"N-terminal mods valid: {nterm.is_valid}")
212    print(f"Validation result: {nterm.validate()}")
213
214    print(f"\nInternal mods valid: {internal.is_valid}")
215    print(f"Validation result: {internal.validate()}")
216
217    # ============================================================================
218    # WORKING WITH INTERVALS
219    # ============================================================================
220
221    print("\n" + "=" * 60)
222    print("INTERVAL MODIFICATIONS")
223    print("=" * 60)
224
225    annot = pt.parse("PEP(TIS)[Phospho]DE")
226
227    print(f"Annotation: {annot.serialize()}\n")
228
229    if annot.has_intervals:
230        print("Intervals:")
231        for interval in annot.intervals:
232            print(f"  Range: {interval.start}-{interval.end}")
233            print(f"  Ambiguous: {interval.ambiguous}")
234            print(f"  Has mods: {interval.has_mods}")
235            if interval.has_mods:
236                print(f"  Mods: {interval.mods}")
237                for mod in interval.mods:
238                    print(f"    {mod.value} × {mod.count}")
239
240    # ============================================================================
241    # SERIALIZATION
242    # ============================================================================
243
244    print("\n" + "=" * 60)
245    print("SERIALIZATION")
246    print("=" * 60)
247
248    annot = pt.parse("[Acetyl][Formyl]-PEM[Oxidation][Phospho]TIDE/2")
249
250    print("Full annotation:", annot.serialize())
251    print("\nIndividual mod serialization:")
252    print(f"  N-term: {annot.nterm_mods.serialize()}")
253    print(f"  Position 2: {annot.get_internal_mods_at_index(2).serialize()}")
254
255    # Show how mods serialize differently based on type
256    print("\nMod type serialization patterns:")
257    examples = [
258        ("[Acetyl]-PEPTIDE", "nterm_mods"),
259        ("PEPTIDE-[Amidated]", "cterm_mods"),
260        ("{Glycan:Hex}PEPTIDE", "labile_mods"),
261        ("[Phospho]?PEPTIDE", "unknown_mods"),
262        ("<15N>PEPTIDE", "isotope_mods"),
263        ("PEPTIDE/[Na:z+1]", "charge_adducts"),
264    ]
265
266    for proforma, attr in examples:
267        annot = pt.parse(proforma)
268        mods = getattr(annot, attr)
269        print(f"  {proforma:30s} -> {mods.serialize()}")
270
271
272if __name__ == "__main__":  # run the examples only when executed as a script, not on import
273    run()

Digestion

Digest protein sequences with various proteases.

  1"""
  2Protein Digestion Examples
  3===========================
  4Simple examples of in-silico enzymatic digestion using ProForma.
  5All digestion methods return Span objects (start, end, missed_cleavages).
  6Use annotation[span] to get the actual peptide.
  7"""
  8
  9import peptacular as pt
 10
 11
 12def run():  # Walk through the digestion API section by section, reusing one parsed protein.
 13    # ============================================================================
 14    # SIMPLE DIGESTION (AA Based)
 15    # ============================================================================
 16    # Example protein: [Amidated] on the N-terminus, one internal [Phospho], [+57] on the C-terminus.
 17    protein = pt.parse("[Amidated]-PEPTIDEKPEPTIDERPEPT[Phospho]IDER-[+57]")
 18
 19    print("=" * 60)
 20    print("SIMPLE DIGESTION (AA BASED)")
 21    print("=" * 60)
 22    print(f"Protein: {protein}\n")
 23
 24    # Basic trypsin-like digestion; protein[span] turns each yielded span into its peptide
 25    print("Trypsin-like (cleave after K/R):")
 26    for span in protein.simple_digest(cleave_on="KR"):
 27        peptide = protein[span]
 28        print(f"  {peptide.serialize()} - span: {span}")
 29
 30    # With restrictions: same cleavage residues plus restrict_before/restrict_after, cterminal=True
 31    print("\nWith restrictions (cleave after K/R, but not before N or after P):")
 32    for span in protein.simple_digest(
 33        cleave_on="KR", restrict_before="N", restrict_after="P", cterminal=True
 34    ):
 35        print(f"  {protein[span].serialize()}")
 36
 37    # ============================================================================
 38    # DIGESTION (REGEX BASED)
 39    # ============================================================================
 40
 41    print("\n" + "=" * 60)
 42    print("DIGESTION (REGEX BASED)")
 43    print("=" * 60)
 44
 45    # Using the predefined enzyme enum
 46    print("\nUsing Proteases enum:")
 47    for span in protein.digest(pt.Proteases.TRYPSIN):
 48        print(f"  {protein[span].serialize()}")
 49
 50    # Using the enzyme's name as a string
 51    print("\nUsing enzyme string 'trypsin':")
 52    for span in protein.digest("trypsin"):
 53        print(f"  {protein[span].serialize()}")
 54
 55    # Custom regex given directly (a lookbehind that matches after A or E)
 56    print("\nCustom regex (cleave after A or E):")
 57    for span in protein.digest("(?<=[AE])"):
 58        print(f"  {protein[span].serialize()}")
 59
 60    # ============================================================================
 61    # CLEAVAGE SITES
 62    # ============================================================================
 63
 64    print("\n" + "=" * 60)
 65    print("CLEAVAGE SITES")
 66    print("=" * 60)
 67
 68    print("\nCleavage positions for trypsin (after K/R):")
 69    sites = list(  # materialized so it can be printed and reused for the marker row
 70        protein.simple_cleavage_sites(
 71            cleave_on="KR",
 72            restrict_after="P",
 73            restrict_before="N",
 74            cterminal=True,
 75        )
 76    )
 77    print(f"  Sites: {sites}")
 78    print(f"  Sequence: {protein.sequence}")
 79    print(  # marker row: '^' at every index present in sites, aligned under the sequence
 80        f"            {''.join('^' if i in sites else ' ' for i in range(len(protein.sequence)))}"
 81    )
 82
 83    print("\nCleavage positions for included trypsin regex:")
 84    # Proteases.TRYPSIN or a custom regex can be used here as well
 85    sites_regex = list(protein.cleavage_sites("trypsin"))
 86    print(f"  Sites: {sites_regex}")
 87
 88    # ============================================================================
 89    # MISSED CLEAVAGES
 90    # ============================================================================
 91
 92    print("\n" + "=" * 60)
 93    print("MISSED CLEAVAGES")
 94    print("=" * 60)
 95
 96    print("\nWith 1 missed cleavage:")
 97    for span in protein.digest("trypsin", missed_cleavages=1):
 98        print(f"  {protein[span].serialize()}")
 99
100    # ============================================================================
101    # LENGTH FILTERING
102    # ============================================================================
103
104    print("\n" + "=" * 60)
105    print("LENGTH FILTERING")
106    print("=" * 60)
107
108    print("\nPeptides between 7-15 amino acids:")
109    for span in protein.digest("trypsin", min_len=7, max_len=15):
110        peptide = protein[span]  # kept in a local because it is used twice in the print below
111        print(f"  {peptide.serialize()} (length: {len(peptide)})")
112
113    # ============================================================================
114    # SEMI-ENZYMATIC DIGESTION
115    # ============================================================================
116
117    print("\n" + "=" * 60)
118    print("SEMI-ENZYMATIC")
119    print("=" * 60)
120
121    print("\nSemi-enzymatic (one end must be enzymatic):")
122    for span in protein.digest("trypsin", semi=True, min_len=5, max_len=10):
123        print(f"  {protein[span].serialize()}")
124
125
126if __name__ == "__main__":  # run the examples only when executed as a script, not on import
127    run()

Fragmentation

Generate theoretical fragment ions for peptides.

  1"""
  2Fragment Generation Examples
  3=============================
  4Examples of generating fragment ions from ProForma annotations.
  5All fragment methods return Fragment objects with mass, m/z, and composition.
  6"""
  7
  8import peptacular as pt
  9
 10
 11def run():
 12    # ============================================================================
 13    # BASIC FRAGMENTATION
 14    # ============================================================================
 15
 16    peptide = pt.parse("PEPT[Phospho]IDE-[Acetyl]")
 17
 18    print("=" * 60)
 19    print("BASIC FRAGMENTATION")
 20    print("=" * 60)
 21    print(f"Peptide: {peptide}\n")
 22
 23    # --- b-ions (N-terminal fragments) ---
 24    print("b-ions (N-terminal):")
 25    for frag in peptide.fragment(ion_types=["b"]):
 26        print(f"  {frag}")
 27
 28    # --- y-ions (C-terminal fragments) ---
 29    print("\ny-ions (C-terminal):")
 30    for frag in peptide.fragment(ion_types=["y"]):
 31        print(f"  {frag}")
 32
 33    # ============================================================================
 34    # FRAGMENT ION TYPES
 35    # ============================================================================
 36
 37    print("\n" + "=" * 60)
 38    print("DIFFERENT ION TYPES")
 39    print("=" * 60)
 40
 41    # Generate multiple ion types at once
 42    print("\na, b, c ions:")
 43    for frag in peptide.fragment(ion_types=["a", "b", "c"]):
 44        print(f"  {frag}")
 45
 46    print("\nx, y, z ions:")
 47    for frag in peptide.fragment(ion_types=["x", "y", "z"]):
 48        print(f"  {frag}")
 49
 50    # ============================================================================
 51    # CHARGED FRAGMENTS
 52    # ============================================================================
 53
 54    print("\n" + "=" * 60)
 55    print("CHARGED FRAGMENTS")
 56    print("=" * 60)
 57
 58    # Charge state
 59    print("\nb-ions at +2 charge:")
 60    for frag in peptide.fragment(ion_types=["b"], charges=[2]):
 61        print(f"  {frag}")
 62
 63    # Adduct charges
 64    print("\ny-ions with Na+ adduct:")
 65    for frag in peptide.fragment(ion_types=["y"], charges=["Na:z+1"]):
 66        print(f"  {frag}")
 67
 68    # ============================================================================
 69    # DELTAS (User Specified)
 70    # ============================================================================
 71
 72    print("\n" + "=" * 60)
 73    print("DELTAS")
 74    print("=" * 60)
 75
 76    # -18 loss applied to all ions
 77    print("\ny-ions with -18 loss:")
 78    for frag in peptide.fragment(
 79        ion_types=["y"],
 80        deltas=[-18.0],  # Custom delta of -18.0 Da
 81    ):
 82        print(f"  {frag}")
 83
 84    # By default deltas is (None,) so to also generate fragments with no losses you must include None
 85    print("\ny-ions with -18 and No loss:")
 86    for frag in peptide.fragment(
 87        ion_types=["y"],
 88        deltas=[-18.0, None],  # Custom delta of -18.0 Da and no loss
 89    ):
 90        print(f"  {frag}")
 91
 92    # ============================================================================
 93    # NEUTRAL DELTAS
 94    # ============================================================================
 95
 96    # in addition neutral deltas can be specified to apply common losses like H2O or NH3 to appropriate fragments
 97    # these work in addition to any custom deltas specified above
 98
 99    print("\n" + "=" * 60)
100    print("NEUTRAL DELTAS")
101    print("=" * 60)
102
103    # Water loss (Selectively applied to fragments that can lose H2O (containing ["S", "T", "D", "E"]))
104    print("\ny-ions with H2O loss:")
105    for frag in peptide.fragment(
106        ion_types=["y"],
107        neutral_deltas=["H2O"],
108        max_ndeltas=2,
109    ):
110        print(f"  {frag}")
111
112    # Multiple losses. Can also specify neutral deltas as their enum types
113    print("\nb-ions with H2O and NH3 loss:")
114    for frag in peptide.fragment(
115        ion_types=["b"],
116        neutral_deltas=[pt.NeutralDelta.WATER, pt.NeutralDelta.AMMONIA],
117        max_ndeltas=2,
118    ):
119        print(f"  {frag}")
120
121    # ============================================================================
122    # ISOTOPES
123    # ============================================================================
124
125    print("\n" + "=" * 60)
126    print("ISOTOPIC FRAGMENTS")
127    print("=" * 60)
128
129    # C13 isotopes
130    print("\ny-ions with 1x 13C:")
131    for frag in peptide.fragment(ion_types=["y"], isotopes=[1]):
132        print(f"  {frag}")
133
134    # Custom isotopes
135    print("\nb-ions with 2x 17O:")
136    for frag in peptide.fragment(ion_types=["b"], isotopes=[{"17O": 2}]):
137        print(f"  {frag}")
138
139    # ============================================================================
140    # INTERNAL FRAGMENTS
141    # ============================================================================
142
143    print("\n" + "=" * 60)
144    print("INTERNAL FRAGMENTS")
145    print("=" * 60)
146
147    print("\nInternal fragments (min_len=3, max_len=5):")
148    for frag in peptide.fragment(ion_types=["ax"]):
149        if frag.position and isinstance(frag.position, tuple):
150            start, end = frag.position
151            if 3 <= (end - start) <= 5:
152                print(f"  {frag}")
153
154    # ============================================================================
155    # IMMONIUM IONS
156    # ============================================================================
157
158    print("\n" + "=" * 60)
159    print("IMMONIUM IONS")
160    print("=" * 60)
161
162    print("\nImmonium ions:")
163    for frag in peptide.fragment(ion_types=["i"]):
164        print(f"  {frag}")
165
166    # ============================================================================
167    # PRECURSOR ION
168    # ============================================================================
169
170    print("\n" + "=" * 60)
171    print("PRECURSOR ION")
172    print("=" * 60)
173
174    print("\nPrecursor ion at +2 charge:")
175    for frag in peptide.fragment(ion_types=["p"], charges=[2]):
176        print(f"  {frag}")
177
178    # ============================================================================
179    # COMBINING OPTIONS
180    # ============================================================================
181
182    print("\n" + "=" * 60)
183    print("COMBINING OPTIONS")
184    print("=" * 60)
185
186    print("\ny-ions: +2 charge, H2O loss, 1x 13C:")
187    for frag in peptide.fragment(ion_types=["y"], charges=[2], neutral_deltas=["H2O"], isotopes=[1]):
188        print(f"  {frag}")
189
190    # ============================================================================
191    # ACCESSING FRAGMENT PROPERTIES
192    # ============================================================================
193
194    print("\n" + "=" * 60)
195    print("FRAGMENT PROPERTIES")
196    print("=" * 60)
197
198    """
199    Unless otherwise specified, fragments do not include sequence or composition data.
200    This can be enabled with the `include_sequence` and `calculate_composition` flags.
201    """
202    b_ions: list[pt.Fragment] = peptide.fragment(ion_types=["b"], charges=[2], calculate_composition=True)
203    if len(b_ions) > 0:
204        frag = b_ions[0]
205        print(f"\nExample fragment: {frag}")
206        print(f"  Ion type: {frag.ion_type}")
207        print(f"  Position: {frag.position}")
208        print(f"  Mass: {frag.mass:.4f} Da")
209        print(f"  m/z: {frag.mz:.4f}")
210        print(f"  Charge: {frag.charge_state}")
211        print(f"  Neutral mass: {frag.neutral_mass:.4f} Da")
212        if frag.composition:
213            comp_str = {str(elem): count for elem, count in frag.composition.items()}
214            print(f"  Composition: {comp_str}")
215
216    # ============================================================================
217    # MZPAF OUTPUT
218    # ============================================================================
219
220    print("\n" + "=" * 60)
221    print("MZPAF OUTPUT")
222    print("=" * 60)
223
224    # See paftacular documentation for details on mzPAF format
225
226    print("\nFragment annotations in mzPAF format:")
227    fragments: list[pt.Fragment] = peptide.fragment(ion_types=["b", "y"], charges=[2])
228    for frag in fragments[:8]:  # Show first 8 fragments
229        mzpaf = frag.to_mzpaf()
230        print(f"  {mzpaf}")
231
232    # serialize() with format parameter also works
233    print("\nUsing serialize(format='mzpaf'):")
234    for frag in fragments[:4]:
235        print(f"  {frag.serialize(format='mzpaf')}")
236
237    print("\n" + "=" * 60)
238
239    # ============================================================================
240    # FAST FRAGMENT
241    # ============================================================================
242
243    print("\n" + "=" * 60)
244    print("FAST FRAGMENT")
245    print("=" * 60)
246
247    """
248    fast_fragment() uses a prefix/suffix-sum algorithm to compute fragment m/z
249    values without constructing Fragment objects. It is faster than fragment()
250    for high-throughput use cases.
251
252    Return type: dict[(IonType, charge)] -> list[float]
253    Each list has length == len(peptide), ordered fragment position 1 to N.
254
255    Limitations vs fragment():
256    - No neutral losses (H2O, NH3, custom deltas)
257    - No isotope shifts
258    - No adduct charges (integer charges only)
259    - No internal / immonium ions
260    - Raises if the annotation has unknown or interval modifications
261    """
262
263    peptide = pt.parse("PEPT[Phospho]IDE")
264
265    # --- OOP method ---
266    mz_map = peptide.fast_fragment(ion_types=["b", "y"], charges=[1, 2])
267    print(f"\nPeptide: {peptide}")
268    for (ion_type, charge), mzs in mz_map.items():
269        print(f"  ({ion_type}, z={charge}): {[round(v, 4) for v in mzs]}")
270
271    # --- Functional API (identical result) ---
272    print("\nFunctional API (pt.fast_fragment):")
273    mz_map2 = pt.fast_fragment("PEPTIDE", ion_types=["b", "y"], charges=[1])
274    for (ion_type, charge), mzs in mz_map2.items():
275        print(f"  ({ion_type}, z={charge}): {[round(v, 4) for v in mzs]}")
276
277    # --- Batch / parallel: pass a list of sequences ---
278    print("\nBatch fast_fragment (list input):")
279    sequences = ["PEPTIDE", "ACDEFGHIK", "LMNPQRST"]
280    results = pt.fast_fragment(sequences, ion_types=["y"], charges=[1])
281    for seq, mz_map3 in zip(sequences, results):
282        (ion_type, charge), mzs = next(iter(mz_map3.items()))
283        print(f"  {seq}: {[round(v, 4) for v in mzs]}")
284
285    print("\n" + "=" * 60)
286
287
288if __name__ == "__main__":  # run the examples only when executed as a script, not on import
289    run()

Isotope Calculations

Calculate isotopic distributions for peptides.

 1"""
 2Isotopic Distribution Calculations
 3===================================
 4Examples of calculating isotopic distributions from ProForma annotations.
 5"""
 6
 7import peptacular as pt
 8
 9
10def run():  # Print isotopic-distribution examples for one parsed peptide, option by option.
11    # Parse a simple peptide sequence; every example below uses this annotation
12    annot = pt.parse("PEPTIDE")
13
14    # ============================================================================
15    # BASIC ISOTOPIC DISTRIBUTION
16    # ============================================================================
17
18    print("=" * 60)
19    print("BASIC ISOTOPIC DISTRIBUTION")
20    print("=" * 60)
21
22    # --- Default Distribution ---
23    # Returns a list of IsotopicData with mass, neutron_count, and abundance
24    # Abundances are normalized so that the max peak = 1.0
25    dist = annot.isotopic_distribution()
26    print(f"\nPeptide: {annot.serialize()}")
27    print(f"Monoisotopic mass: {annot.mass():.3f} Da")
28    print("Default isotopic distribution:")
29    for iso in dist:
30        print(
31            f"  mass: {iso.mass:>8.3f} Da, abundance: {iso.abundance:>6.3f}, neutrons: {iso.neutron_count}"
32        )
33
34    # --- Control Number of Isotopes ---
35    dist_limited = annot.isotopic_distribution(max_isotopes=3)
36    print("\nLimited to 3 most abundant isotopes:")
37    for iso in dist_limited:
38        print(f"  mass: {iso.mass:>8.3f} Da, abundance: {iso.abundance:>6.3f}")
39
40    # --- Abundance Threshold ---
41    # Only keep isotopes with abundance >= threshold (relative to max peak)
42    dist_filtered = annot.isotopic_distribution(min_abundance_threshold=0.05)
43    print("\nFiltered (≥5% of max peak):")
44    for iso in dist_filtered:
45        print(f"  mass: {iso.mass:>8.3f} Da, abundance: {iso.abundance:>6.3f}")
46
47    # --- Neutron Offset Mode ---
48    # Use neutron count instead of absolute mass (useful for matching patterns); read via iso.mass below
49    dist_neutron = annot.isotopic_distribution(use_neutron_count=True)
50    print("\nNeutron offset mode:")
51    for iso in dist_neutron:
52        print(f"  neutron offset: {iso.mass:>3.0f}, abundance: {iso.abundance:>6.3f}")
53
54    # ============================================================================
55    # DISTRIBUTION RESOLUTION
56    # ============================================================================
57
58    print("\n" + "=" * 60)
59    print("DISTRIBUTION RESOLUTION")
60    print("=" * 60)
61
62    # --- High Resolution ---
63    # More decimal places for precise mass calculations; only the first 3 entries are printed
64    dist_high_res = annot.isotopic_distribution(distribution_resolution=5)
65    print("\nHigh resolution (5 decimals):")
66    for iso in dist_high_res[:3]:
67        print(f"  mass: {iso.mass:.5f} Da, abundance: {iso.abundance:>6.3f}")
68
69    # --- Low Resolution ---
70    # Simulates lower instrument precision, combining nearby masses
71    dist_low_res = annot.isotopic_distribution(distribution_resolution=2)
72    print("\nLow resolution (2 decimals):")
73    for iso in dist_low_res[:3]:
74        print(f"  mass: {iso.mass:.2f} Da, abundance: {iso.abundance:>6.3f}")
75
76    # ============================================================================
77    # COMBINING WITH COMP PARAMETERS
78    # ============================================================================
79
80    print("\n" + "=" * 60)
81    print("COMBINING WITH COMP PARAMETERS")
82    print("=" * 60)
83
84    # isotopic_distribution() accepts the same parameters as comp()
85    # Combine ion type, charge, isotopes and deltas. NOTE(review): confirm {"H2O": 1} is the loss the label claims
86    dist_combined = annot.isotopic_distribution(
87        ion_type="y", charge=2, isotopes=1, deltas={"H2O": 1}
88    )
89    print("\ny-ion, +2 charge, +1 13C, -H2O:")
90    for iso in dist_combined[:4]:
91        print(f"  m/z: {iso.mass:>8.3f}, abundance: {iso.abundance:>6.3f}")
92
93
94if __name__ == "__main__":  # run the examples only when executed as a script, not on import
95    run()

Physicochemical Properties

Calculate various properties like pI, hydrophobicity, etc.

  1"""
  2Sequence Property Calculations
  3===============================
  4Examples of calculating physicochemical and structural properties of peptides.
  5Note: These calculations use only the amino acid sequence; modifications are not considered.
  6"""
  7
  8import peptacular as pt
  9
 10def run():
 11    # Parse a test peptide
 12    annot = pt.parse('PEPTIDE')
 13
 14    # ============================================================================
 15    # SIMPLE PHYSICOCHEMICAL PROPERTIES
 16    # ============================================================================
 17
 18    print("=" * 60)
 19    print("PHYSICOCHEMICAL PROPERTIES")
 20    print("=" * 60)
 21
 22    # These properties return single float values
 23    print(f"Sequence: {annot}")
 24    print(f"Hydrophobicity: {annot.prop.hydrophobicity:.3f}")
 25    print(f"Flexibility: {annot.prop.flexibility:.3f}")
 26    print(f"Hydrophilicity: {annot.prop.hydrophilicity:.3f}")
 27    print(f"Surface accessibility: {annot.prop.surface_accessibility:.3f}")
 28    print(f"Polarity: {annot.prop.polarity:.3f}")
 29    print(f"Aromaticity: {annot.prop.aromaticity:.3f}")
 30    print(f"Isoelectric point (pI): {annot.prop.pi:.2f}")
 31    print(f"HPLC retention: {annot.prop.hplc:.3f}")
 32    print(f"Refractivity: {annot.prop.refractivity:.3f}")
 33
 34    # ============================================================================
 35    # STRUCTURAL PROPERTIES
 36    # ============================================================================
 37
 38    print("\n" + "=" * 60)
 39    print("STRUCTURAL PROPERTIES")
 40    print("=" * 60)
 41
 42    # Secondary structure percentages
 43    print(f"Alpha helix: {annot.prop.alpha_helix_percent:.1f}%")
 44    print(f"Beta sheet: {annot.prop.beta_sheet_percent:.1f}%")
 45    print(f"Beta turn: {annot.prop.beta_turn_percent:.1f}%")
 46    print(f"Coil: {annot.prop.coil_percent:.1f}%")
 47
 48    # Predicted secondary structure using different methods
 49    ss_dr = annot.prop.secondary_structure(pt.SecondaryStructureMethod.DELEAGE_ROUX)
 50    print(f"\nSecondary structure (Deleage-Roux method):")
 51    print(f"  Alpha helix: {ss_dr['alpha_helix']:.1f}%")
 52    print(f"  Beta sheet: {ss_dr['beta_sheet']:.1f}%")
 53    print(f"  Beta turn: {ss_dr['beta_turn']:.1f}%")
 54    print(f"  Coil: {ss_dr['coil']:.1f}%")
 55
 56    # ============================================================================
 57    # COMPOSITION-BASED PROPERTIES
 58    # ============================================================================
 59
 60    print("\n" + "=" * 60)
 61    print("COMPOSITION PROPERTIES")
 62    print("=" * 60)
 63
 64    # Amino acid composition
 65    proline_pct = annot.prop.aa_property_percentage('P')
 66    acidic_pct = annot.prop.aa_property_percentage('DE')  # D and E
 67    basic_pct = annot.prop.aa_property_percentage('KR')   # K and R
 68    print(f"Proline content: {proline_pct:.1f}%")
 69    print(f"Acidic residues (D, E): {acidic_pct:.1f}%")
 70    print(f"Basic residues (K, R): {basic_pct:.1f}%")
 71
 72    # Charge at different pH values
 73    print(f"\nNet charge at pH 7.0: {annot.prop.charge_at_ph(7.0):.2f}")
 74    print(f"Net charge at pH 3.0: {annot.prop.charge_at_ph(3.0):.2f}")
 75    print(f"Net charge at pH 11.0: {annot.prop.charge_at_ph(11.0):.2f}")
 76
 77    # ============================================================================
 78    # CUSTOM PROPERTY CALCULATIONS
 79    # ============================================================================
 80
 81    print("\n" + "=" * 60)
 82    print("CUSTOM PROPERTY CALCULATIONS")
 83    print("=" * 60)
 84
 85    # --- Basic calculation with default options ---
 86    prop = annot.prop.calc_property(
 87        scale=pt.HydrophobicityScale.ABRAHAM_LEO,
 88        missing_aa_handling=pt.MissingAAHandling.ERROR,  # default
 89        aggregation_method=pt.AggregationMethod.SUM,     # default
 90        normalize=False,                                  # default
 91        weighting_scheme=pt.WeightingMethods.UNIFORM,    # default
 92        min_weight=0.0,                                   # default
 93        max_weight=1.0,                                   # default
 94    )
 95    print(f"Abraham-Leo hydrophobicity (sum): {prop:.2f}")
 96
 97    # --- Using string identifiers ---
 98    prop_avg = annot.prop.calc_property(
 99        scale="deleage_roux_alpha_helix",
100        missing_aa_handling="avg",
101        aggregation_method="avg"
102    )
103    print(f"Alpha helix propensity (avg): {prop_avg:.3f}")
104
105    # --- Custom scale dictionary ---
106    custom_scale = {
107        'A': 1.0, 'C': 2.0, 'D': 3.0, 'E': 4.0, 
108        'F': 5.0, 'G': 6.0, 'H': 7.0, 'I': 8.0,
109        'K': 9.0, 'L': 10.0, 'M': 11.0, 'N': 12.0,
110        'P': 13.0, 'Q': 14.0, 'R': 15.0, 'S': 16.0,
111        'T': 17.0, 'V': 18.0, 'W': 19.0, 'Y': 20.0
112    }
113    custom_prop = annot.prop.calc_property(scale=custom_scale, missing_aa_handling="avg")
114    print(f"Custom scale (sum): {custom_prop:.2f}")
115
116    # ============================================================================
117    # AVAILABLE OPTIONS FOR calc_property()
118    # ============================================================================
119
120    print("\n" + "=" * 60)
121    print("CALC_PROPERTY OPTIONS")
122    print("=" * 60)
123
124    """
125    [Scale]
126    - Use built-in scale enums (e.g., HydrophobicityScale.ABRAHAM_LEO)
127    - Use scale name as string (e.g., "abraham_leo")
128    - Provide custom dict (e.g., {'A': 1.0, 'C': 2.0, ...})
129    - ~50 built-in scales available
130
131    [missing_aa_handling]
132    - 'avg': Use average of known values
133    - 'min': Use minimum of known values
134    - 'max': Use maximum of known values
135    - 'median': Use median of known values
136    - 'error': Raise error (default)
137    - 'zero': Use 0.0
138    - 'skip': Skip missing amino acids
139
140    [aggregation_method]
141    - 'sum': Sum of amino acid values (default)
142    - 'avg': Average of amino acid values
143
144    [normalize]
145    - True: Normalize each AA's property value to [0, 1] before aggregation
146    - False: Use raw values (default)
147
148    [weighting_scheme]
149    - 'uniform': All positions weighted equally (default)
150    - 'linear': Linear weighting across sequence
151    - 'exponential': Exponential weighting
152    - 'gaussian': Gaussian weighting
153    - 'sigmoid': Sigmoid weighting
154    - 'cosine': Cosine weighting
155    - 'sinusoidal': Sinusoidal weighting
156
157    [min_weight, max_weight]
158    - Define weight range (default: 0.0 to 1.0)
159    """
160
161    # ============================================================================
162    # SLIDING WINDOW CALCULATIONS
163    # ============================================================================
164
165    print("=" * 60)
166    print("SLIDING WINDOW CALCULATIONS")
167    print("=" * 60)
168
169    # Calculate property over sliding windows
170    windows = annot.prop.property_windows(
171        scale=pt.HydrophobicityScale.ABRAHAM_LEO,
172        window_size=4,
173        missing_aa_handling=pt.MissingAAHandling.ERROR,
174        aggregation_method=pt.AggregationMethod.SUM,
175        normalize=False,
176        weighting_scheme=pt.WeightingMethods.UNIFORM,
177        min_weight=0.0,
178        max_weight=1.0,
179    )
180    print(f"\nWindow size 4 (overlapping):")
181    print(f"  Values: {[f'{v:.2f}' for v in windows]}")
182    print(f"  Number of windows: {len(windows)}")
183
184    # Different window size
185    windows_large = annot.prop.property_windows(
186        scale=pt.HydrophobicityScale.ABRAHAM_LEO,
187        window_size=3
188    )
189    print(f"\nWindow size 3:")
190    print(f"  Values: {[f'{v:.2f}' for v in windows_large]}")
191
192    # ============================================================================
193    # PARTITIONED WINDOW CALCULATIONS
194    # ============================================================================
195
196    print("\n" + "=" * 60)
197    print("PARTITIONED WINDOW CALCULATIONS")
198    print("=" * 60)
199
200    # Divide sequence into fixed number of non-overlapping partitions
201    partitions = annot.prop.property_partitions(
202        scale=pt.HydrophobicityScale.ABRAHAM_LEO,
203        num_windows=3,
204        aa_overlap=0,
205        missing_aa_handling=pt.MissingAAHandling.ERROR,
206        aggregation_method=pt.AggregationMethod.SUM,
207        normalize=False,
208        weighting_scheme=pt.WeightingMethods.UNIFORM,
209        min_weight=0.0,
210        max_weight=1.0,
211    )
212    print(f"\n3 partitions (no overlap):")
213    print(f"  Values: {[f'{v:.2f}' for v in partitions]}")
214
215    # With overlap between partitions
216    partitions_overlap = annot.prop.property_partitions(
217        scale=pt.HydrophobicityScale.ABRAHAM_LEO,
218        num_windows=3,
219        aa_overlap=1
220    )
221    print(f"\n3 partitions (1 AA overlap):")
222    print(f"  Values: {[f'{v:.2f}' for v in partitions_overlap]}")
223
224    # ============================================================================
225    # PRACTICAL EXAMPLES
226    # ============================================================================
227
228    print("\n" + "=" * 60)
229    print("PRACTICAL EXAMPLES")
230    print("=" * 60)
231
232    # Example: Hydrophobicity profile for transmembrane prediction
233    tm_peptide = pt.parse('LFGAIAGFIENGWEGMIDG')
234    tm_windows = tm_peptide.prop.property_windows(
235        scale=pt.HydrophobicityScale.KYTE_DOOLITTLE,
236        window_size=9
237    )
238    print(f"\nTransmembrane peptide: {tm_peptide}")
239    print(f"Kyte-Doolittle hydrophobicity profile (window=9):")
240    for i, val in enumerate(tm_windows):
241        print(f"  Position {i+1}: {val:.2f}")
242
243    # Example: Charge distribution analysis
244    charged_peptide = pt.parse('PKDEPKDE')
245    charge_partitions = charged_peptide.prop.property_partitions(
246        scale={'K': 1, 'R': 1, 'D': -1, 'E': -1},  # Simple charge scale
247        num_windows=4,
248        aa_overlap=0,
249        missing_aa_handling='zero'
250    )
251    print(f"\nCharged peptide: {charged_peptide}")
252    print(f"Charge distribution (4 regions):")
253    for i, val in enumerate(charge_partitions):
254        print(f"  Region {i+1}: {val:+.1f}")
255
256    print("\n" + "=" * 60)
257
258
259if __name__ == "__main__":
260    run()

Converters

Convert sequences from other tools (IP2, DIANN, Casanovo, MS2PIP) to ProForma format.

 1"""
 2Sequence Format Conversion Examples
 3====================================
 4Examples of converting peptide sequences from other tools (IP2, DIANN, Casanovo,
 5MS2PIP) to ProForma 2.0 format. All conversion functions support parallel execution.
 6"""
 7
 8import peptacular as pt
 9
10
def run():
    """Print worked conversions between third-party formats and ProForma.

    Four sections are shown in order: IP2, DIANN, Casanovo and MS2PIP. Each
    section prints a banner, runs the conversion through peptacular, and
    prints the input next to the result. Nothing is returned; all output
    goes to stdout.
    """
    rule = "=" * 60

    def banner(title, lead=""):
        # Three-line section header; `lead` is glued to the front of the
        # first rule so later sections can start with a blank line.
        print(lead + rule)
        print(title)
        print(rule)

    # ------------------------------------------------------------------
    # IP2
    # ------------------------------------------------------------------
    banner("IP2 SEQUENCE CONVERSION")

    # Basic IP2 form: K.SEQUENCE.K
    ip2_input = "K.PEPTIDE.K"
    ip2_result = pt.convert_ip2_sequence(ip2_input)
    print(f"IP2: {ip2_input}")
    print(f"ProForma: {ip2_result}\n")

    # ------------------------------------------------------------------
    # DIANN
    # ------------------------------------------------------------------
    banner("DIANN SEQUENCE CONVERSION", lead="\n")

    # Underscore-delimited sequence carrying a bracketed modification
    diann_input = "_YMGTLRGC[Carbamidomethyl]LLRLYHD_"
    diann_result = pt.convert_diann_sequence(diann_input)
    print(f"DIANN with mod: {diann_input}")
    print(f"ProForma: {diann_result}\n")

    # ------------------------------------------------------------------
    # Casanovo
    # ------------------------------------------------------------------
    banner("CASANOVO SEQUENCE CONVERSION", lead="\n")

    # Leading mass shift plus a second shift inside the sequence
    casanovo_input = "+43.006P+100EPTIDE"
    casanovo_result = pt.convert_casanovo_sequence(casanovo_input)
    print(f"Casanovo complex: {casanovo_input}")
    print(f"ProForma: {casanovo_result}")

    # Same input style, but parsed straight into an annotation object
    casanovo_parsed = pt.ProFormaAnnotation.from_casanovo("+43.006PEPTIDE")
    print(f"\nCasanovo (annotation method): {casanovo_parsed.serialize()}")
    print(f"  Mass: {casanovo_parsed.mass():.4f} Da")

    # ------------------------------------------------------------------
    # MS2PIP
    # ------------------------------------------------------------------
    banner("MS2PIP FORMAT CONVERSION", lead="\n")

    # ProForma -> MS2PIP: yields the bare sequence and a modification string
    source_annot = pt.parse("[Acetyl]-PEM[Oxidation]TIDE")
    bare_sequence, ms2pip_mods = source_annot.to_ms2_pip()
    print(f"\nProForma: {source_annot.serialize()}")
    print(f"MS2PIP sequence: {bare_sequence}")
    print(f"MS2PIP mods: {ms2pip_mods}")

    # MS2PIP -> ProForma. The mod string looks like `position|name` pairs
    # (presumably 0 = N-terminus; confirm against the MS2PIP format docs).
    restored = pt.ProFormaAnnotation.from_ms2_pip(
        sequence="PEPTIDE",
        mod_str="0|Acetyl|3|Oxidation",
    )
    print(f"\nMS2PIP -> ProForma: {restored.serialize()}")

    # Same conversion with a static modification mapping supplied as well
    restored_static = pt.ProFormaAnnotation.from_ms2_pip(
        sequence="PEPTIDE",
        mod_str="0|Acetyl",
        static_mods={"C": "Carbamidomethyl"},
    )
    print(f"MS2PIP with static mods: {restored_static.serialize()}")


# Only run the walkthrough when this file is executed as a script.
if __name__ == "__main__":
    run()