Examples
This section provides practical examples demonstrating key features of Peptacular.
ProForma Notation
Basic usage of ProForma notation for representing modified peptides.
1"""
2ProForma Notation Examples
3===========================
4Comprehensive examples of supported ProForma 2.0 notation in peptacular.
5Demonstrates parsing and serialization of various modification types and features.
6"""
7
8import peptacular as pt
9
10
def run():
    """Print one parsed-and-serialized example per ProForma 2.0 feature.

    Each example string is parsed with ``pt.parse`` and echoed back through
    ``serialize()`` under a section banner. The function takes no arguments
    and returns nothing; its only effect is writing to stdout.
    """
    rule = "=" * 60

    def section(title, first=False):
        # Every banner except the very first is preceded by a blank line.
        print(rule if first else "\n" + rule)
        print(title)
        print(rule)

    # ------------------------------------------------------------------
    # BASIC SEQUENCES
    # ------------------------------------------------------------------
    section("BASIC SEQUENCES", first=True)

    # Simple unmodified peptide
    simple = pt.parse("PEPTIDE")
    print(f"Simple sequence: {simple.serialize()}")

    # ------------------------------------------------------------------
    # TERMINAL MODIFICATIONS
    # ------------------------------------------------------------------
    section("TERMINAL MODIFICATIONS")

    # Both terminals modified
    both = pt.parse("[Acetyl]-PEPTIDE-[Amidated]")
    print(f"Both terminals: {both.serialize()}")

    # Multiple N-terminal modifications
    multi_nterm = pt.parse("[Acetyl][Formyl]-PEPTIDE")
    print(f"Multiple N-term mods: {multi_nterm.serialize()}")

    # ------------------------------------------------------------------
    # INTERNAL MODIFICATIONS
    # ------------------------------------------------------------------
    section("INTERNAL MODIFICATIONS")

    # Multiple different modifications
    multi_internal = pt.parse("PEM[Oxidation]TIS[Phospho]DE")
    print(f"Multiple modifications: {multi_internal.serialize()}")

    # Multiple modifications on the same residue
    same_residue = pt.parse("PEM[Oxidation][Dioxidation]TIDE")
    print(f"Multiple mods on M: {same_residue.serialize()}")

    # ------------------------------------------------------------------
    # MODIFICATION NOTATION TYPES
    # ------------------------------------------------------------------
    section("MODIFICATION NOTATION TYPES")

    # By name (Unimod/PSI-MOD)
    by_name = pt.parse("PEM[Oxidation]TIDE")
    print(f"By name: {by_name.serialize()}")

    # By accession number; requires the UNIMOD: or MOD: prefix for
    # Unimod/PSI-MOD respectively
    by_accession = pt.parse("PEM[UNIMOD:35]TIDE")
    print(f"By Unimod accession: {by_accession.serialize()}")

    # By mass (delta mass); requires an explicit sign (+/-)
    by_mass = pt.parse("PEM[+15.995]TIDE")
    print(f"By mass shift: {by_mass.serialize()}")
    neg_mass = pt.parse("PEPTIDE[-18.011]")
    print(f"Negative mass shift: {neg_mass.serialize()}")

    # By formula (requires the Formula: prefix)
    by_formula = pt.parse("PEM[Formula:O]TIDE")
    print(f"By formula: {by_formula.serialize()}")

    # By glycan composition (requires the Glycan: prefix)
    by_glycan = pt.parse("NEEYN[Glycan:Hex5HexNAc4]K")
    print(f"By glycan composition: {by_glycan.serialize()}")

    # ------------------------------------------------------------------
    # CHARGE STATES
    # ------------------------------------------------------------------
    section("CHARGE STATES")

    # Positive charge
    charged_pos = pt.parse("PEPTIDE/2")
    print(f"Charge +2: {charged_pos.serialize()}")

    # Negative charge
    charged_neg = pt.parse("PEPTIDE/-2")
    print(f"Charge -2: {charged_neg.serialize()}")

    # ------------------------------------------------------------------
    # CHARGE ADDUCTS
    # ------------------------------------------------------------------
    section("CHARGE ADDUCTS")

    # Single adduct (total charge = +1)
    na_adduct = pt.parse("PEPTIDE/[Na:z+1]")
    print(f"Sodium adduct: {na_adduct.serialize()}")

    # Multiple copies of the same adduct (total charge = +2)
    multi_adduct = pt.parse("PEPTIDE/[Na:z+1^2]")
    print(f"Two sodium adducts: {multi_adduct.serialize()}")

    # Multiple different adducts, separated by commas (total charge = +3)
    mixed_adducts = pt.parse("PEPTIDE/[Na:z+1^2,H:z+1]")
    print(f"Mixed adducts: {mixed_adducts.serialize()}")

    # Metal adduct with charge (total charge = +2)
    zn_adduct = pt.parse("PEPTIDE/[Zn:z+2]")
    print(f"Zinc adduct (+2): {zn_adduct.serialize()}")

    # ------------------------------------------------------------------
    # LABILE MODIFICATIONS
    # ------------------------------------------------------------------
    section("LABILE MODIFICATIONS")

    labile = pt.parse("{Glycan:Hex}PEPTIDE")
    print(f"Labile glycan: {labile.serialize()}")

    # A single labile Phospho; the label names exactly what is parsed.
    labile_phospho = pt.parse("{Phospho}PEPTIDE")
    print(f"Labile phospho: {labile_phospho.serialize()}")

    # ------------------------------------------------------------------
    # GLYCAN NOTATION
    # ------------------------------------------------------------------
    section("GLYCAN NOTATION")

    # Simple glycan
    simple_glycan = pt.parse("NEEYN[Glycan:Hex5HexNAc4]K")
    print(f"N-glycan: {simple_glycan.serialize()}")

    # ------------------------------------------------------------------
    # FIXED/STATIC MODIFICATIONS
    # ------------------------------------------------------------------
    section("FIXED/STATIC MODIFICATIONS")

    # Fixed modification applied to all matching residues (every M and T)
    fixed_mod = pt.parse("<[Oxidation]@M,T>MEMTIMDE")
    print(f"Fixed oxidation on all M and T: {fixed_mod.serialize()}")

    # Multiple fixed modifications
    multi_fixed = pt.parse("<[Oxidation]@M><[Phospho]@S>MSPETIDE")
    print(f"Multiple fixed mods: {multi_fixed.serialize()}")

    # Fixed modification with a position rule (N-terminal proline)
    fixed_nterm = pt.parse("<[Acetyl]@N-term:P>PEPTIDE")
    print(f"Fixed N-term mod: {fixed_nterm.serialize()}")

    # Fixed modification with a position rule (any C-terminus)
    fixed_cterm = pt.parse("<[Amidated]@C-term>PEPTIDE")
    print(f"Fixed C-term mod: {fixed_cterm.serialize()}")

    # ------------------------------------------------------------------
    # ISOTOPE LABELING
    # ------------------------------------------------------------------
    section("ISOTOPE LABELING")

    # C13 labeling (all carbons)
    c13 = pt.parse("<13C>PEPTIDE")
    print(f"C13 labeled: {c13.serialize()}")

    # N15 labeling
    n15 = pt.parse("<15N>PEPTIDE")
    print(f"N15 labeled: {n15.serialize()}")

    # Multiple isotope labels
    multi_isotope = pt.parse("<13C><15N>PEPTIDE")
    print(f"C13 and N15 labeled: {multi_isotope.serialize()}")

    # Deuterium labeling
    deuterium = pt.parse("<2H>PEP[Oxidation]TIDE")
    print(f"Deuterium labeled: {deuterium.serialize()}")

    # ------------------------------------------------------------------
    # AMBIGUOUS MODIFICATIONS (UNKNOWN POSITION)
    # ------------------------------------------------------------------
    section("AMBIGUOUS MODIFICATIONS (UNKNOWN POSITION)")

    # Unknown position
    unknown_pos = pt.parse("[Phospho]?PEPTIDE")
    print(f"Phospho somewhere: {unknown_pos.serialize()}")

    # Multiple unknown modifications (a caret gives the number of occurrences)
    multi_unknown = pt.parse("[Phospho]^2[Acetyl]?PEPTIDE")
    print(f"Multiple unknown: {multi_unknown.serialize()}")

    # ------------------------------------------------------------------
    # INTERVAL NOTATION (LOCALIZATION RANGES)
    # ------------------------------------------------------------------
    section("INTERVAL NOTATION (LOCALIZATION RANGES)")

    # Modification in a range (1-indexed, inclusive): the parenthesised
    # "EP" covers residues 2 and 3 of P-E-P-T-I-D-E.
    interval = pt.parse("P(EP)[Phospho]TIDE")
    print(f"Phospho in positions 2-3: {interval.serialize()}")

    # Ambiguous interval (EP or PT or something with similar mass)
    ambiguous_interval = pt.parse("P(?EP)[Phospho]TIDE")
    print(f"Ambiguous intervals: {ambiguous_interval.serialize()}")

    # ------------------------------------------------------------------
    # INFO TAGS
    # ------------------------------------------------------------------
    section("INFO TAGS (NON-MODIFICATION ANNOTATIONS)")

    # Info tag (no mass contribution)
    info_tag = pt.parse("PEPT[INFO:test]IDE")
    print(f"Info tag: {info_tag.serialize()}")

    # ------------------------------------------------------------------
    # PEPTIDE NAMING
    # ------------------------------------------------------------------
    section("PEPTIDE NAMING")

    # Peptidoform name
    peptide_name = pt.parse("(>MyPeptide)PEPTIDE")
    print(f"Peptide name: {peptide_name.serialize()}")

    # ------------------------------------------------------------------
    # MULTIPLE FEATURES COMBINED
    # ------------------------------------------------------------------
    section("MULTIPLE FEATURES COMBINED")

    # Combined info tag and modification
    multi_info = pt.parse("PEPT[Phospho|INFO:quality=high]IDE")
    print(f"Info + modification: {multi_info.serialize()}")

    # Technically this is valid but there is no reason to do it. Peptacular
    # only looks at the first modification in such cases.
    multi_annot2 = pt.parse("PEPT[Phospho|Oxidation|+76.0]IDE")
    print(f"Multiple annotations in one tag: {multi_annot2.serialize()}")


if __name__ == "__main__":
    run()
Mass, m/z, and Composition
Calculate masses, m/z ratios, and elemental compositions.
1"""
2Mass and Composition Calculations
3==================================
4Examples of calculating mass, m/z, and elemental composition from ProForma annotations.
5"""
6
7import peptacular as pt
8
9
def run():
    """Print mass, m/z and elemental-composition examples for ``PEPTIDE``.

    All values are obtained from a parsed annotation through its ``mass()``,
    ``mz()`` and ``comp()`` methods with different keyword options. The
    function takes no arguments and returns nothing; it only writes to stdout.
    """
    import math  # local import: only needed for the tolerant float check below

    rule = "=" * 60

    def section(title, first=False):
        # Every banner except the very first is preceded by a blank line.
        print(rule if first else "\n" + rule)
        print(title)
        print(rule)

    def as_plain(composition):
        # Re-key a composition mapping by str(element) for compact printing.
        return {str(elem): count for elem, count in composition.items()}

    # Parse a simple peptide sequence
    annot = pt.parse("PEPTIDE")

    # ------------------------------------------------------------------
    # MASS CALCULATIONS
    # ------------------------------------------------------------------
    section("MASS CALCULATIONS", first=True)

    # --- Basic Mass Calculation ---
    # Default is monoisotopic precursor mass (includes terminal groups H and OH)
    mass = annot.mass()
    print(f"Default mass: {mass:.4f} Da")  # 799.3600

    # Explicitly specify precursor ion type
    mass_p = annot.mass(ion_type="p")
    print(f"Precursor mass: {mass_p:.4f} Da")

    # --- Neutral Mass (no terminal groups) ---
    neutral = annot.mass(ion_type=pt.IonType.NEUTRAL)
    print(f"Neutral mass: {neutral:.4f} Da")

    # --- m/z Calculation ---
    # mz() divides mass by charge. Compare with a tolerance rather than ``==``
    # so the demo does not depend on bit-identical floating-point results.
    mz_2plus = annot.mz(charge=2)
    assert math.isclose(mz_2plus, annot.mass(charge=2) / 2)
    print(f"m/z at charge +2: {mz_2plus:.4f}")

    # --- Monoisotopic vs Average Mass ---
    mono_mass = annot.mass(monoisotopic=True)  # default
    avg_mass = annot.mass(monoisotopic=False)
    print(f"Monoisotopic: {mono_mass:.4f} Da")
    print(f"Average: {avg_mass:.4f} Da")

    # --- Charge States ---
    # Integer charge assumes protonation/deprotonation
    mass_2plus = annot.mass(charge=2)
    mass_2minus = annot.mass(charge=-2)
    print(f"Mass at +2 charge: {mass_2plus:.4f} Da")
    print(f"Mass at -2 charge: {mass_2minus:.4f} Da")

    # Adduct charges (overrides annotation charge)
    mass_na = annot.mass(charge="Na:z+1")
    mass_multi_adduct = annot.mass(charge=("Na:z+1^2", "H:z+1"))
    print(f"Mass with Na+ adduct: {mass_na:.4f} Da")
    print(f"Mass with multiple adducts: {mass_multi_adduct:.4f} Da")

    # --- Isotopes ---
    # Integer assumes C13 isotopes
    mass_c13 = annot.mass(isotopes=1)
    print(f"Mass with 1x 13C: {mass_c13:.4f} Da")

    # Custom isotope specification
    mass_custom_iso = annot.mass(isotopes={"17O": 2, "13C": 1})
    print(f"Mass with 2x 17O and 1x 13C: {mass_custom_iso:.4f} Da")

    # --- Neutral Losses ---
    # Single loss
    mass_water_loss = annot.mass(deltas={"H2O": 1})
    print(f"Mass with H2O loss: {mass_water_loss:.4f} Da")

    # Multiple losses
    mass_multi_loss = annot.mass(
        deltas={pt.NeutralDelta.WATER: 1, pt.NeutralDelta.AMMONIA: 2}
    )
    print(f"Mass with H2O + 2×NH3 loss: {mass_multi_loss:.4f} Da")

    # ------------------------------------------------------------------
    # COMPOSITION CALCULATIONS
    # ------------------------------------------------------------------
    section("COMPOSITION CALCULATIONS")

    # --- Basic Composition ---
    # Returns a Counter of ElementInfo objects
    comp = annot.comp()
    print("\nFull composition (ElementInfo objects):")
    for elem, count in comp.items():
        print(f" {elem.symbol}: {count}")

    # Convert to simple string representation
    comp_str = as_plain(comp)
    print(f"\nSimple composition: {comp_str}")

    # --- Composition with Modifications ---
    # Apply charge and isotopes
    comp_modified = annot.comp(charge="Na:z+1", isotopes={"17O": 2, "13C": 1})
    comp_modified_str = as_plain(comp_modified)
    print(f"\nModified composition: {comp_modified_str}")

    # ------------------------------------------------------------------
    # WITH GLOBAL ISOTOPE MODIFICATIONS
    # ------------------------------------------------------------------
    section("GLOBAL ISOTOPE MODIFICATIONS")

    # Applies the global isotope to all residues
    iso_annot = pt.parse("<13C>PEPTIDE")
    iso_comp_str = as_plain(iso_annot.comp())
    print(f"Composition with global 13C: {iso_comp_str}")

    # Will also apply isotopes to modifications where available.
    # Charge is applied after isotopes so will reflect in composition.
    # The global label here is 2H (deuterium), so the printed label says 2H.
    mod_iso_annot = pt.parse("<2H>PEPT[Phospho]IDE/2")
    mod_iso_comp_str = as_plain(mod_iso_annot.comp())
    print(f"Composition with global 2H and Phospho mod: {mod_iso_comp_str}")


if __name__ == "__main__":
    run()
Annotation
Parse and work with ProForma annotations.
1"""
2ProForma Annotation Examples
3=============================
4Basic examples of parsing, serializing, and manipulating ProForma annotations.
5"""
6
7import peptacular as pt
8
9
def run():
    """Print examples of building, inspecting and editing ProForma annotations.

    Covers parsing, programmatic construction, and the set / append / extend /
    clear / remove / pop families of modification methods, followed by static
    mods, slicing, copying, presence checks, serialization and validation.
    The function takes no arguments and returns nothing; it only writes to
    stdout.
    """
    rule = "=" * 60

    def section(title):
        # Banner preceded by a blank line.
        print("\n" + rule)
        print(title)
        print(rule)

    # ------------------------------------------------------------------
    # PARSING ANNOTATIONS
    # ------------------------------------------------------------------

    # Simple sequence
    simple: pt.ProFormaAnnotation = pt.parse("PEPTIDE")
    print(f"Simple: {simple.serialize()}")

    # Chimeric sequence
    chimeric: list[pt.ProFormaAnnotation] = pt.parse_chimeric("PEPTIDE+PEPTIDE")
    print(f"Chimeric: {pt.serialize_chimeric(chimeric)}")

    # ------------------------------------------------------------------
    # CREATING ANNOTATIONS PROGRAMMATICALLY
    # ------------------------------------------------------------------

    # Create from scratch
    annot = pt.ProFormaAnnotation(sequence="PEPTIDE", charge=2)
    print(f"New annotation: {annot.serialize()}")

    # Internal mods take a dict of position -> {mod: count}
    annot = pt.ProFormaAnnotation(
        sequence="PEPTIDE", charge=2, internal_mods={2: {"Oxidation": 1}}
    )
    print(f"New annotation: {annot.serialize()}")

    # Other modifications are just {mod: count}
    annot = pt.ProFormaAnnotation(
        sequence="PEPTIDE",
        nterm_mods={"Acetyl": 1},
        internal_mods={2: {"Oxidation": 1, "Phospho": 1}},
        charge=2,
    )
    print(f"New annotation: {annot.serialize()}")

    # ------------------------------------------------------------------
    # ACCESSING PROPERTIES
    # ------------------------------------------------------------------
    section("ACCESSING PROPERTIES")

    annot = pt.parse("[Acetyl]-PEM[Oxidation]TIS[Phospho]DE/2")
    print(f"Annotation: {annot.serialize()}\n")

    print(f"Sequence: {annot.sequence}")
    print(f"Length: {len(annot)}")
    print(f"Charge state: {annot.charge_state}")
    print(f"Has N-term mods: {annot.has_nterm_mods}")
    print(f"Has internal mods: {annot.has_internal_mods}")
    print(f"Has charge: {annot.has_charge}")

    # ------------------------------------------------------------------
    # SETTING MODIFICATIONS
    # ------------------------------------------------------------------
    section("SETTING MODIFICATIONS")

    # Start fresh
    annot = pt.ProFormaAnnotation(sequence="PEPTIDE")

    # Set N-terminal modification
    annot.set_nterm_mods({"Acetyl": 1})
    print(f"After N-term: {annot.serialize()}")

    # Set internal modification at a specific position
    annot.set_internal_mods_at_index(2, {"Oxidation": 1})
    print(f"After internal: {annot.serialize()}")

    # Set charge
    annot.set_charge(2)
    print(f"After charge: {annot.serialize()}")

    # ------------------------------------------------------------------
    # APPENDING MODIFICATIONS
    # ------------------------------------------------------------------
    section("APPENDING MODIFICATIONS")

    annot = pt.ProFormaAnnotation(sequence="PEPTIDE")

    # Append N-terminal mod
    annot.append_nterm_mod("Acetyl")
    print(f"Append N-term: {annot.serialize()}")

    # Append internal mod
    annot.append_internal_mod_at_index(2, "Oxidation")
    print(f"Append internal: {annot.serialize()}")

    # Append another internal mod at the same position
    annot.append_internal_mod_at_index(2, "Phospho")
    print(f"Append another: {annot.serialize()}")

    # ------------------------------------------------------------------
    # EXTENDING MODIFICATIONS
    # ------------------------------------------------------------------
    section("EXTENDING MODIFICATIONS")

    annot = pt.ProFormaAnnotation(sequence="PEPTIDE")

    # Extend with multiple mods
    annot.extend_nterm_mods(["Acetyl", "Formyl"])
    print(f"Extend N-term: {annot.serialize()}")

    # Extend internal mods at a position
    annot.extend_internal_mods_at_index(2, ["Oxidation", "Phospho"])
    print(f"Extend internal: {annot.serialize()}")

    # ------------------------------------------------------------------
    # REMOVING MODIFICATIONS
    # ------------------------------------------------------------------
    section("REMOVING MODIFICATIONS")

    annot = pt.parse("[Acetyl]-PEM[Oxidation]TIS[Phospho]DE/2")
    print(f"Original: {annot.serialize()}")

    # Clear a specific mod type (removes all)
    annot.clear_nterm_mods()
    print(f"Clear N-term: {annot.serialize()}")

    # Clear internal mods at a position (removes all at that position)
    annot.clear_internal_mod_at_index(2)
    print(f"Clear position 2: {annot.serialize()}")

    # Clear all mods
    annot.clear_mods()
    print(f"Clear all: {annot.serialize()}")

    # ------------------------------------------------------------------
    # DECREMENTING MODIFICATIONS
    # ------------------------------------------------------------------
    section("DECREMENTING MODIFICATIONS")

    # When you have multiple copies of a mod, remove() decrements the count
    annot = pt.ProFormaAnnotation(sequence="PEPTIDE")
    annot.extend_nterm_mods(["Acetyl", "Acetyl", "Formyl"])
    print(f"Original: {annot.serialize()}")

    # Remove one Acetyl (decrements count)
    annot.remove_nterm_mod("Acetyl")
    print(f"After removing 1 Acetyl: {annot.serialize()}")

    # Remove another Acetyl
    annot.remove_nterm_mod("Acetyl")
    print(f"After removing another Acetyl: {annot.serialize()}")

    # Remove Formyl
    annot.remove_nterm_mod("Formyl")
    print(f"After removing Formyl: {annot.serialize()}")

    # Works with internal mods too
    annot = pt.ProFormaAnnotation(sequence="PEPTIDE")
    annot.extend_internal_mods_at_index(2, ["Oxidation", "Oxidation", "Phospho"])
    print(f"\nWith internal mods: {annot.serialize()}")

    annot.remove_internal_mod_at_index(2, "Oxidation")
    print(f"Remove 1 Oxidation: {annot.serialize()}")

    annot.remove_internal_mod_at_index(2, "Oxidation")
    print(f"Remove another Oxidation: {annot.serialize()}")

    # ------------------------------------------------------------------
    # POPPING MODIFICATIONS
    # ------------------------------------------------------------------
    section("POPPING MODIFICATIONS")

    annot = pt.parse("[Acetyl]-PEM[Oxidation]TIDE/2")
    print(f"Original: {annot.serialize()}")

    # Pop N-term mods (returns the mods)
    nterm = annot.pop_nterm_mods()
    print(f"Popped N-term: {nterm}")
    print(f"After pop: {annot.serialize()}")

    # Pop charge
    charge = annot.pop_charge()
    print(f"Popped charge: {charge}")
    print(f"After pop charge: {annot.serialize()}")

    # ------------------------------------------------------------------
    # WORKING WITH STATIC MODS
    # ------------------------------------------------------------------
    section("STATIC MODIFICATIONS")

    # Add static mod by residue
    annot = pt.ProFormaAnnotation(sequence="PEPTIDE")
    annot.add_static_mod_by_residue("E", "Oxidation")
    print(f"Static mod on E: {annot.serialize()}")

    # Condense to internal mods
    annot.condense_static_mods()
    print(f"Condensed: {annot.serialize()}")

    # ------------------------------------------------------------------
    # SLICING
    # ------------------------------------------------------------------
    section("SLICING")

    annot = pt.parse("[Acetyl]-PEM[Oxidation]TIDE")
    print(f"Original: {annot.serialize()}")

    # Slice using indices
    sub = annot[2:5]
    print(f"Slice [2:5]: {sub.serialize()}")

    # Slice preserves modifications
    sub_with_mod = annot[1:4]
    print(f"Slice [1:4]: {sub_with_mod.serialize()}")

    # ------------------------------------------------------------------
    # COPYING
    # ------------------------------------------------------------------
    section("COPYING")

    annot = pt.parse("PEM[Oxidation]TIDE")
    print(f"Original: {annot.serialize()}")

    # Make a copy; named ``duplicate`` so it does not shadow the stdlib
    # ``copy`` module name.
    duplicate = annot.copy()
    duplicate.append_nterm_mod("Acetyl")
    print(f"Copy modified: {duplicate.serialize()}")
    print(f"Original unchanged: {annot.serialize()}")

    # ------------------------------------------------------------------
    # CHECKING MODIFICATIONS
    # ------------------------------------------------------------------
    section("CHECKING FOR MODIFICATIONS")

    annot = pt.parse("[Acetyl]-PEM[Oxidation]TIDE/2")
    print(f"Annotation: {annot.serialize()}\n")

    # Check for specific mod types
    print(f"Has N-term mods: {annot.has_nterm_mods}")
    print(f"Has C-term mods: {annot.has_cterm_mods}")
    print(f"Has internal mods: {annot.has_internal_mods}")
    print(f"Has charge: {annot.has_charge}")

    # Check if it has any mods; the filter accepts enum members or strings
    print(f"Has any mods: {annot.has_mods()}")
    print(
        f"Has internal/charge: {annot.has_mods([pt.ModType.INTERNAL, pt.ModType.CHARGE])}"
    )
    print(f"Has internal/charge: {annot.has_mods(['internal', 'charge'])}")

    # ------------------------------------------------------------------
    # SERIALIZATION OPTIONS
    # ------------------------------------------------------------------
    section("SERIALIZATION")

    annot = pt.parse("[Acetyl]-PEPTIDE/2")

    # Full serialization
    print(f"Full: {annot.serialize()}")

    # Strip mods before serializing (on a copy, leaving ``annot`` untouched)
    stripped = annot.copy().strip_mods()
    print(f"Stripped: {stripped.serialize()}")

    # ------------------------------------------------------------------
    # VALIDATION
    # ------------------------------------------------------------------
    section("VALIDATION")

    # By default, validation is OFF for performance
    annot_no_val = pt.ProFormaAnnotation(sequence="PEPTIDE", validate=False)
    print(f"No validation (default): {annot_no_val.serialize()}")

    # Enable validation when creating (and for methods that modify the annotation)
    annot_with_val = pt.ProFormaAnnotation(sequence="PEPTIDE", validate=True)
    print(f"With validation: {annot_with_val.serialize()}")

    # Validation checks modification syntax. The concrete exception type
    # raised by the library is not visible here, so the demo catches
    # Exception broadly and just reports the message.
    print("\nAttempting to add invalid modification with validation ON:")
    try:
        annot_with_val.append_internal_mod_at_index(0, "InvalidMod123")
        print(f" Error: Modification added unexpectedly: {annot_with_val.serialize()}")
    except Exception as e:
        # Reaching this branch is the expected outcome.
        print(f"Successfully caught error: {e}")

    # Validation can be disabled per method call, even on an annotation
    # created with validate=True.
    print("\nAttempting to add invalid modification with validation OFF:")
    try:
        annot_with_val.append_internal_mod_at_index(0, "InvalidMod123", validate=False)
        # Show the annotation that was actually modified.
        print(f" Success (no validation): {annot_with_val.serialize()}")
    except Exception as e:
        print(f" Error: {e}")


if __name__ == "__main__":
    run()
Annotation with Modification Objects
Advanced annotation examples using modification objects.
1"""
2ProForma Mod Objects Examples
3==============================
4Demonstrates working with Mods and Mod objects returned from ProForma annotations.
5"""
6
7import peptacular as pt
8
9
10def run():
11 # ============================================================================
12 # ACCESSING MODS OBJECTS
13 # ============================================================================
14
15 # Parse a ProForma annotation with various modifications
16 annot = pt.parse("[Acetyl][Formyl]-PEM[Oxidation][Phospho]TIS[Phospho]DE/2")
17 print(f"Annotation: {annot.serialize()}\n")
18
19 # Access different mod collections - these return Mods objects
20 print("N-terminal mods:", annot.nterm_mods)
21 print("Internal mods at pos 2:", annot.get_internal_mods_at_index(2))
22 print("Internal mods at pos 5:", annot.get_internal_mods_at_index(5))
23
24 # ============================================================================
25 # ITERATING OVER MODS
26 # ============================================================================
27
28 print("\n" + "=" * 60)
29 print("ITERATING OVER MODS")
30 print("=" * 60)
31
32 annot = pt.parse("[Acetyl][Acetyl][Formyl]-PEPTIDE")
33 nterm = annot.nterm_mods
34
35 print(f"N-terminal mods: {nterm}\n")
36
37 # Iterate through Mod objects
38 print("Individual Mod objects:")
39 for mod in nterm:
40 print(f" {mod.value} (count: {mod.count})")
41
42 # ============================================================================
43 # WORKING WITH MOD OBJECTS
44 # ============================================================================
45
46 print("\n" + "=" * 60)
47 print("MOD OBJECT PROPERTIES")
48 print("=" * 60)
49
50 annot = pt.parse("PEM[Oxidation][Oxidation][Phospho]TIDE")
51 internal_mods = annot.get_internal_mods_at_index(2)
52
53 print(f"Internal mods at position 2: {internal_mods}\n")
54
55 for mod in internal_mods:
56 print(f"Modification: {mod.value}")
57 print(f" Count: {mod.count}")
58 print(f" Mass (mono): {mod.get_mass(monoisotopic=True):.4f}")
59 print(f" Mass (avg): {mod.get_mass(monoisotopic=False):.4f}")
60 print(f" Composition: {mod.get_composition()}")
61 print(f" Charge: {mod.get_charge()}")
62 print()
63
64 # ============================================================================
65 # ACCESSING PARSED MOD VALUES
66 # ============================================================================
67
68 print("\n" + "=" * 60)
69 print("PARSED MOD VALUES")
70 print("=" * 60)
71
72 annot = pt.parse("PEM[Oxidation]TIS[Phospho]DE")
73
74 # Get mods at position 2 (M with Oxidation)
75 mods_at_2 = annot.get_internal_mods_at_index(2)
76 print(f"Mods at position 2: {mods_at_2}\n")
77
78 # Access parsed items as (modification, count) tuples
79 print("Parsed items:")
80 for mod_value, count in mods_at_2.parse_items():
81 print(f" {mod_value} × {count}")
82 print(f" Type: {type(mod_value)}")
83
84 # ============================================================================
85 # CHECKING MOD PRESENCE
86 # ============================================================================
87
88 print("\n" + "=" * 60)
89 print("CHECKING MOD PRESENCE")
90 print("=" * 60)
91
92 annot = pt.parse("[Acetyl][Formyl]-PEM[Oxidation]TIDE")
93 nterm = annot.nterm_mods
94
95 print(f"N-terminal mods: {nterm}\n")
96 print(f"Contains 'Acetyl': {'Acetyl' in nterm}")
97 print(f"Contains 'Phospho': {'Phospho' in nterm}")
98 print(f"Contains 'Formyl': {'Formyl' in nterm}")
99
100 # ============================================================================
101 # WORKING WITH DIFFERENT MOD TYPES
102 # ============================================================================
103
104 print("\n" + "=" * 60)
105 print("DIFFERENT MOD TYPES")
106 print("=" * 60)
107
108 # Isotope modifications
109 annot = pt.parse("<15N>PEPTIDE")
110 isotope = annot.isotope_mods
111 print(f"Isotope mods: {isotope}")
112 print(f" Type: {isotope.mod_type}")
113 print(f" Serialized: {isotope.serialize()}\n")
114
115 # Static modifications
116 annot = pt.parse("<[Carbamidomethyl]@C>PEPTCDE")
117 static = annot.static_mods
118 print(f"Static mods: {static}")
119 print(f" Type: {static.mod_type}")
120 print(f" Serialized: {static.serialize()}\n")
121
122 # Labile modifications
123 annot = pt.parse("{Glycan:Hex}PEPTIDE")
124 labile = annot.labile_mods
125 print(f"Labile mods: {labile}")
126 print(f" Type: {labile.mod_type}")
127 print(f" Serialized: {labile.serialize()}\n")
128
129 # Charge adducts
130 annot = pt.parse("PEPTIDE/[Na:z+1]")
131 charge = annot.charge_adducts
132 print(f"Charge adducts: {charge}")
133 print(f" Type: {charge.mod_type}")
134 print(f" Serialized: {charge.serialize()}")
135
136 # ============================================================================
137 # MOD COPYING
138 # ============================================================================
139
140 print("\n" + "=" * 60)
141 print("COPYING MOD OBJECTS")
142 print("=" * 60)
143
144 annot = pt.parse("[Acetyl]-PEPTIDE")
145 nterm = annot.nterm_mods
146
147 print(f"Original: {nterm}")
148
149 # Copy a Mods collection
150 nterm_copy = nterm.copy()
151 print(f"Copy: {nterm_copy}")
152 print(f"Are they equal: {nterm._mods == nterm_copy._mods}")
153 print(f"Are they the same object: {nterm is nterm_copy}")
154
155 # ============================================================================
156 # WORKING WITH MULTIPLE MODS AT SAME POSITION
157 # ============================================================================
158
159 print("\n" + "=" * 60)
160 print("MULTIPLE MODS AT SAME POSITION")
161 print("=" * 60)
162
163 annot = pt.parse("PEM[Oxidation][Oxidation][Phospho][Acetyl]TIDE")
164 mods = annot.get_internal_mods_at_index(2)
165
166 print(f"Mods at position 2: {mods}\n")
167
168 print("Individual modifications:")
169 for mod in mods:
170 print(f" {mod.value} × {mod.count}")
171
172 print(f"\nTotal mass contribution: {mods.get_mass():.4f}")
173 print(f"Total composition: {mods.get_composition()}")
174
175 # ============================================================================
176 # ACCESSING UNDERLYING PROFORMA COMPONENTS
177 # ============================================================================
178
179 print("\n" + "=" * 60)
180 print("UNDERLYING PROFORMA COMPONENTS")
181 print("=" * 60)
182
183 # Different modification formats
184 annot1 = pt.parse("PEM[Oxidation]TIDE")
185 annot2 = pt.parse("PEM[UNIMOD:35]TIDE")
186 annot3 = pt.parse("PEM[+15.995]TIDE")
187 annot4 = pt.parse("PEM[Formula:O]TIDE")
188
189 print("Different representations of Oxidation:\n")
190
191 for i, annot in enumerate([annot1, annot2, annot3, annot4], 1):
192 mods = annot.get_internal_mods_at_index(2)
193 for mod in mods:
194 print(f"{i}. {annot.serialize()}")
195 print(f" Parsed value: {mod.value}")
196 print(f" Type: {type(mod.value).__name__}")
197 print(f" Mass: {mod.get_mass():.4f}\n")
198
199 # ============================================================================
200 # VALIDATING MODS
201 # ============================================================================
202
203 print("=" * 60)
204 print("VALIDATING MODS")
205 print("=" * 60)
206
207 annot = pt.parse("[Acetyl]-PEM[Oxidation]TIDE")
208 nterm = annot.nterm_mods
209 internal = annot.get_internal_mods_at_index(2)
210
211 print(f"N-terminal mods valid: {nterm.is_valid}")
212 print(f"Validation result: {nterm.validate()}")
213
214 print(f"\nInternal mods valid: {internal.is_valid}")
215 print(f"Validation result: {internal.validate()}")
216
217 # ============================================================================
218 # WORKING WITH INTERVALS
219 # ============================================================================
220
221 print("\n" + "=" * 60)
222 print("INTERVAL MODIFICATIONS")
223 print("=" * 60)
224
225 annot = pt.parse("PEP(TIS)[Phospho]DE")
226
227 print(f"Annotation: {annot.serialize()}\n")
228
229 if annot.has_intervals:
230 print("Intervals:")
231 for interval in annot.intervals:
232 print(f" Range: {interval.start}-{interval.end}")
233 print(f" Ambiguous: {interval.ambiguous}")
234 print(f" Has mods: {interval.has_mods}")
235 if interval.has_mods:
236 print(f" Mods: {interval.mods}")
237 for mod in interval.mods:
238 print(f" {mod.value} × {mod.count}")
239
240 # ============================================================================
241 # SERIALIZATION
242 # ============================================================================
243
244 print("\n" + "=" * 60)
245 print("SERIALIZATION")
246 print("=" * 60)
247
248 annot = pt.parse("[Acetyl][Formyl]-PEM[Oxidation][Phospho]TIDE/2")
249
250 print("Full annotation:", annot.serialize())
251 print("\nIndividual mod serialization:")
252 print(f" N-term: {annot.nterm_mods.serialize()}")
253 print(f" Position 2: {annot.get_internal_mods_at_index(2).serialize()}")
254
255 # Show how mods serialize differently based on type
256 print("\nMod type serialization patterns:")
257 examples = [
258 ("[Acetyl]-PEPTIDE", "nterm_mods"),
259 ("PEPTIDE-[Amidated]", "cterm_mods"),
260 ("{Glycan:Hex}PEPTIDE", "labile_mods"),
261 ("[Phospho]?PEPTIDE", "unknown_mods"),
262 ("<15N>PEPTIDE", "isotope_mods"),
263 ("PEPTIDE/[Na:z+1]", "charge_adducts"),
264 ]
265
266 for proforma, attr in examples:
267 annot = pt.parse(proforma)
268 mods = getattr(annot, attr)
269 print(f" {proforma:30s} -> {mods.serialize()}")
270
271
272if __name__ == "__main__":
273 run()
Digestion
Digest protein sequences with various proteases.
1"""
2Protein Digestion Examples
3===========================
4Simple examples of in-silico enzymatic digestion using ProForma.
5All digestion methods return Span objects (start, end, missed_cleavages).
6Use annotation[span] to get the actual peptide.
7"""
8
9import peptacular as pt
10
11
def run():
    """Print a guided tour of in-silico digestion on one modified protein.

    A single annotated protein is parsed once and then digested in several
    ways: amino-acid based, regex/enzyme based, with missed cleavages, with
    length limits, and semi-enzymatically. Each digest call is iterated for
    spans, and every span is turned back into a peptide by indexing the
    protein (``protein[span]``) before it is serialized and printed.
    """
    rule = "=" * 60

    def banner(title, lead="\n"):
        """Print ``title`` framed by two rules; ``lead`` prefixes the first rule."""
        print(lead + rule)
        print(title)
        print(rule)

    # ------------------------------------------------------------------
    # Simple digestion (amino-acid based)
    # ------------------------------------------------------------------
    protein = pt.parse("[Amidated]-PEPTIDEKPEPTIDERPEPT[Phospho]IDER-[+57]")

    # The very first banner has no leading blank line.
    banner("SIMPLE DIGESTION (AA BASED)", lead="")
    print(f"Protein: {protein}\n")

    # Plain trypsin-like rule: cut on K or R.
    print("Trypsin-like (cleave after K/R):")
    for piece in protein.simple_digest(cleave_on="KR"):
        cut = protein[piece]
        print(f" {cut.serialize()} - span: {piece}")

    # Same rule, narrowed by the residues flanking the cleavage site.
    print("\nWith restrictions (cleave after K/R, but not before N or after P):")
    restricted = protein.simple_digest(
        cleave_on="KR",
        restrict_before="N",
        restrict_after="P",
        cterminal=True,
    )
    for piece in restricted:
        print(f" {protein[piece].serialize()}")

    # ------------------------------------------------------------------
    # Digestion (regex based)
    # ------------------------------------------------------------------
    banner("DIGESTION (REGEX BASED)")

    # The enzyme can be given as a Proteases enum member ...
    print("\nUsing Proteases enum:")
    for piece in protein.digest(pt.Proteases.TRYPSIN):
        print(f" {protein[piece].serialize()}")

    # ... as an enzyme name ...
    print("\nUsing enzyme string 'trypsin':")
    for piece in protein.digest("trypsin"):
        print(f" {protein[piece].serialize()}")

    # ... or as a hand-written regular expression.
    print("\nCustom regex (cleave after A or E):")
    for piece in protein.digest("(?<=[AE])"):
        print(f" {protein[piece].serialize()}")

    # ------------------------------------------------------------------
    # Cleavage sites
    # ------------------------------------------------------------------
    banner("CLEAVAGE SITES")

    print("\nCleavage positions for trypsin (after K/R):")
    sites = list(
        protein.simple_cleavage_sites(
            cleave_on="KR",
            restrict_after="P",
            restrict_before="N",
            cterminal=True,
        )
    )
    print(f" Sites: {sites}")
    print(f" Sequence: {protein.sequence}")
    # Draw a caret under every index of the sequence that appears in `sites`.
    markers = "".join(
        "^" if idx in sites else " " for idx in range(len(protein.sequence))
    )
    print(f" {markers}")

    print("\nCleavage positions for included trypsin regex:")
    # Proteases.TRYPSIN or a custom regex can be passed here as well.
    regex_sites = list(protein.cleavage_sites("trypsin"))
    print(f" Sites: {regex_sites}")

    # ------------------------------------------------------------------
    # Missed cleavages
    # ------------------------------------------------------------------
    banner("MISSED CLEAVAGES")

    print("\nWith 1 missed cleavage:")
    for piece in protein.digest("trypsin", missed_cleavages=1):
        print(f" {protein[piece].serialize()}")

    # ------------------------------------------------------------------
    # Length filtering
    # ------------------------------------------------------------------
    banner("LENGTH FILTERING")

    print("\nPeptides between 7-15 amino acids:")
    for piece in protein.digest("trypsin", min_len=7, max_len=15):
        cut = protein[piece]
        print(f" {cut.serialize()} (length: {len(cut)})")

    # ------------------------------------------------------------------
    # Semi-enzymatic digestion
    # ------------------------------------------------------------------
    banner("SEMI-ENZYMATIC")

    print("\nSemi-enzymatic (one end must be enzymatic):")
    for piece in protein.digest("trypsin", semi=True, min_len=5, max_len=10):
        print(f" {protein[piece].serialize()}")
125
126if __name__ == "__main__":
127 run()
Fragmentation
Generate theoretical fragment ions for peptides.
1"""
2Fragment Generation Examples
3=============================
4Examples of generating fragment ions from ProForma annotations.
5All fragment methods return Fragment objects with mass, m/z, and composition.
6"""
7
8import peptacular as pt
9
10
def run():
    """Print a walkthrough of fragment-ion generation for one peptide.

    The sections cover, in order: basic b/y ions, other ion types, charge
    states and adducts, user-specified deltas, neutral deltas, isotopes,
    internal fragments, immonium ions, the precursor ion, combined options,
    reading properties off a fragment, mzPAF output, and ``fast_fragment``.
    Everything is written to stdout; nothing is returned.
    """
    rule = "=" * 60

    def banner(title, lead="\n"):
        """Print ``title`` framed by two rules; ``lead`` prefixes the first rule."""
        print(lead + rule)
        print(title)
        print(rule)

    # ------------------------------------------------------------------
    # Basic fragmentation
    # ------------------------------------------------------------------
    peptide = pt.parse("PEPT[Phospho]IDE-[Acetyl]")

    # The very first banner has no leading blank line.
    banner("BASIC FRAGMENTATION", lead="")
    print(f"Peptide: {peptide}\n")

    # b-ions (N-terminal fragments)
    print("b-ions (N-terminal):")
    for frag in peptide.fragment(ion_types=["b"]):
        print(f" {frag}")

    # y-ions (C-terminal fragments)
    print("\ny-ions (C-terminal):")
    for frag in peptide.fragment(ion_types=["y"]):
        print(f" {frag}")

    # ------------------------------------------------------------------
    # Fragment ion types
    # ------------------------------------------------------------------
    banner("DIFFERENT ION TYPES")

    # Several ion types can be requested in a single call.
    print("\na, b, c ions:")
    for frag in peptide.fragment(ion_types=["a", "b", "c"]):
        print(f" {frag}")

    print("\nx, y, z ions:")
    for frag in peptide.fragment(ion_types=["x", "y", "z"]):
        print(f" {frag}")

    # ------------------------------------------------------------------
    # Charged fragments
    # ------------------------------------------------------------------
    banner("CHARGED FRAGMENTS")

    # Integer charge state
    print("\nb-ions at +2 charge:")
    for frag in peptide.fragment(ion_types=["b"], charges=[2]):
        print(f" {frag}")

    # Adduct given as a string
    print("\ny-ions with Na+ adduct:")
    for frag in peptide.fragment(ion_types=["y"], charges=["Na:z+1"]):
        print(f" {frag}")

    # ------------------------------------------------------------------
    # Deltas (user specified)
    # ------------------------------------------------------------------
    banner("DELTAS")

    # A -18 Da delta applied to all ions.
    print("\ny-ions with -18 loss:")
    for frag in peptide.fragment(
        ion_types=["y"],
        deltas=[-18.0],  # custom delta of -18.0 Da
    ):
        print(f" {frag}")

    # `deltas` defaults to (None,), so None has to be listed explicitly to
    # also get the fragments without any loss.
    print("\ny-ions with -18 and No loss:")
    for frag in peptide.fragment(
        ion_types=["y"],
        deltas=[-18.0, None],  # custom delta of -18.0 Da and no loss
    ):
        print(f" {frag}")

    # ------------------------------------------------------------------
    # Neutral deltas
    # ------------------------------------------------------------------
    # Neutral deltas apply common losses such as H2O or NH3 to the fragments
    # they are appropriate for; they work in addition to any custom deltas.
    banner("NEUTRAL DELTAS")

    # Water loss: selectively applied to fragments that can lose H2O
    # (those containing S, T, D or E).
    print("\ny-ions with H2O loss:")
    for frag in peptide.fragment(
        ion_types=["y"],
        neutral_deltas=["H2O"],
        max_ndeltas=2,
    ):
        print(f" {frag}")

    # Multiple losses; neutral deltas may also be given as enum members.
    print("\nb-ions with H2O and NH3 loss:")
    for frag in peptide.fragment(
        ion_types=["b"],
        neutral_deltas=[pt.NeutralDelta.WATER, pt.NeutralDelta.AMMONIA],
        max_ndeltas=2,
    ):
        print(f" {frag}")

    # ------------------------------------------------------------------
    # Isotopes
    # ------------------------------------------------------------------
    banner("ISOTOPIC FRAGMENTS")

    # An integer requests that many 13C.
    print("\ny-ions with 1x 13C:")
    for frag in peptide.fragment(ion_types=["y"], isotopes=[1]):
        print(f" {frag}")

    # A mapping requests custom isotopes.
    print("\nb-ions with 2x 17O:")
    for frag in peptide.fragment(ion_types=["b"], isotopes=[{"17O": 2}]):
        print(f" {frag}")

    # ------------------------------------------------------------------
    # Internal fragments
    # ------------------------------------------------------------------
    banner("INTERNAL FRAGMENTS")

    print("\nInternal fragments (min_len=3, max_len=5):")
    for frag in peptide.fragment(ion_types=["ax"]):
        # Only fragments whose position is a (start, end) tuple are filtered
        # by length here; anything else is skipped.
        if frag.position and isinstance(frag.position, tuple):
            start, end = frag.position
            if 3 <= (end - start) <= 5:
                print(f" {frag}")

    # ------------------------------------------------------------------
    # Immonium ions
    # ------------------------------------------------------------------
    banner("IMMONIUM IONS")

    print("\nImmonium ions:")
    for frag in peptide.fragment(ion_types=["i"]):
        print(f" {frag}")

    # ------------------------------------------------------------------
    # Precursor ion
    # ------------------------------------------------------------------
    banner("PRECURSOR ION")

    print("\nPrecursor ion at +2 charge:")
    for frag in peptide.fragment(ion_types=["p"], charges=[2]):
        print(f" {frag}")

    # ------------------------------------------------------------------
    # Combining options
    # ------------------------------------------------------------------
    banner("COMBINING OPTIONS")

    print("\ny-ions: +2 charge, H2O loss, 1x 13C:")
    for frag in peptide.fragment(
        ion_types=["y"],
        charges=[2],
        neutral_deltas=["H2O"],
        isotopes=[1],
    ):
        print(f" {frag}")

    # ------------------------------------------------------------------
    # Accessing fragment properties
    # ------------------------------------------------------------------
    banner("FRAGMENT PROPERTIES")

    # Unless otherwise specified, fragments do not include sequence or
    # composition data. This can be enabled with the `include_sequence` and
    # `calculate_composition` flags.
    b_ions: list[pt.Fragment] = peptide.fragment(
        ion_types=["b"], charges=[2], calculate_composition=True
    )
    if b_ions:
        frag = b_ions[0]
        print(f"\nExample fragment: {frag}")
        print(f" Ion type: {frag.ion_type}")
        print(f" Position: {frag.position}")
        print(f" Mass: {frag.mass:.4f} Da")
        print(f" m/z: {frag.mz:.4f}")
        print(f" Charge: {frag.charge_state}")
        print(f" Neutral mass: {frag.neutral_mass:.4f} Da")
        if frag.composition:
            # Stringify the element keys so the dict prints compactly.
            comp_str = {str(elem): count for elem, count in frag.composition.items()}
            print(f" Composition: {comp_str}")

    # ------------------------------------------------------------------
    # mzPAF output
    # ------------------------------------------------------------------
    banner("MZPAF OUTPUT")

    # See the paftacular documentation for details on the mzPAF format.
    print("\nFragment annotations in mzPAF format:")
    fragments: list[pt.Fragment] = peptide.fragment(ion_types=["b", "y"], charges=[2])
    for frag in fragments[:8]:  # show the first 8 fragments
        print(f" {frag.to_mzpaf()}")

    # serialize() with the format parameter also works.
    print("\nUsing serialize(format='mzpaf'):")
    for frag in fragments[:4]:
        print(f" {frag.serialize(format='mzpaf')}")

    # NOTE: no closing rule is printed here. The next banner already opens
    # with a blank line and a rule; printing one here as well produced a
    # stray doubled rule in the middle of the output.

    # ------------------------------------------------------------------
    # Fast fragment
    # ------------------------------------------------------------------
    banner("FAST FRAGMENT")

    # fast_fragment() uses a prefix/suffix-sum algorithm to compute fragment
    # m/z values without constructing Fragment objects. It is faster than
    # fragment() for high-throughput use cases.
    #
    # Return type: dict[(IonType, charge)] -> list[float]
    # Each list has length == len(peptide), ordered fragment position 1 to N.
    #
    # Limitations vs fragment():
    #   - No neutral losses (H2O, NH3, custom deltas)
    #   - No isotope shifts
    #   - No adduct charges (integer charges only)
    #   - No internal / immonium ions
    #   - Raises if the annotation has unknown or interval modifications
    peptide = pt.parse("PEPT[Phospho]IDE")

    # Method on the annotation object.
    mz_map = peptide.fast_fragment(ion_types=["b", "y"], charges=[1, 2])
    print(f"\nPeptide: {peptide}")
    for (ion_type, charge), mzs in mz_map.items():
        print(f" ({ion_type}, z={charge}): {[round(v, 4) for v in mzs]}")

    # Functional API: same call shape, but it takes the sequence string
    # directly (shown here on a different, unmodified peptide).
    print("\nFunctional API (pt.fast_fragment):")
    mz_map2 = pt.fast_fragment("PEPTIDE", ion_types=["b", "y"], charges=[1])
    for (ion_type, charge), mzs in mz_map2.items():
        print(f" ({ion_type}, z={charge}): {[round(v, 4) for v in mzs]}")

    # Batch / parallel: pass a list of sequences, get one map per sequence.
    print("\nBatch fast_fragment (list input):")
    sequences = ["PEPTIDE", "ACDEFGHIK", "LMNPQRST"]
    results = pt.fast_fragment(sequences, ion_types=["y"], charges=[1])
    for seq, mz_map3 in zip(sequences, results):
        # Only one ion type and one charge were requested, so the first entry
        # of each map is the one printed; its key is not needed.
        mzs = next(iter(mz_map3.values()))
        print(f" {seq}: {[round(v, 4) for v in mzs]}")

    print("\n" + rule)
287
288if __name__ == "__main__":
289 run()
Isotope Calculations
Calculate isotopic distributions for peptides.
1"""
2Isotopic Distribution Calculations
3===================================
4Examples of calculating isotopic distributions from ProForma annotations.
5"""
6
7import peptacular as pt
8
9
def run():
    """Print isotopic-distribution examples for the peptide ``PEPTIDE``.

    The same annotation is passed through ``isotopic_distribution()`` with
    different keyword arguments (peak limit, abundance threshold, neutron
    offsets, resolution, and composition-style options), and the peaks of
    each result are printed. Nothing is returned.
    """
    rule = "=" * 60

    def banner(title, lead="\n"):
        """Print ``title`` framed by two rules; ``lead`` prefixes the first rule."""
        print(lead + rule)
        print(title)
        print(rule)

    # One plain peptide is reused for every example below.
    annot = pt.parse("PEPTIDE")

    # ------------------------------------------------------------------
    # Basic isotopic distribution
    # ------------------------------------------------------------------
    # The very first banner has no leading blank line.
    banner("BASIC ISOTOPIC DISTRIBUTION", lead="")

    # Default distribution: a list of IsotopicData carrying mass,
    # neutron_count and abundance, with abundances normalized so that the
    # largest peak is 1.0.
    default_peaks = annot.isotopic_distribution()
    print(f"\nPeptide: {annot.serialize()}")
    print(f"Monoisotopic mass: {annot.mass():.3f} Da")
    print("Default isotopic distribution:")
    for peak in default_peaks:
        print(
            f" mass: {peak.mass:>8.3f} Da, abundance: {peak.abundance:>6.3f}, neutrons: {peak.neutron_count}"
        )

    # Cap the number of isotopes that are reported.
    capped_peaks = annot.isotopic_distribution(max_isotopes=3)
    print("\nLimited to 3 most abundant isotopes:")
    for peak in capped_peaks:
        print(f" mass: {peak.mass:>8.3f} Da, abundance: {peak.abundance:>6.3f}")

    # Keep only isotopes whose abundance reaches the threshold
    # (relative to the largest peak).
    thresholded_peaks = annot.isotopic_distribution(min_abundance_threshold=0.05)
    print("\nFiltered (≥5% of max peak):")
    for peak in thresholded_peaks:
        print(f" mass: {peak.mass:>8.3f} Da, abundance: {peak.abundance:>6.3f}")

    # Neutron-offset mode: `mass` holds the neutron count rather than an
    # absolute mass, which is useful when matching patterns.
    offset_peaks = annot.isotopic_distribution(use_neutron_count=True)
    print("\nNeutron offset mode:")
    for peak in offset_peaks:
        print(f" neutron offset: {peak.mass:>3.0f}, abundance: {peak.abundance:>6.3f}")

    # ------------------------------------------------------------------
    # Distribution resolution
    # ------------------------------------------------------------------
    banner("DISTRIBUTION RESOLUTION")

    # High resolution: more decimal places for precise mass calculations.
    fine_peaks = annot.isotopic_distribution(distribution_resolution=5)
    print("\nHigh resolution (5 decimals):")
    for peak in fine_peaks[:3]:
        print(f" mass: {peak.mass:.5f} Da, abundance: {peak.abundance:>6.3f}")

    # Low resolution: simulates lower instrument precision and combines
    # nearby masses.
    coarse_peaks = annot.isotopic_distribution(distribution_resolution=2)
    print("\nLow resolution (2 decimals):")
    for peak in coarse_peaks[:3]:
        print(f" mass: {peak.mass:.2f} Da, abundance: {peak.abundance:>6.3f}")

    # ------------------------------------------------------------------
    # Combining with comp parameters
    # ------------------------------------------------------------------
    banner("COMBINING WITH COMP PARAMETERS")

    # isotopic_distribution() accepts the same parameters as comp(), so
    # charge, isotopes, losses and ion type can be combined in one call.
    # NOTE(review): the label below says "-H2O" while the call passes
    # deltas={"H2O": 1}; confirm the sign convention of `deltas`.
    combined_peaks = annot.isotopic_distribution(
        ion_type="y", charge=2, isotopes=1, deltas={"H2O": 1}
    )
    print("\ny-ion, +2 charge, +1 13C, -H2O:")
    for peak in combined_peaks[:4]:
        print(f" m/z: {peak.mass:>8.3f}, abundance: {peak.abundance:>6.3f}")
93
94if __name__ == "__main__":
95 run()
Physicochemical Properties
Calculate various properties like pI, hydrophobicity, etc.
1"""
2Sequence Property Calculations
3===============================
4Examples of calculating physicochemical and structural properties of peptides.
5Note: These calculations use only the amino acid sequence; modifications are not considered.
6"""
7
8import peptacular as pt
9
def run():
    """Print examples of sequence-property calculations via ``annot.prop``.

    Covers the single-value physicochemical and structural properties,
    composition percentages and charge at a given pH, ``calc_property`` with
    enum, string and dict scales, sliding-window and partitioned
    calculations, and two small practical examples. Everything is written to
    stdout; nothing is returned.
    """
    rule = "=" * 60

    def banner(title, lead="\n"):
        """Print ``title`` framed by two rules; ``lead`` prefixes the first rule."""
        print(lead + rule)
        print(title)
        print(rule)

    # One plain test peptide is reused for most examples below.
    annot = pt.parse("PEPTIDE")

    # ------------------------------------------------------------------
    # Simple physicochemical properties
    # ------------------------------------------------------------------
    # The very first banner has no leading blank line.
    banner("PHYSICOCHEMICAL PROPERTIES", lead="")

    # Each of these is formatted as a single float.
    print(f"Sequence: {annot}")
    print(f"Hydrophobicity: {annot.prop.hydrophobicity:.3f}")
    print(f"Flexibility: {annot.prop.flexibility:.3f}")
    print(f"Hydrophilicity: {annot.prop.hydrophilicity:.3f}")
    print(f"Surface accessibility: {annot.prop.surface_accessibility:.3f}")
    print(f"Polarity: {annot.prop.polarity:.3f}")
    print(f"Aromaticity: {annot.prop.aromaticity:.3f}")
    print(f"Isoelectric point (pI): {annot.prop.pi:.2f}")
    print(f"HPLC retention: {annot.prop.hplc:.3f}")
    print(f"Refractivity: {annot.prop.refractivity:.3f}")

    # ------------------------------------------------------------------
    # Structural properties
    # ------------------------------------------------------------------
    banner("STRUCTURAL PROPERTIES")

    # Secondary structure percentages
    print(f"Alpha helix: {annot.prop.alpha_helix_percent:.1f}%")
    print(f"Beta sheet: {annot.prop.beta_sheet_percent:.1f}%")
    print(f"Beta turn: {annot.prop.beta_turn_percent:.1f}%")
    print(f"Coil: {annot.prop.coil_percent:.1f}%")

    # Secondary structure predicted with an explicitly chosen method.
    ss_dr = annot.prop.secondary_structure(pt.SecondaryStructureMethod.DELEAGE_ROUX)
    print("\nSecondary structure (Deleage-Roux method):")
    print(f" Alpha helix: {ss_dr['alpha_helix']:.1f}%")
    print(f" Beta sheet: {ss_dr['beta_sheet']:.1f}%")
    print(f" Beta turn: {ss_dr['beta_turn']:.1f}%")
    print(f" Coil: {ss_dr['coil']:.1f}%")

    # ------------------------------------------------------------------
    # Composition-based properties
    # ------------------------------------------------------------------
    banner("COMPOSITION PROPERTIES")

    # Amino acid composition
    proline_pct = annot.prop.aa_property_percentage("P")
    acidic_pct = annot.prop.aa_property_percentage("DE")  # D and E
    basic_pct = annot.prop.aa_property_percentage("KR")  # K and R
    print(f"Proline content: {proline_pct:.1f}%")
    print(f"Acidic residues (D, E): {acidic_pct:.1f}%")
    print(f"Basic residues (K, R): {basic_pct:.1f}%")

    # Charge at different pH values
    print(f"\nNet charge at pH 7.0: {annot.prop.charge_at_ph(7.0):.2f}")
    print(f"Net charge at pH 3.0: {annot.prop.charge_at_ph(3.0):.2f}")
    print(f"Net charge at pH 11.0: {annot.prop.charge_at_ph(11.0):.2f}")

    # ------------------------------------------------------------------
    # Custom property calculations
    # ------------------------------------------------------------------
    banner("CUSTOM PROPERTY CALCULATIONS")

    # Basic calculation with every option spelled out at its default.
    prop = annot.prop.calc_property(
        scale=pt.HydrophobicityScale.ABRAHAM_LEO,
        missing_aa_handling=pt.MissingAAHandling.ERROR,  # default
        aggregation_method=pt.AggregationMethod.SUM,  # default
        normalize=False,  # default
        weighting_scheme=pt.WeightingMethods.UNIFORM,  # default
        min_weight=0.0,  # default
        max_weight=1.0,  # default
    )
    print(f"Abraham-Leo hydrophobicity (sum): {prop:.2f}")

    # The same options can be given as string identifiers.
    prop_avg = annot.prop.calc_property(
        scale="deleage_roux_alpha_helix",
        missing_aa_handling="avg",
        aggregation_method="avg",
    )
    print(f"Alpha helix propensity (avg): {prop_avg:.3f}")

    # A plain dict works as a custom scale.
    custom_scale = {
        "A": 1.0, "C": 2.0, "D": 3.0, "E": 4.0,
        "F": 5.0, "G": 6.0, "H": 7.0, "I": 8.0,
        "K": 9.0, "L": 10.0, "M": 11.0, "N": 12.0,
        "P": 13.0, "Q": 14.0, "R": 15.0, "S": 16.0,
        "T": 17.0, "V": 18.0, "W": 19.0, "Y": 20.0,
    }
    custom_prop = annot.prop.calc_property(scale=custom_scale, missing_aa_handling="avg")
    print(f"Custom scale (sum): {custom_prop:.2f}")

    # ------------------------------------------------------------------
    # Available options for calc_property()
    # ------------------------------------------------------------------
    banner("CALC_PROPERTY OPTIONS")

    # [scale]
    #   - Use built-in scale enums (e.g., HydrophobicityScale.ABRAHAM_LEO)
    #   - Use scale name as string (e.g., "abraham_leo")
    #   - Provide custom dict (e.g., {'A': 1.0, 'C': 2.0, ...})
    #   - ~50 built-in scales available
    #
    # [missing_aa_handling]
    #   - 'avg': Use average of known values
    #   - 'min': Use minimum of known values
    #   - 'max': Use maximum of known values
    #   - 'median': Use median of known values
    #   - 'error': Raise error (default)
    #   - 'zero': Use 0.0
    #   - 'skip': Skip missing amino acids
    #
    # [aggregation_method]
    #   - 'sum': Sum of amino acid values (default)
    #   - 'avg': Average of amino acid values
    #
    # [normalize]
    #   - True: Normalize each AA's property value to [0, 1] before aggregation
    #   - False: Use raw values (default)
    #
    # [weighting_scheme]
    #   - 'uniform': All positions weighted equally (default)
    #   - 'linear': Linear weighting across sequence
    #   - 'exponential': Exponential weighting
    #   - 'gaussian': Gaussian weighting
    #   - 'sigmoid': Sigmoid weighting
    #   - 'cosine': Cosine weighting
    #   - 'sinusoidal': Sinusoidal weighting
    #
    # [min_weight, max_weight]
    #   - Define weight range (default: 0.0 to 1.0)

    # ------------------------------------------------------------------
    # Sliding window calculations
    # ------------------------------------------------------------------
    # Uses the default leading blank line like every other section; without
    # it this rule printed directly under the previous section's closing rule.
    banner("SLIDING WINDOW CALCULATIONS")

    # Calculate the property over sliding windows.
    windows = annot.prop.property_windows(
        scale=pt.HydrophobicityScale.ABRAHAM_LEO,
        window_size=4,
        missing_aa_handling=pt.MissingAAHandling.ERROR,
        aggregation_method=pt.AggregationMethod.SUM,
        normalize=False,
        weighting_scheme=pt.WeightingMethods.UNIFORM,
        min_weight=0.0,
        max_weight=1.0,
    )
    print("\nWindow size 4 (overlapping):")
    print(f" Values: {[f'{v:.2f}' for v in windows]}")
    print(f" Number of windows: {len(windows)}")

    # A different (smaller) window size, other options left at their defaults.
    windows_small = annot.prop.property_windows(
        scale=pt.HydrophobicityScale.ABRAHAM_LEO,
        window_size=3,
    )
    print("\nWindow size 3:")
    print(f" Values: {[f'{v:.2f}' for v in windows_small]}")

    # ------------------------------------------------------------------
    # Partitioned window calculations
    # ------------------------------------------------------------------
    banner("PARTITIONED WINDOW CALCULATIONS")

    # Divide the sequence into a fixed number of non-overlapping partitions.
    partitions = annot.prop.property_partitions(
        scale=pt.HydrophobicityScale.ABRAHAM_LEO,
        num_windows=3,
        aa_overlap=0,
        missing_aa_handling=pt.MissingAAHandling.ERROR,
        aggregation_method=pt.AggregationMethod.SUM,
        normalize=False,
        weighting_scheme=pt.WeightingMethods.UNIFORM,
        min_weight=0.0,
        max_weight=1.0,
    )
    print("\n3 partitions (no overlap):")
    print(f" Values: {[f'{v:.2f}' for v in partitions]}")

    # Same partitioning with one residue of overlap between partitions.
    partitions_overlap = annot.prop.property_partitions(
        scale=pt.HydrophobicityScale.ABRAHAM_LEO,
        num_windows=3,
        aa_overlap=1,
    )
    print("\n3 partitions (1 AA overlap):")
    print(f" Values: {[f'{v:.2f}' for v in partitions_overlap]}")

    # ------------------------------------------------------------------
    # Practical examples
    # ------------------------------------------------------------------
    banner("PRACTICAL EXAMPLES")

    # Example: hydrophobicity profile for transmembrane prediction.
    tm_peptide = pt.parse("LFGAIAGFIENGWEGMIDG")
    tm_windows = tm_peptide.prop.property_windows(
        scale=pt.HydrophobicityScale.KYTE_DOOLITTLE,
        window_size=9,
    )
    print(f"\nTransmembrane peptide: {tm_peptide}")
    print("Kyte-Doolittle hydrophobicity profile (window=9):")
    for position, val in enumerate(tm_windows, start=1):
        print(f" Position {position}: {val:.2f}")

    # Example: charge distribution analysis with a simple +1/-1 charge scale;
    # residues missing from the scale are handled with the 'zero' option.
    charged_peptide = pt.parse("PKDEPKDE")
    charge_partitions = charged_peptide.prop.property_partitions(
        scale={"K": 1, "R": 1, "D": -1, "E": -1},
        num_windows=4,
        aa_overlap=0,
        missing_aa_handling="zero",
    )
    print(f"\nCharged peptide: {charged_peptide}")
    print("Charge distribution (4 regions):")
    for region, val in enumerate(charge_partitions, start=1):
        print(f" Region {region}: {val:+.1f}")

    print("\n" + rule)
258
259if __name__ == "__main__":
260 run()
Converters
Convert sequences from other tools (IP2, DIANN, Casanovo, MS2PIP) to ProForma format.
1"""
2Sequence Format Conversion Examples
3====================================
4Examples of converting peptide sequences from other tools (IP2, DIANN, Casanovo, MS2PIP)
5to ProForma 2.0 format. All conversion functions support parallel execution.
6"""
7
8import peptacular as pt
9
10
def run():
    """Print worked conversions between external peptide formats and ProForma.

    Walks through four sections in order -- IP2, DIANN, Casanovo and MS2PIP --
    printing a banner for each, followed by the input string(s) and the value
    produced by the corresponding peptacular call. Takes no arguments and
    returns nothing; all output is written to stdout.
    """
    rule = "=" * 60

    def banner(title, gap=True):
        # Every section except the very first is preceded by a blank line.
        print(("\n" if gap else "") + rule)
        print(title)
        print(rule)

    # ------------------------------------------------------------------
    # IP2
    # ------------------------------------------------------------------
    banner("IP2 SEQUENCE CONVERSION", gap=False)

    # Basic IP2 format: K.SEQUENCE.K
    ip2_input = "K.PEPTIDE.K"
    ip2_result = pt.convert_ip2_sequence(ip2_input)
    print(f"IP2: {ip2_input}")
    print(f"ProForma: {ip2_result}\n")

    # ------------------------------------------------------------------
    # DIANN
    # ------------------------------------------------------------------
    banner("DIANN SEQUENCE CONVERSION")

    # Input carrying a bracketed modification
    diann_input = "_YMGTLRGC[Carbamidomethyl]LLRLYHD_"
    diann_result = pt.convert_diann_sequence(diann_input)
    print(f"DIANN with mod: {diann_input}")
    print(f"ProForma: {diann_result}\n")

    # ------------------------------------------------------------------
    # Casanovo
    # ------------------------------------------------------------------
    banner("CASANOVO SEQUENCE CONVERSION")

    # Complex example
    casanovo_input = "+43.006P+100EPTIDE"
    casanovo_result = pt.convert_casanovo_sequence(casanovo_input)
    print(f"Casanovo complex: {casanovo_input}")
    print(f"ProForma: {casanovo_result}")

    # Same source format, but built through the annotation classmethod
    from_casanovo = pt.ProFormaAnnotation.from_casanovo("+43.006PEPTIDE")
    print(f"\nCasanovo (annotation method): {from_casanovo.serialize()}")
    mass_da = from_casanovo.mass()
    print(f" Mass: {mass_da:.4f} Da")

    # ------------------------------------------------------------------
    # MS2PIP
    # ------------------------------------------------------------------
    banner("MS2PIP FORMAT CONVERSION")

    # ProForma -> MS2PIP: to_ms2_pip() is unpacked into (sequence, mod string)
    source_annot = pt.parse("[Acetyl]-PEM[Oxidation]TIDE")
    bare_sequence, modifications = source_annot.to_ms2_pip()
    print(f"\nProForma: {source_annot.serialize()}")
    print(f"MS2PIP sequence: {bare_sequence}")
    print(f"MS2PIP mods: {modifications}")

    # MS2PIP -> ProForma
    round_trip = pt.ProFormaAnnotation.from_ms2_pip(
        sequence="PEPTIDE",
        mod_str="0|Acetyl|3|Oxidation",
    )
    print(f"\nMS2PIP -> ProForma: {round_trip.serialize()}")

    # With static modifications
    with_static = pt.ProFormaAnnotation.from_ms2_pip(
        sequence="PEPTIDE",
        mod_str="0|Acetyl",
        static_mods={"C": "Carbamidomethyl"},
    )
    print(f"MS2PIP with static mods: {with_static.serialize()}")
86
# Execute the conversion examples only when this file is run as a script,
# not when it is imported.
if __name__ == "__main__":
    run()