CompOmics · ATPs · Apr 16, 2025 · Apr 16, 2025
diff --git a/psm_utils/io/percolator.py b/psm_utils/io/percolator.py
@@ -16,6 +16,7 @@
 from __future__ import annotations
 
 import csv
+import gzip
 import logging
 import re
 from pathlib import Path
@@ -118,8 +119,12 @@ def __iter__(self) -> Iterable[PSM]:
 
     @staticmethod
     def _read_header(filename):
-        with open(filename, "rt") as f:
-            fieldnames = f.readline().strip().lower().split("\t")
+        if str(filename).endswith(".gz"):
+            with gzip.open(filename, "rt") as f:
+                fieldnames = f.readline().strip().lower().split("\t")
+        else:
+            with open(filename, "rt") as f:
+                fieldnames = f.readline().strip().lower().split("\t")
         return fieldnames
 
     @staticmethod
@@ -144,9 +149,15 @@ def _infer_charge_columns(fieldnames):
     @staticmethod
     def _parse_peptidoform(percolator_peptide, charge):
         """Parse Percolator TSV peptide notation to Peptidoform."""
-        # Remove leading and trailing amino acids
+        # Remove leading and trailing amino acids (e.g., R.PEPTIDE.S -> PEPTIDE)
         match = re.match(r"^(?:[A-Z-])?\.(.+)\.(?:[A-Z-])?$", percolator_peptide)
         peptidoform = match[1] if match else percolator_peptide
+        # Handle Comet's N-terminal modification format: n[42.0106]PEPTIDE -> [42.0106]-PEPTIDE
+        peptidoform = re.sub(r'^n\[([+-]?[\w\.]*?)\]', r'[\1]-', peptidoform)
+
+        # Ensure unsigned numeric values inside square brackets get an explicit '+' sign
+        peptidoform = re.sub(r'\[(\d+[\.]*\d*)]', r'[+\1]', peptidoform)
+
         if charge:
             peptidoform += f"/{charge}"
         return Peptidoform(peptidoform)
@@ -367,7 +378,12 @@ def _parse_existing_file(
     ) -> Tuple[List[str], Optional[int]]:
         """Parse existing Percolator Tab file to determine fieldnames and last ScanNr."""
         # Get fieldnames
-        with open(filename, "rt") as open_file:
+        if str(filename).endswith(".gz"):
+            open_func = gzip.open
+        else:
+            open_func = open
+
+        with open_func(filename, "rt") as open_file:
             for line in open_file:
                 fieldnames = line.strip().split("\t")
                 break
@@ -382,7 +398,7 @@ def _parse_existing_file(
 
         # Get last ScanNr
         last_scannr = None
-        with open(filename, "rt") as open_file:
+        with open_func(filename, "rt") as open_file:
             # Read last line
             open_file.seek(0)
             last_line = None
@@ -409,7 +425,11 @@ def _parse_existing_file(
 class _PercolatorTabIO:
     def __init__(self, *args, protein_separator="|||", **kwargs) -> None:
         """File reader and writer for Percolator Tab files with fixed Proteins tab."""
-        self._open_file = open(*args, **kwargs)
+        filename = args[0]
+        if str(filename).endswith(".gz"):
+            self._open_file = gzip.open(*args, **kwargs)
+        else:
+            self._open_file = open(*args, **kwargs)
         self.protein_separator = protein_separator
 
     def __enter__(self, *args, **kwargs) -> _PercolatorTabIO: