From 53f7d94046b04a4e9509abbf57043ad91a3b8ff7 Mon Sep 17 00:00:00 2001
From: Xiaolong Cao <ATPs@users.noreply.github.com>
Date: Wed, 16 Apr 2025 15:36:42 +0800
Subject: [PATCH 1/2] percolator: support gzipped files

---
 psm_utils/io/percolator.py | 24 +++++++++++++++++++-----
 1 file changed, 19 insertions(+), 5 deletions(-)

diff --git a/psm_utils/io/percolator.py b/psm_utils/io/percolator.py
index 045d09c..5a288b6 100644
--- a/psm_utils/io/percolator.py
+++ b/psm_utils/io/percolator.py
@@ -16,6 +16,7 @@
 from __future__ import annotations
 
 import csv
+import gzip
 import logging
 import re
 from pathlib import Path
@@ -118,8 +119,12 @@ def __iter__(self) -> Iterable[PSM]:
 
     @staticmethod
     def _read_header(filename):
-        with open(filename, "rt") as f:
-            fieldnames = f.readline().strip().lower().split("\t")
+        """Return the lowercased, tab-split first line of a plain or gzipped file."""
+        # Choose the opener once instead of duplicating the read in two branches.
+        open_func = gzip.open if str(filename).endswith(".gz") else open
+        with open_func(filename, "rt") as f:
+            # Only the first line is read; surrounding whitespace is stripped.
+            fieldnames = f.readline().strip().lower().split("\t")
         return fieldnames
 
     @staticmethod
@@ -367,7 +372,12 @@ def _parse_existing_file(
     ) -> Tuple[List[str], Optional[int]]:
         """Parse existing Percolator Tab file to determine fieldnames and last ScanNr."""
         # Get fieldnames
-        with open(filename, "rt") as open_file:
+        if str(filename).endswith(".gz"):
+            open_func = gzip.open
+        else:
+            open_func = open
+
+        with open_func(filename, "rt") as open_file:
             for line in open_file:
                 fieldnames = line.strip().split("\t")
                 break
@@ -382,7 +392,7 @@ def _parse_existing_file(
 
         # Get last ScanNr
         last_scannr = None
-        with open(filename, "rt") as open_file:
+        with open_func(filename, "rt") as open_file:
             # Read last line
             open_file.seek(0)
             last_line = None
@@ -409,7 +419,11 @@ def _parse_existing_file(
 class _PercolatorTabIO:
     def __init__(self, *args, protein_separator="|||", **kwargs) -> None:
         """File reader and writer for Percolator Tab files with fixed Proteins tab."""
-        self._open_file = open(*args, **kwargs)
+        # Path may be positional or ``file=``, as with ``open``. NOTE: gzip.open
+        # defaults to binary mode, so pass a text mode (e.g. "rt") explicitly.
+        path = args[0] if args else kwargs.pop("file")
+        open_func = gzip.open if str(path).endswith(".gz") else open
+        self._open_file = open_func(path, *args[1:], **kwargs)
         self.protein_separator = protein_separator
 
     def __enter__(self, *args, **kwargs) -> _PercolatorTabIO:

From 1e6e1522bce7d32d51699951103f671bbaa7097b Mon Sep 17 00:00:00 2001
From: Xiaolong Cao <ATPs@users.noreply.github.com>
Date: Wed, 16 Apr 2025 16:43:36 +0800
Subject: [PATCH 2/2] percolator: handle PIN files generated by Comet

---
 psm_utils/io/percolator.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/psm_utils/io/percolator.py b/psm_utils/io/percolator.py
index 5a288b6..e70e064 100644
--- a/psm_utils/io/percolator.py
+++ b/psm_utils/io/percolator.py
@@ -149,9 +149,15 @@ def _infer_charge_columns(fieldnames):
     @staticmethod
     def _parse_peptidoform(percolator_peptide, charge):
         """Parse Percolator TSV peptide notation to Peptidoform."""
-        # Remove leading and trailing amino acids
+        # Remove leading and trailing amino acids (e.g., R.PEPTIDE.S -> PEPTIDE)
         match = re.match(r"^(?:[A-Z-])?\.(.+)\.(?:[A-Z-])?$", percolator_peptide)
         peptidoform = match[1] if match else percolator_peptide
+        # Comet writes N-terminal modifications as n[42.0106]PEPTIDE; rewrite them
+        # to the ProForma N-terminal form [42.0106]-PEPTIDE.
+        peptidoform = re.sub(r"^n\[([+-]?[\w.]*?)\]", r"[\1]-", peptidoform)
+        # Unsigned numeric values get an explicit "+": [15.9949] -> [+15.9949].
+        # Already-signed values such as [-17.0265] do not match and are kept as-is.
+        peptidoform = re.sub(r"\[(\d+(?:\.\d*)?)\]", r"[+\1]", peptidoform)
         if charge:
             peptidoform += f"/{charge}"
         return Peptidoform(peptidoform)