qiime2 · cherman2 · Jan 20, 2026 · Nov 6, 2025 · Nov 7, 2025 · Nov 7, 2025
diff --git a/q2_alignment/_mafft.py b/q2_alignment/_mafft.py
@@ -8,10 +8,17 @@
 
 import os
 import subprocess
+from typing import Union
 
 import skbio
 import skbio.io
-from q2_types.feature_data import DNAFASTAFormat, AlignedDNAFASTAFormat
+from q2_types.feature_data import (
+    DNAFASTAFormat,
+    AlignedDNAFASTAFormat,
+    ProteinFASTAFormat,
+    AlignedProteinFASTAFormat,
+    FASTAFormat
+)
 from qiime2 import get_cache
 
 
@@ -29,7 +36,7 @@ def run_command(cmd, output_fp, verbose=True, env=None):
 
 
 def _mafft(sequences_fp, alignment_fp, n_threads, parttree, addfragments,
-           keeplength, large, strategy, maxiterate, retree):
+           keeplength, large, strategy, maxiterate, retree, sequence_type):
     # Save original sequence IDs since long ids (~250 chars) can be truncated
     # by mafft. We'll replace the IDs in the aligned sequences file output by
     # mafft with the originals.
@@ -38,9 +45,12 @@ def _mafft(sequences_fp, alignment_fp, n_threads, parttree, addfragments,
     aligned_seq_ids = {}
     unaligned_seq_ids = {}
 
+    constructor = skbio.DNA if sequence_type is DNAFASTAFormat \
+        else skbio.Protein
+
     if alignment_fp is not None:
         for seq in skbio.io.read(alignment_fp, format='fasta',
-                                 constructor=skbio.DNA):
+                                 constructor=constructor):
             id_ = seq.metadata['id']
             if id_ in aligned_seq_ids:
                 raise ValueError(
@@ -50,7 +60,7 @@ def _mafft(sequences_fp, alignment_fp, n_threads, parttree, addfragments,
                 aligned_seq_ids[id_] = True
 
     for seq in skbio.io.read(sequences_fp, format='fasta',
-                             constructor=skbio.DNA):
+                             constructor=constructor):
         id_ = seq.metadata['id']
         if id_ in unaligned_seq_ids:
             raise ValueError(
@@ -63,7 +73,8 @@ def _mafft(sequences_fp, alignment_fp, n_threads, parttree, addfragments,
         else:
             unaligned_seq_ids[id_] = True
 
-    result = AlignedDNAFASTAFormat()
+    result = FASTAFormat()
+    result.aligned = True
     result_fp = str(result)
     ids = {**aligned_seq_ids, **unaligned_seq_ids}
 
@@ -128,7 +139,7 @@ def _mafft(sequences_fp, alignment_fp, n_threads, parttree, addfragments,
     # Read output alignment into memory, reassign original sequence IDs, and
     # write alignment back to disk.
     msa = skbio.TabularMSA.read(result_fp, format='fasta',
-                                constructor=skbio.DNA)
+                                constructor=constructor)
     # Using `assert` because mafft would have had to add or drop sequences
     # while aligning, which would be a bug on mafft's end. This is just a
     # sanity check and is not expected to trigger in practice.
@@ -147,33 +158,40 @@ def _mafft(sequences_fp, alignment_fp, n_threads, parttree, addfragments,
     return result
 
 
-def mafft(sequences: DNAFASTAFormat,
-          n_threads: int = 1,
-          parttree: bool = False,
-          large: bool = False,
-          strategy: str | None = None,
-          maxiterate: int | None = None,
-          retree: int | None = None,) -> AlignedDNAFASTAFormat:
+def mafft(
+        sequences: Union[DNAFASTAFormat, ProteinFASTAFormat],
+        n_threads: int = 1,
+        parttree: bool = False,
+        large: bool = False,
+        strategy: str | None = None,
+        maxiterate: int | None = None,
+        retree: int | None = None,
+        ) -> FASTAFormat:
     sequences_fp = str(sequences)
+    sequences_type = type(sequences)
+
     return _mafft(
         sequences_fp, None, n_threads, parttree, False, False, large,
-        strategy, maxiterate, retree
-    )
-
-
-def mafft_add(alignment: AlignedDNAFASTAFormat,
-              sequences: DNAFASTAFormat,
-              n_threads: int = 1,
-              parttree: bool = False,
-              addfragments: bool = False,
-              keeplength: bool = False,
-              large: bool = False,
-              strategy: str | None = None,
-              maxiterate: int | None = None,
-              retree: int | None = None) -> AlignedDNAFASTAFormat:
+        strategy, maxiterate, retree, sequences_type)
+
+
+def mafft_add(
+        alignment: Union[AlignedDNAFASTAFormat,
+                         AlignedProteinFASTAFormat],
+        sequences: Union[DNAFASTAFormat, ProteinFASTAFormat],
+        n_threads: int = 1,
+        parttree: bool = False,
+        addfragments: bool = False,
+        keeplength: bool = False,
+        large: bool = False,
+        strategy: str | None = None,
+        maxiterate: int | None = None,
+        retree: int | None = None
+        ) -> FASTAFormat:
     alignment_fp = str(alignment)
     sequences_fp = str(sequences)
+    sequences_type = type(sequences)
+
     return _mafft(
         sequences_fp, alignment_fp, n_threads, parttree, addfragments,
-        keeplength, large, strategy, maxiterate, retree
-    )
+        keeplength, large, strategy, maxiterate, retree, sequences_type)
diff --git a/q2_alignment/plugin_setup.py b/q2_alignment/plugin_setup.py
@@ -6,9 +6,28 @@
 # The full license is in the file LICENSE, distributed with this software.
 # ----------------------------------------------------------------------------
 
+from importlib import import_module
+
+from q2_types.feature_data import (
+    AlignedProteinSequence,
+    AlignedSequence,
+    FeatureData,
+    ProteinSequence,
+    Sequence,
+)
 from qiime2.plugin import (
-    Plugin, Float, Bool, Range, Citations, Threads, Int, Str, Choices)
-from q2_types.feature_data import FeatureData, Sequence, AlignedSequence
+    Bool,
+    Choices,
+    Citations,
+    Float,
+    Int,
+    Plugin,
+    Range,
+    Str,
+    Threads,
+    TypeMap,
+    TypeMatch,
+)
 
 import q2_alignment
 
@@ -46,6 +65,29 @@
               'computation.',
 }
 
+T_sequence_in, T_alignment_out = TypeMap({
+    FeatureData[Sequence]:
+        FeatureData[AlignedSequence],
+    FeatureData[ProteinSequence]:
+        FeatureData[AlignedProteinSequence]
+})
+
+(
+    T_expanded_sequence_in,
+    T_expanded_alignment_in,
+    T_expanded_alignment_out,
+) = TypeMap({
+    (FeatureData[Sequence], FeatureData[AlignedSequence]):
+        FeatureData[AlignedSequence],
+    (FeatureData[ProteinSequence], FeatureData[AlignedProteinSequence]):
+        FeatureData[AlignedProteinSequence],
+})
+
+T_match_alignment = TypeMatch([
+    FeatureData[AlignedSequence],
+    FeatureData[AlignedProteinSequence],
+])
+
 citations = Citations.load('citations.bib', package='q2_alignment')
 plugin = Plugin(
     name='alignment',
@@ -59,11 +101,11 @@
 
 plugin.methods.register_function(
     function=q2_alignment.mafft,
-    inputs={'sequences': FeatureData[Sequence]},
+    inputs={'sequences': T_sequence_in},
     parameters={
         **mafft_params,
     },
-    outputs=[('alignment', FeatureData[AlignedSequence])],
+    outputs=[('alignment', T_alignment_out)],
     input_descriptions={'sequences': 'The sequences to be aligned.'},
     parameter_descriptions={
         **mafft_param_descriptions,
@@ -76,14 +118,14 @@
 
 plugin.methods.register_function(
     function=q2_alignment.mafft_add,
-    inputs={'alignment': FeatureData[AlignedSequence],
-            'sequences': FeatureData[Sequence]},
+    inputs={'alignment': T_expanded_alignment_in,
+            'sequences': T_expanded_sequence_in},
     parameters={
         **mafft_params,
         'addfragments': Bool,
         'keeplength': Bool,
     },
-    outputs=[('expanded_alignment', FeatureData[AlignedSequence])],
+    outputs=[('expanded_alignment', T_expanded_alignment_out)],
     input_descriptions={'alignment': 'The alignment to which '
                                      'sequences should be added.',
                         'sequences': 'The sequences to be added.'},
@@ -108,10 +150,10 @@
 
 plugin.methods.register_function(
     function=q2_alignment.mask,
-    inputs={'alignment': FeatureData[AlignedSequence]},
+    inputs={'alignment': T_match_alignment},
     parameters={'max_gap_frequency': Float % Range(0, 1, inclusive_end=True),
                 'min_conservation': Float % Range(0, 1, inclusive_end=True)},
-    outputs=[('masked_alignment', FeatureData[AlignedSequence])],
+    outputs=[('masked_alignment', T_match_alignment)],
     input_descriptions={'alignment': 'The alignment to be masked.'},
     parameter_descriptions={
         'max_gap_frequency': ('The maximum relative frequency of gap '
@@ -137,3 +179,5 @@
                  "chosen to reproduce the mask presented in Lane (1991)."),
     citations=[citations['lane1991']]
 )
+
+import_module("q2_alignment.types._transformer")
diff --git a/q2_alignment/tests/data/aligned-protein-sequences-1.fasta b/q2_alignment/tests/data/aligned-protein-sequences-1.fasta
@@ -0,0 +1,6 @@
+>sequence1
+MTTRDLTAAQFNETIQSS--DMVLVDYWASWCG-PCRAF-------APTF----AESSEKHPDVVHAKVDT---EAERELAAAAQIR---------------------------
+>sequence2
+MVKQIESKTAFQEALDAAGDKLVVVDFSATWCG-PCKMI-------KPFF----HSLSEKYSNVIFLEVDV---DDCQDVASECEVKCMPTFQFFKKGQKVGEFSGAN*-----
+>sequence3
+---TEPDZNZWKRUZQYTW-------UYKSWUQFPUNHMDBGHFDZSPIYKCZHQXLCEBYJREOAUJVDLIRPEGPOGMEJZQQRHC---FQXUPZLDWDGOXTOQTCIQDD*
diff --git a/q2_alignment/tests/data/aligned-protein-sequences-2.fasta b/q2_alignment/tests/data/aligned-protein-sequences-2.fasta
@@ -0,0 +1,9 @@
+>sequence1b
+------------------------VDFSATWCGPCKMIKPFFHSLSEKYSNVIFLEVDVDDCQD
+VASECEVKCMPTFQFFKKGQKVGEFSGAN
+>sequence2b
+MVKQIESKTAFQEALDAAGDKLVVVDFSATWCGPCKMIKPFFHSLSEKYSNVIFLEVDVDDCQD
+VASECEVKCMPTFQ-------VGEFSGAN
+>sequence3b
+MVKQIESKTAFQJALDAAGDKLVVVDFSATWCGPCKMIKPFFHSLSEKYSNUIFLEVDVDDCQD
+VASECEVKCMPTFO-------VGEFSGAN
diff --git a/q2_alignment/tests/data/aligned-protein-sequences-3.fasta b/q2_alignment/tests/data/aligned-protein-sequences-3.fasta
@@ -0,0 +1,12 @@
+>sequence1b
+------------------------VDFSATWCG-PCKMI-------KPFF----HSLSEKYSNVIFLEVDV---DDCQDVASECEVKCMPTFQFFKKGQKVGEFSGAN------
+>sequence2b
+MVKQIESKTAFQEALDAAGDKLVVVDFSATWCG-PCKMI-------KPFF----HSLSEKYSNVIFLEVDV---DDCQDVASECEVKCMPTFQ-------VGEFSGAN------
+>sequence3b
+MVKQIESKTAFQJALDAAGDKLVVVDFSATWCG-PCKMI-------KPFF----HSLSEKYSNUIFLEVDV---DDCQDVASECEVKCMPTFO-------VGEFSGAN------
+>sequence1
+MTTRDLTAAQFNETIQSS--DMVLVDYWASWCG-PCRAF-------APTF----AESSEKHPDVVHAKVDT---EAERELAAAAQIR---------------------------
+>sequence2
+MVKQIESKTAFQEALDAAGDKLVVVDFSATWCG-PCKMI-------KPFF----HSLSEKYSNVIFLEVDV---DDCQDVASECEVKCMPTFQFFKKGQKVGEFSGAN*-----
+>sequence3
+---TEPDZNZWKRUZQYTW-------UYKSWUQFPUNHMDBGHFDZSPIYKCZHQXLCEBYJREOAUJVDLIRPEGPOGMEJZQQRHC---FQXUPZLDWDGOXTOQTCIQDD*
diff --git a/q2_alignment/tests/data/protein-sequences-1.fasta b/q2_alignment/tests/data/protein-sequences-1.fasta
@@ -0,0 +1,9 @@
+>sequence1
+MTTRDLTAAQFNETIQSSDMVLVDYWASWCGPCRAFAPTFAESSEKHPDVVHAKVDTEAERELA
+AAAQIR
+>sequence2
+MVKQIESKTAFQEALDAAGDKLVVVDFSATWCGPCKMIKPFFHSLSEKYSNVIFLEVDVDDCQD
+VASECEVKCMPTFQFFKKGQKVGEFSGAN*
+>sequence3
+TEPDZNZWKRUZQYTWUYKSWUQFPUNHMDBGHFDZSPIYKCZHQXLCEBYJREOAUJVDLIRP
+EGPOGMEJZQQRHCFQXUPZLDWDGOXTOQTCIQDD*
diff --git a/q2_alignment/tests/test_filter.py b/q2_alignment/tests/test_filter.py
@@ -28,6 +28,16 @@ def test_basic(self):
         expected = [1.0, 1.0, 0.5, 0.25]
         self.assertEqual(actual, expected)
 
+    def test_basic_protein(self):
+        frequencies = [
+            {'A': 0.6, 'G': 0.4},
+            {'L': 0.9, 'I': 0.1},
+            {'F': 0.33, '-': 0.67}
+        ]
+        actual = _most_conserved(frequencies, skbio.Protein)
+        expected = [0.6, 0.9, 1.0]
+        self.assertEqual(actual, expected)
+
     def test_N(self):
         frequencies = [{'A': 1/3, '-': 2/3}, {'G': 1.0}, {'A': 2/3, 'N': 1/3}]
         actual = _most_conserved(frequencies, skbio.DNA)
@@ -71,6 +81,23 @@ def test_basic(self):
 
         self.assertEqual(actual, expected)
 
+    def test_basic_protein(self):
+        alignment = skbio.TabularMSA(
+            [skbio.Protein('MTT', metadata={'id': 'seq1', 'description': ''}),
+             skbio.Protein('-MT', metadata={'id': 'seq2', 'description': ''}),
+             skbio.Protein('-MK', metadata={'id': 'seq3', 'description': ''})]
+        )
+
+        actual = mask(alignment, max_gap_frequency=0.05, min_conservation=0.30)
+
+        expected = skbio.TabularMSA(
+            [skbio.Protein('TT', metadata={'id': 'seq1', 'description': ''}),
+             skbio.Protein('MT', metadata={'id': 'seq2', 'description': ''}),
+             skbio.Protein('MK', metadata={'id': 'seq3', 'description': ''})]
+        )
+
+        self.assertEqual(actual, expected)
+
     def test_gap_boundaries(self):
         alignment1 = skbio.TabularMSA(
             [skbio.DNA('-', metadata={'id': 'seq1', 'description': ''}),

diff --git a/q2_alignment/tests/test_mafft.py b/q2_alignment/tests/test_mafft.py
@@ -12,7 +12,12 @@
 
 import skbio
 from qiime2.plugin.testing import TestPluginBase
-from q2_types.feature_data import DNAFASTAFormat, AlignedDNAFASTAFormat
+from q2_types.feature_data import (
+    DNAFASTAFormat,
+    AlignedDNAFASTAFormat,
+    ProteinFASTAFormat,
+    AlignedProteinFASTAFormat,
+)
 from qiime2.util import redirected_stdio
 
 from q2_alignment import mafft, mafft_add
@@ -418,6 +423,56 @@ def test_long_ids_are_not_truncated_aligned(self):
         self.assertIn('seq1', obs)
         self.assertIn('seq2', obs)
 
+    @patch("q2_alignment._mafft._mafft")
+    def test_mafft_add_sets_protein_sequence_type(self, mock_mafft):
+        alignment = AlignedProteinFASTAFormat()
+        seqs = ProteinFASTAFormat()
+        mafft_add(alignment, seqs)
+        mock_mafft.assert_called_once()
+        args, _ = mock_mafft.call_args
+        assert args[-1] is ProteinFASTAFormat
+
+    @patch("q2_alignment._mafft._mafft")
+    def test_mafft_add_sets_nucleotide_sequence_type(self, mock_mafft):
+        alignment = AlignedDNAFASTAFormat()
+        seqs = DNAFASTAFormat()
+        mafft_add(alignment, seqs)
+        mock_mafft.assert_called_once()
+        args, _ = mock_mafft.call_args
+        assert args[-1] is DNAFASTAFormat
+
+    def test_mafft_protein(self):
+        input_fp = self.get_data_path('protein-sequences-1.fasta')
+        input_sequences = ProteinFASTAFormat(input_fp, mode='r')
+        aligned_fp = self.get_data_path('aligned-protein-sequences-1.fasta')
+        exp = AlignedProteinFASTAFormat(aligned_fp, mode='r')
+
+        with redirected_stdio(stderr=os.devnull):
+            result = mafft(input_sequences)
+        exp = skbio.io.read(str(exp), into=skbio.TabularMSA,
+                            constructor=skbio.Protein)
+        obs = skbio.io.read(str(result), into=skbio.TabularMSA,
+                            constructor=skbio.Protein)
+
+        self.assertEqual(obs, exp)
+
+    def test_mafft_add_protein(self):
+        sequences_fp = self.get_data_path('protein-sequences-1.fasta')
+        input_sequences = ProteinFASTAFormat(sequences_fp, mode='r')
+        aligned_fp = self.get_data_path('aligned-protein-sequences-2.fasta')
+        input_alignment = AlignedProteinFASTAFormat(aligned_fp, mode='r')
+        exp_fp = self.get_data_path('aligned-protein-sequences-3.fasta')
+        exp_alignment = AlignedProteinFASTAFormat(exp_fp, mode='r')
+
+        with redirected_stdio(stderr=os.devnull):
+            result = mafft_add(input_alignment, input_sequences)
+        exp = skbio.io.read(str(result), into=skbio.TabularMSA,
+                            constructor=skbio.Protein)
+        obs = skbio.io.read(str(exp_alignment), into=skbio.TabularMSA,
+                            constructor=skbio.Protein)
+
+        self.assertEqual(obs, exp)
+
 
 class RunCommandTests(TestPluginBase):
     package = 'q2_alignment.tests'