#!/usr/bin/env python3
"""
Trinity FASTA Sequence Renaming Script

Function: Rename sequences in a FASTA file to the format:
<prefix>@mrna_<sequence_number>

A tab-separated table mapping each original sequence name to its new name
is written alongside the output as <output_file>.tsv.
"""

import os
import sys


def rename_fasta_sequences(input_file, prefix, output_file=None):
    """
    Rename sequence headers in a FASTA file.

    Every header line (a line starting with ">") is replaced by
    ">{prefix}@mrna_{N}", where N counts headers from 1 in file order.
    All other lines are copied unchanged. A two-column TSV
    (Original_Name, New_Name) is written to "<output_file>.tsv"; the
    original name is the first whitespace-delimited token of the old header
    (empty if the header has no text).

    Parameters:
        input_file: Input FASTA filename
        prefix: Prefix for sequence names
        output_file: Output filename (optional, defaults to the input name
            with "_renamed" inserted before the extension)

    Returns:
        The number of sequences renamed.

    Exits the process with status 1 (via sys.exit) if a file cannot be
    found, if an output path would overwrite the input, or if any other
    error occurs while processing.
    """
    # Set output filename
    if output_file is None:
        file_base, file_ext = os.path.splitext(input_file)
        output_file = f"{file_base}_renamed{file_ext}"

    match_tsv = f"{output_file}.tsv"

    # Opening an output path in "w" mode truncates it immediately, so
    # refuse to run when either output resolves to the input file.
    input_real = os.path.realpath(input_file)
    for out_path in (output_file, match_tsv):
        if os.path.realpath(out_path) == input_real:
            print(
                f"Error: Output file '{out_path}' would overwrite the input file",
                file=sys.stderr,
            )
            sys.exit(1)

    print(f"Input file: {input_file}")
    print(f"Output file: {output_file}")
    print(f"Naming format: {prefix}@mrna_")
    print(f"Match TSV file: {match_tsv}")

    # Counter for sequences
    seq_count = 0

    try:
        with (
            open(input_file, "r") as fin,
            open(output_file, "w") as fout,
            open(match_tsv, "w") as tsvout,
        ):
            tsvout.write("Original_Name\tNew_Name\n")
            for line in fin:
                if line.startswith(">"):
                    # Sequence header line: rename it
                    seq_count += 1
                    # A bare ">" header has no tokens; record an empty
                    # original name instead of failing on it.
                    tokens = line[1:].split()
                    original_name = tokens[0] if tokens else ""
                    # Keep the name free of a newline so the TSV row is
                    # not followed by a blank line.
                    new_name = f"{prefix}@mrna_{seq_count}"
                    fout.write(f">{new_name}\n")
                    tsvout.write(f"{original_name}\t{new_name}\n")
                else:
                    # Sequence data line: write as-is
                    fout.write(line)

        print(f"Successfully renamed {seq_count} sequences")

    except FileNotFoundError:
        print(f"Error: Input file '{input_file}' not found", file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        print(f"Error processing file: {e}", file=sys.stderr)
        sys.exit(1)

    return seq_count


def main():
    """Parse command-line arguments and run the renaming.

    Expects: <input_file> <prefix> [output_file]. Prints usage and exits
    with status 1 when fewer than two arguments are given or when the
    input path is not an existing file.
    """
    if len(sys.argv) < 3:
        print(
            "Usage: python script.py <input_file> <prefix> [output_file]",
            file=sys.stderr,
        )
        print(
            "Example: python script.py sequences.fasta Gene new_sequences.fasta",
            file=sys.stderr,
        )
        sys.exit(1)

    input_file = sys.argv[1]
    prefix = sys.argv[2]
    output_file = sys.argv[3] if len(sys.argv) > 3 else None

    # Verify input file exists
    if not os.path.isfile(input_file):
        print(f"Error: File '{input_file}' does not exist", file=sys.stderr)
        sys.exit(1)

    rename_fasta_sequences(input_file, prefix, output_file)


if __name__ == "__main__":
    main()