72 lines
2.2 KiB
Python
Executable File
72 lines
2.2 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
Trinity FASTA Sequence Renaming Script
|
|
Function: Rename sequences in FASTA file to format: [prefix@mrna_<sequence_number>]
|
|
"""
|
|
|
|
import sys
|
|
import os
|
|
|
|
def rename_fasta_sequences(input_file, prefix, output_file=None):
    """
    Rename sequence headers in a FASTA file to ``<prefix>@mrna_<n>``.

    Every line starting with '>' is replaced by a new header numbered in
    order of appearance (starting at 1); all other lines are copied unchanged.
    A short summary is printed on success.

    Parameters:
        input_file: Input FASTA filename
        prefix: Prefix for sequence names
        output_file: Output filename (optional; defaults to the input name
            with "_renamed" inserted before the extension, e.g.
            "seqs.fasta" -> "seqs_renamed.fasta")

    Exits:
        Prints an error message and calls sys.exit(1) if the output path
        refers to the input file, if the input file is missing, or if any
        other error occurs while reading or writing.
    """
    # Set output filename
    if output_file is None:
        file_base, file_ext = os.path.splitext(input_file)
        output_file = f"{file_base}_renamed{file_ext}"

    # Opening the output in 'w' mode truncates it immediately, so writing onto
    # the input path would wipe the data before a single line has been read.
    try:
        same_file = os.path.samefile(input_file, output_file)
    except OSError:
        # At least one path does not exist yet, so they cannot be the same file.
        same_file = False
    if same_file:
        print(
            f"Error: Output file '{output_file}' is the same as the input file; "
            "refusing to overwrite it"
        )
        sys.exit(1)

    # Counter for sequences
    seq_count = 0

    try:
        with open(input_file, 'r') as fin, open(output_file, 'w') as fout:
            for line in fin:
                if line.startswith('>'):
                    # Sequence header line: rename it
                    seq_count += 1
                    fout.write(f">{prefix}@mrna_{seq_count}\n")
                else:
                    # Sequence data line: write as-is
                    fout.write(line)

        print(f"Successfully renamed {seq_count} sequences")
        print(f"Input file: {input_file}")
        print(f"Output file: {output_file}")
        print(f"Naming format: {prefix}@mrna_number")

    except FileNotFoundError as e:
        if not os.path.exists(input_file):
            print(f"Error: Input file '{input_file}' not found")
        else:
            # The input exists, so the missing path is the output location
            # (for example a directory that has not been created).
            print(f"Error processing file: {e}")
        sys.exit(1)
    except Exception as e:
        print(f"Error processing file: {e}")
        sys.exit(1)
|
|
|
|
def main():
    """Command-line entry point.

    Reads the FASTA path, the name prefix and an optional output path from
    sys.argv, prints usage and exits with status 1 when fewer than two
    arguments are given, exits with status 1 when the input path is not an
    existing file, and otherwise hands the values to rename_fasta_sequences.
    """
    cli_args = sys.argv[1:]

    # Both the FASTA file and the prefix are mandatory.
    if len(cli_args) < 2:
        print("Usage: python script.py <fasta_file> <prefix> [output_file]")
        print("Example: python script.py sequences.fasta Gene new_sequences.fasta")
        sys.exit(1)

    fasta_path, name_prefix = cli_args[0], cli_args[1]

    # The third argument is optional; None lets the renamer pick a default.
    if len(cli_args) > 2:
        target_path = cli_args[2]
    else:
        target_path = None

    # Verify input file exists
    input_present = os.path.isfile(fasta_path)
    if not input_present:
        print(f"Error: File '{fasta_path}' does not exist")
        sys.exit(1)

    rename_fasta_sequences(fasta_path, name_prefix, target_path)
|
|
|
|
# Run the command-line interface only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|