I recently wrote a script to produce an OTU table and a phylogenetic tree. However, at step #4 (the sequence alignment step) a lot of time passes and the script never gets past it, while CPU and RAM usage stay close to 95%. I don't know whether this part of the script is wrong or whether something is missing. The entire script follows; can anyone help me, or is it normal for aligning OTUs to take this long?
My computer's specs are: Ryzen 7 (16 threads) and 16 GB RAM with 16 GB swap.
#!/bin/bash
# Strict mode: stop on the first failing command, on use of an unset variable,
# and on a failure anywhere in a pipeline. Without it a failed step would still
# be followed by the next step and its "success" message.
set -euo pipefail
echo -e "\033[0;32mSTARTING SCRIPT...\033[0m"
# Require exactly three arguments: manifest, classifier, output directory
if (( $# != 3 )); then
  # Usage text is a diagnostic, so it goes to stderr
  echo -e "\033[0;31mUso: $0 <path_manifest.csv> <classifier.qza> <output_directory>\033[0m" >&2
  exit 1
fi
# Bind the three positional arguments to named variables
MANIFEST_PATH=$1
CLASSIFIER_PATH=$2
OUTPUT_DIR=$3
# Location of demux.qza inside the output directory
DEMUX_OUTPUT="$OUTPUT_DIR/demux.qza"
# Create the output directory if it does not already exist
echo -e "\033[0;35mCreating output directory...\033[0m"
# Report success only when mkdir really succeeded; "--" keeps a path that
# starts with "-" from being parsed as an option.
if ! mkdir -p -- "$OUTPUT_DIR"; then
  echo -e "\033[0;31mFailed to create output directory: ${OUTPUT_DIR}\033[0m" >&2
  exit 1
fi
echo -e "\033[0;32mOutput directory successfully created.\033[0m"
# 1. Import data
# NOTE(review): the usage text names the manifest "path_manifest.csv", while the
# input format requested here is SingleEndFastqManifestPhred33V2 — confirm the
# manifest file is in the layout that format expects.
echo -e "\033[0;35mImporting data.\033[0m"
# Abort on failure: steps 2 and 3 both read the artifact written here
if ! qiime tools import \
  --type 'SampleData[SequencesWithQuality]' \
  --input-path "$MANIFEST_PATH" \
  --output-path "$DEMUX_OUTPUT" \
  --input-format SingleEndFastqManifestPhred33V2; then
  echo -e "\033[0;31mImport failed.\033[0m" >&2
  exit 1
fi
echo -e "\033[0;32mImported successfully.\033[0m"
# 2. Visualize the imported data
echo -e "\033[0;35mCreating visualization of imported data.\033[0m"
# Only announce the visualization when the command actually produced it
if ! qiime demux summarize \
  --i-data "$DEMUX_OUTPUT" \
  --o-visualization "${OUTPUT_DIR}/demux-summary.qzv"; then
  echo -e "\033[0;31mCould not create the demux summary visualization.\033[0m" >&2
  exit 1
fi
echo -e "\033[0;32mFinished, see ${OUTPUT_DIR}.\033[0m"
# 3. Dereplicate sequences
echo -e "\033[0;35mDereplicating sequences.\033[0m"
# Abort on failure: step 4 reads both table.qza and rep-seqs.qza
if ! qiime vsearch dereplicate-sequences \
  --i-sequences "$DEMUX_OUTPUT" \
  --o-dereplicated-table "${OUTPUT_DIR}/table.qza" \
  --o-dereplicated-sequences "${OUTPUT_DIR}/rep-seqs.qza"; then
  echo -e "\033[0;31mDereplication failed.\033[0m" >&2
  exit 1
fi
echo -e "\033[0;32mDereplication successfully.\033[0m"
# 4. Cluster sequences into OTUs (de novo, 97% identity)
# This step clusters sequences; it does not align them. The progress messages
# now say "Clustering" so a long wait here is not mistaken for a stuck alignment.
# NOTE(review): the input is the dereplicated output of step 3, and this script
# shows no quality filtering or denoising before it. The number of unique
# sequences may therefore be very large, which would make de novo clustering
# slow and memory-hungry — confirm, and consider filtering before this step.
echo -e "\033[0;35mClustering OTUs.\033[0m"
# --p-threads defaults to 1 according to the plugin documentation; 0 requests
# one thread per CPU core. Set VSEARCH_THREADS to choose a specific count.
if ! qiime vsearch cluster-features-de-novo \
  --i-table "${OUTPUT_DIR}/table.qza" \
  --i-sequences "${OUTPUT_DIR}/rep-seqs.qza" \
  --p-perc-identity 0.97 \
  --p-threads "${VSEARCH_THREADS:-0}" \
  --o-clustered-table "${OUTPUT_DIR}/otu-table.qza" \
  --o-clustered-sequences "${OUTPUT_DIR}/otu-rep-seqs.qza"; then
  echo -e "\033[0;31mOTU clustering failed.\033[0m" >&2
  exit 1
fi
echo -e "\033[0;32mClustering completed successfully.\033[0m"
# 5. Visualize the OTUs (feature table summary and representative sequences)
echo -e "\033[0;35mGenerating OTU visualization files.\033[0m"
# Each visualization is checked separately so the error names the one that failed
if ! qiime feature-table summarize \
  --i-table "${OUTPUT_DIR}/otu-table.qza" \
  --o-visualization "${OUTPUT_DIR}/otu-table.qzv"; then
  echo -e "\033[0;31mCould not summarize the OTU table.\033[0m" >&2
  exit 1
fi
if ! qiime feature-table tabulate-seqs \
  --i-data "${OUTPUT_DIR}/otu-rep-seqs.qza" \
  --o-visualization "${OUTPUT_DIR}/otu-rep-seqs.qzv"; then
  echo -e "\033[0;31mCould not tabulate the OTU representative sequences.\033[0m" >&2
  exit 1
fi
echo -e "\033[0;32mFinished, see ${OUTPUT_DIR}.\033[0m"
# 6. Assign taxonomy to the OTUs
# The progress message used to read "Sorting...", but this step classifies
# sequences; nothing is sorted here.
echo -e "\033[0;35mAssigning taxonomy...\033[0m"
# Abort on failure: step 7 reads taxonomy.qza
if ! qiime feature-classifier classify-sklearn \
  --i-classifier "$CLASSIFIER_PATH" \
  --i-reads "${OUTPUT_DIR}/otu-rep-seqs.qza" \
  --o-classification "${OUTPUT_DIR}/taxonomy.qza"; then
  echo -e "\033[0;31mTaxonomic assignment failed.\033[0m" >&2
  exit 1
fi
echo -e "\033[0;32mTaxonomic assignment completed successfully, see ${OUTPUT_DIR}.\033[0m"
# 7. Visualize the taxonomy results for the OTUs
echo -e "\033[0;35mGenerating OTU visualization files.\033[0m"
if ! qiime metadata tabulate \
  --m-input-file "${OUTPUT_DIR}/taxonomy.qza" \
  --o-visualization "${OUTPUT_DIR}/taxonomy.qzv"; then
  echo -e "\033[0;31mCould not create the taxonomy visualization.\033[0m" >&2
  exit 1
fi
# The trailing \033[0m reset was missing, which left the terminal green for
# everything printed afterwards (including the tools' own output).
echo -e "\033[0;32mFinished, see ${OUTPUT_DIR}.\033[0m"
# 8. Build a phylogenetic tree from the OTUs
# Four chained stages: align -> mask -> build tree -> root. Each stage reads the
# file written by the previous one, so the chain stops at the first failure
# instead of printing a progress counter for a stage that did not complete.
echo -e "\033[0;35mGenerating phylogenetic tree.\033[0m"
if ! qiime alignment mafft \
  --i-sequences "${OUTPUT_DIR}/otu-rep-seqs.qza" \
  --o-alignment "${OUTPUT_DIR}/aligned-otu-rep-seqs.qza"; then
  echo -e "\033[0;31mTree stage 1/4 (mafft alignment) failed.\033[0m" >&2
  exit 1
fi
echo -e "\033[0;32m1/4\033[0m"
if ! qiime alignment mask \
  --i-alignment "${OUTPUT_DIR}/aligned-otu-rep-seqs.qza" \
  --o-masked-alignment "${OUTPUT_DIR}/masked-aligned-otu-rep-seqs.qza"; then
  echo -e "\033[0;31mTree stage 2/4 (alignment masking) failed.\033[0m" >&2
  exit 1
fi
echo -e "\033[0;32m2/4\033[0m"
if ! qiime phylogeny fasttree \
  --i-alignment "${OUTPUT_DIR}/masked-aligned-otu-rep-seqs.qza" \
  --o-tree "${OUTPUT_DIR}/unrooted-tree.qza"; then
  echo -e "\033[0;31mTree stage 3/4 (fasttree) failed.\033[0m" >&2
  exit 1
fi
echo -e "\033[0;32m3/4\033[0m"
if ! qiime phylogeny midpoint-root \
  --i-tree "${OUTPUT_DIR}/unrooted-tree.qza" \
  --o-rooted-tree "${OUTPUT_DIR}/rooted-tree.qza"; then
  echo -e "\033[0;31mTree stage 4/4 (midpoint rooting) failed.\033[0m" >&2
  exit 1
fi
echo -e "\033[0;32m4/4 Árvore filogenética gerada.\033[0m"
# 9. Export the rooted phylogenetic tree in .nwk format
echo -e "\033[0;35mExporting the phylogenetic tree to .nwk format.\033[0m"
# NOTE(review): with no --output-format given, `qiime tools export` is expected
# to create a directory at --output-path holding tree.nwk, not a single file
# with that name — confirm against the QIIME 2 export documentation.
if ! qiime tools export \
  --input-path "${OUTPUT_DIR}/rooted-tree.qza" \
  --output-path "${OUTPUT_DIR}/tree-output.nwk"; then
  echo -e "\033[0;31mTree export failed.\033[0m" >&2
  exit 1
fi
# Report the path that was actually passed to --output-path; the previous
# message named "tree-output", which this script never creates.
echo -e "\033[0;32mExport completed to ${OUTPUT_DIR}/tree-output.nwk.\033[0m"
# End of script
echo -e "\033[0;32mScript finished. Check all generated files in: ${OUTPUT_DIR}.\033[0m"