#!/bin/bash
#
# Build short "<taxon>-<id>" labels from a tab-separated taxonomy table.
#
# Usage: script [TAXONOMY_TSV]        (default: taxa/taxonomy.tsv)
#
# For every input line the label is made of
#   * the last ';'- or space-separated token of column 2, and
#   * the first whitespace-separated token of the line (at most 36 chars),
# joined by '-', lightly sanitised and capped at 36 characters.
# The first line is treated as a header and becomes the literal label "ID".
#
# Outputs (written to the current directory):
#   uniques     each distinct label once, in order of first appearance
#   duplicates  "<count> <label>" for every distinct label (order unspecified)
#
# NOTE(review): the input layout looks like a QIIME 2 style taxonomy.tsv
# (Feature ID / Taxon / Confidence) - confirm against the producer.

set -uo pipefail

#######################################
# Emit one sanitised label per line of the taxonomy table.
# Arguments: $1 - path to the tab-separated taxonomy table
# Outputs:   labels on stdout, one per input line
# Returns:   non-zero if awk or sed fails
#######################################
build_id_labels() {
  # A single awk pass derives both halves of the label from the same record,
  # so the taxon and the ID can never get out of step with each other.
  awk -F'\t' '
    {
      n_ranks = split($2, ranks, /[; ]/)
      n_tokens = split($0, tokens, " ")
      taxon = (n_ranks > 0) ? ranks[n_ranks] : ""
      # awk strings are 1-indexed; start at 1 so every awk yields 36 chars.
      id = (n_tokens > 0) ? substr(tokens[1], 1, 36) : ""
      print taxon "-" id
    }
  ' "$1" |
    sed \
      -e '1s/.*/ID/' \
      -e 's/__/_/' \
      -e 's/_/-/g' \
      -e 's/[^0-9.A-Za-z-]//' \
      -e 's/)//' \
      -e 's/]//' \
      -e 's/^\(.\{36\}\).*$/\1/'
  # Why [^0-9.A-Za-z-]: POSIX bracket expressions have no \d shorthand, so
  # digits are listed explicitly; otherwise the first digit of the ID would
  # be treated as an unwanted character and deleted. As before, only the
  # first unwanted character per line is dropped (no g flag).
}

#######################################
# Entry point: read the table, write ./uniques and ./duplicates.
# Arguments: $1 - optional input path (default taxa/taxonomy.tsv)
# Outputs:   diagnostics on stderr
# Returns:   0 on success, non-zero if the input is unreadable or a step fails
#######################################
main() {
  local input=${1:-taxa/taxonomy.tsv}
  local workdir labels
  local status=0

  if [[ ! -r "$input" ]]; then
    printf '%s: cannot read input file: %s\n' "${0##*/}" "$input" >&2
    return 1
  fi

  # Scratch space lives outside the working directory so stale intermediates
  # from an interrupted run can never leak into a later one.
  workdir=$(mktemp -d) || return 1
  labels=$workdir/labels

  {
    build_id_labels "$input" > "$labels" &&
      awk '{ count[$0]++ } END { for (label in count) print count[label], label }' \
        "$labels" > duplicates &&
      awk '!seen[$0]++' "$labels" > uniques
  } || status=$?

  rm -rf -- "${workdir:?}"
  return "$status"
}

main "$@"