enable_lmod
module load sra/2.9
fastq-dump.2.9.2 --split-files SRR1648119 --defline-seq '@$sn[_$rn]/$ri' -O ./
fastq-dump.2.9.2 --split-files SRR1646513 --defline-seq '@$sn[_$rn]/$ri' -O ./
Files: -rw-r--r-- 1 pscha005 users 12G Jun 26 08:32 SRR1646513_1.fastq -rw-r--r-- 1 pscha005 users 12G Jun 26 08:32 SRR1646513_2.fastq -rw-r--r-- 1 pscha005 users 13G Jun 26 08:31 SRR1648119_1.fastq -rw-r--r-- 1 pscha005 users 13G Jun 26 08:31 SRR1648119_2.fastq
enable_lmod
module load java/11.0
java -jar /scratch-lustre/pscha005/trimmomatic/trimmomatic-0.33.jar PE -threads 16 SRR1646513_1.fastq.gz SRR1646513_2.fastq.gz ./SRR1646513_R1_forward_paired.fq.gz ./SRR1646513_R1_forward_unpaired.fq.gz ./SRR1646513_R2_reverse_paired.fq.gz ./SRR1646513_R2_reverse_unpaired.fq.gz ILLUMINACLIP:/scratch-lustre/pscha005/trimmomatic/adapters/TruSeq3-PE.fa:2:30:10 LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:36
enable_lmod
module load spades/3.13
rnaspades.py -m 768 -t 32 --pe1-1 ./SRR1648119_R1_forward_paired.fq.gz --pe1-2 ./SRR1648119_R2_reverse_paired.fq.gz --pe1-1 ./SRR1646513_R1_forward_paired.fq.gz --pe1-2 ./SRR1646513_R2_reverse_paired.fq.gz -o RNAspades_output_combined
[pscha005@turing1 sinensis]$ grep -c ">" ./RNAspades_output_*/*transcripts.fasta
./RNAspades_output_combined/hard_filtered_transcripts.fasta:153018
./RNAspades_output_combined/soft_filtered_transcripts.fasta:273745
./RNAspades_output_combined/transcripts.fasta:209733
./RNAspades_output_SRR1646513/hard_filtered_transcripts.fasta:116998
./RNAspades_output_SRR1646513/soft_filtered_transcripts.fasta:201094
./RNAspades_output_SRR1646513/transcripts.fasta:156647
./RNAspades_output_SRR1648119/hard_filtered_transcripts.fasta:116440
./RNAspades_output_SRR1648119/soft_filtered_transcripts.fasta:201272
./RNAspades_output_SRR1648119/transcripts.fasta:155130
!Warning! Some issues with compilers encountered, before trying to make, load modules for gcc and cmake.
wget https://github.com/gmarcais/Jellyfish/releases/download/v2.2.10/jellyfish-2.2.10.tar.gz
./configure --prefix=$HOME ## This installs some files required by jellyfish in /home/pscha005/bin/jellyfish/
make -j 4
make install
wget https://github.com/COMBINE-lab/salmon/releases/download/v0.14.0/salmon-0.14.0_linux_x86_64.tar.gz
wget https://github.com/trinityrnaseq/trinityrnaseq/archive/Trinity-v2.8.5.tar.gz
make
#!/bin/bash -l
#SBATCH -o Trinity2.8_combined.txt
#SBATCH -n 32
#SBATCH -p himem
#SBATCH --mail-user=pscha005@odu.edu
#SBATCH --mail-type=END
#SBATCH --job-name=Trinity_Combined
enable_lmod
module load java/12.0
module load samtools
module load bowtie2/2.3
export PATH=$PATH:/scratch-lustre/pscha005/scripts/jellyfish-2.2.10/bin/:/scratch-lustre/pscha005/scripts/salmon-latest_linux_x86_64/bin/
/scratch-lustre/pscha005/scripts/trinityrnaseq-Trinity-v2.8.5/Trinity --seqType fq --max_memory 768G --left SRR1646513_1.fastq,SRR1648119_1.fastq --right SRR1646513_2.fastq,SRR1648119_2.fastq --CPU 32 --output trinity2.8_output
Errmsg:
Traceback (most recent call last):
File "/scratch-lustre/pscha005/scripts/trinityrnaseq-Trinity-v2.8.5/Analysis/SuperTranscripts/Trinity_gene_splice_modeler.py", line 11, in <module>
import numpy
ImportError: No module named numpy
#### Trying to add numpy (and dependecies cuda and python) to SLURM script
#!/bin/bash -l
#SBATCH -o Trinity2.8_combined.txt
#SBATCH -n 32
#SBATCH -p himem
#SBATCH --mail-user=pscha005@odu.edu
#SBATCH --mail-type=END
#SBATCH --job-name=Trinity2.8_Combined
enable_lmod
module load java/12.0
module load samtools
module load bowtie2/2.3
module load cuda/7.5
module load python/2.7
module load numpy/1.16
export PATH=$PATH:/scratch-lustre/pscha005/scripts/jellyfish-2.2.10/bin/:/scratch-lustre/pscha005/scripts/salmon-latest_linux_x86_64/bin/
/scratch-lustre/pscha005/scripts/trinityrnaseq-Trinity-v2.8.5/Trinity --seqType fq --max_memory 768G --left SRR1646513_1.fastq,SRR1648119_1.fastq --right SRR1646513_2.fastq,SRR1648119_2.fastq --CPU 32 --output trinity2.8_output
jellyfish: /usr/lib64/libstdc++.so.6: version GLIBCXX_3.4.20' not found (required by jellyfish)
jellyfish: /usr/lib64/libstdc++.so.6: version
CXXABI_1.3.8' not found (required by jellyfish)
jellyfish: /usr/lib64/libstdc++.so.6: version GLIBCXX_3.4.21' not found (required by jellyfish)
jellyfish: /usr/lib64/libstdc++.so.6: version
GLIBCXX_3.4.15' not found (required by jellyfish)
jellyfish: /usr/lib64/libstdc++.so.6: version GLIBCXX_3.4.19' not found (required by jellyfish)
jellyfish: /usr/lib64/libstdc++.so.6: version
GLIBCXX_3.4.20' not found (required by /home/pscha005/lib/libjellyfish-2.0.so.2)
jellyfish: /usr/lib64/libstdc++.so.6: version `GLIBCXX_3.4.21' not found (required by /home/pscha005/lib/libjellyfish-2.0.so.2)
Use of uninitialized value $version in pattern match (m//) at /scratch-lustre/pscha005/scripts/trinityrnaseq-Trinity-v2.8.5/Trinity line 3827.
Error, need jellyfish v2, instead found ; Get it here: http://www.genome.umd.edu/jellyfish.html at /scratch-lustre/pscha005/scripts/trinityrnaseq-Trinity-v2.8.5/Trinity line 3828.
#### Issue was GCC missing, added module load to script
#!/bin/bash -l
#SBATCH -o Trinity2.8_combined.txt
#SBATCH -n 32
#SBATCH -p himem
#SBATCH --mail-user=pscha005@odu.edu
#SBATCH --mail-type=END
#SBATCH --job-name=Trinity2.8_Combined
enable_lmod
module load gcc
module load java/12.0
module load samtools
module load bowtie2/2.3
module load cuda/7.5
module load python/2.7
module load numpy/1.16
export PATH=$PATH:/scratch-lustre/pscha005/scripts/jellyfish-2.2.10/bin/:/scratch-lustre/pscha005/scripts/salmon-latest_linux_x86_64/bin/
/scratch-lustre/pscha005/scripts/trinityrnaseq-Trinity-v2.8.5/Trinity --seqType fq --max_memory 768G --left SRR1646513_1.fastq,SRR1648119_1.fastq --right SRR1646513_2.fastq,SRR1648119_2.fastq --CPU 32 --output trinity2.8_output
Isoetes echinospora from Sandy Hetherington
Isoetes tegetiformans from 1KP Project
Isoetes sp. from 1KP Project
Isoetes sinensis from Yang and Liu(2015)
makeblastdb -in ../Isoetes_transcriptomes_2.fasta -dbtype nucl
head Reference_File_List.txt L1.fa L10.fa L100.fa L101.fa L102.fa L103.fa L104.fa L105.fa L106.fa L107.fa
head L1.fa
L1_LeptosporangiateMonilophytes_Polypodiales_Pteris_vittata_1REF CGCGATCCTCAATATTCTACATTAGACAAACAAGATCTTGCAGAGTTCAAAAAGGTCCTTGGTGATACTGGCGTTGTCACTGAGGCCTTTGAATTGGAGGCTGCAAACACTGATTGGCTAAGAAAATATGTTGGAACCAGCAAAGTGTTATTACGTCCATCTACAACGCAGCAA L1_Lycophytes_Selaginellales_Selaginella_moellendorffii_1REF GCCTCCAAAGTTGTAAGAGATGAACGCTTTGCAACACTGGATGATCAAGATATCAAGCACTTCTCCGGGATAGTGGGATCGAAGGGCCTGGTTGTGGACAAGGATGAATTGGAAGTGGCCAACACGGACTGGATGAGGAAGTTCAAAGGATCCGCTCAGCTCCTCCTTCGCCCCCAATCTTCAAATCAA L1_Hornworts_Dendrocerotales_Megaceros_flagellaris_1REF ATTCAGCGAGACGGGCGATTTTCTGATCTCAAGGATTCTGACATTGCCGTTTTCCAACAAATCCTGGGGGACGAGGGCGTGATTGTGGATCCGGATGAGCTGGAAGTGGCAAACACCGATTGGATGCGCAAGTACCGCGGAAATAGTAACCTGCTGTTGCGGCCCAAATCATCAGAGCAA L1_LeptosporangiateMonilophytes_Cyatheales_Thyrsopteris_elegans_1REF TCCAGAGATCCCCGATACTCTACAATCAAGGATCAAGACATTGCAGAGCTGCGCAAAATTGTTGGAGAGAAGGGTATTGTTACTGAAGAGTCTGAGCTAGAAGCTGCCAACACAGATTGGATGAGAAAGTATGTTGGAGGCAGCAAGGTGTTACTCCGCCCACGCACAACGCAGCAG L1_LeptosporangiateMonilophytes_Polypodiales_Phymatosorus_grossus_1__REF AAAGCTTCTCGGGACCCTCGATATTCCACCATAGAGGATCAAGATGTTCAACAATTTCAAAAGATTGTTGGTGAAAAGGGTGTTTTGACTGAGGAATCTGAGCTAGATGCTGCCAATACTGATTGGATGCGAAAGTATGTTGGAAGCAGCAAGGTGTTACTTTGCCCACGTACAACACAGCAA
1. Path to BLAST db
2. Txt file with list of all probe sequence files
3. FASTA file with all transcript seqs (same one used to make BLAST db)
1. Extract references for just Isoetes, just Lycophytes, and all taxa from GoFlag loci files. Output is:
Isoetes_GoFlag_references.fasta
Lycophytes_GoFlag_references.fasta
All_GoFlag_references.fasta
2. BLAST each reference file against transcriptome db. Output is:
Isoetes_BLAST_to_transcriptomes.out
Lycophytes_BLAST_to_transcriptomes.out
AllRef_BLAST_to_transcriptomes.out
3. Parse BLAST output and extract the transcript hits. Output is:
Isoetes_GoFlag_target_scaffolds.fasta
Lycophyte_GoFlag_target_scaffolds.fasta
All_GoFlag_target_scaffolds.fasta
4. Remove all duplicate trancripts. Output is:
Isoetes_Unique_GoFlag_target_scaffolds.fasta
Lycophytes_Unique_GoFlag_target_scaffolds.fasta
All_Unique_GoFlag_target_scaffolds.fasta
grep -c ">" Unique All_Unique_GoFlag_target_scaffolds.fasta:3418 Isoetes_Unique_GoFlag_target_scaffolds.fasta:3385 Lycophytes_Unique_GoFlag_target_scaffolds.fasta:3385
/Applications/Phylogenetics/HybPiper/HybPiper/reads_first.py -r ../Schafran_GoFlag/A01_trimmedReads/RAPiD-Genomics_F065_UFG_393202_P020_WA10_i5-514_i7-54_S2005_L007_R1_001_val_1.fq ../Schafran_GoFlag/A01_trimmedReads/RAPiD-Genomics_F065_UFG_393202_P020_WA10_i5-514_i7-54_S2005_L007_R2_001_val_2.fq -b ./Isoetes_Unique_GoFlag_target_scaffolds.fasta --bwa --prefix I_engelmannii_WA10
/Applications/Phylogenetics/HybPiper/HybPiper/reads_first.py -r ../Schafran_GoFlag/A01_trimmedReads/RAPiD-Genomics_F065_UFG_393202_P020_WA11_i5-514_i7-25_S2006_L007_R1_001_val_1.fq ../Schafran_GoFlag/A01_trimmedReads/RAPiD-Genomics_F065_UFG_393202_P020_WA11_i5-514_i7-25_S2006_L007_R2_001_val_2.fq -b ./Isoetes_Unique_GoFlag_target_scaffolds.fasta --prefix I_boomii_WA11 --bwa
/Applications/Phylogenetics/HybPiper/HybPiper/reads_first.py -r ../Schafran_GoFlag/A01_trimmedReads/*WA12*.fq -b ./Isoetes_Unique_GoFlag_target_scaffolds.fasta --prefix I_butleri_WA12 --bwa
/Applications/Phylogenetics/HybPiper/HybPiper/reads_first.py -r ../Schafran_GoFlag/A01_trimmedReads/*WB10*.fq -b ./Isoetes_Unique_GoFlag_target_scaffolds.fasta --prefix I_georgiana_WB10 --bwa
/Applications/Phylogenetics/HybPiper/HybPiper/reads_first.py -r ../Schafran_GoFlag/A01_trimmedReads/*WB11*.fq -b ./Isoetes_Unique_GoFlag_target_scaffolds.fasta --prefix I_mattaponica_WB11 --bwa
/Applications/Phylogenetics/HybPiper/HybPiper/reads_first.py -r ../Schafran_GoFlag/A01_trimmedReads/*WB12*.fq -b ./Isoetes_Unique_GoFlag_target_scaffolds.fasta --prefix I_lithophila_WB12 --bwa
/Applications/Phylogenetics/HybPiper/HybPiper/reads_first.py -r ../Schafran_GoFlag/A01_trimmedReads/*WC10*.fq -b ./Isoetes_Unique_GoFlag_target_scaffolds.fasta --prefix I_appalachiana_WC10 --bwa
/Applications/Phylogenetics/HybPiper/HybPiper/reads_first.py -r ../Schafran_GoFlag/A01_trimmedReads/*WC11*.fq -b ./Isoetes_Unique_GoFlag_target_scaffolds.fasta --prefix I_valida_WC11 --bwa
/Applications/Phylogenetics/HybPiper/HybPiper/reads_first.py -r ../Schafran_GoFlag/A01_trimmedReads/*WC12*.fq -b ./Isoetes_Unique_GoFlag_target_scaffolds.fasta --prefix I_louisianensis_WC12 --bwa
/Applications/Phylogenetics/HybPiper/HybPiper/reads_first.py -r ../Schafran_GoFlag/A01_trimmedReads/*WD10*.fq -b ./Isoetes_Unique_GoFlag_target_scaffolds.fasta --prefix I_microvela_WD10 --bwa
/Applications/Phylogenetics/HybPiper/HybPiper/reads_first.py -r ../Schafran_GoFlag/A01_trimmedReads/*WD11*.fq -b ./Isoetes_Unique_GoFlag_target_scaffolds.fasta --prefix I_flaccida_WD11 --bwa
/Applications/Phylogenetics/HybPiper/HybPiper/reads_first.py -r ../Schafran_GoFlag/A01_trimmedReads/*WD12*.fq -b ./Isoetes_Unique_GoFlag_target_scaffolds.fasta --prefix I_boliviensis_WD12 --bwa
/Applications/Phylogenetics/HybPiper/HybPiper/reads_first.py -r ../Schafran_GoFlag/A01_trimmedReads/*WE10*.fq -b ./Isoetes_Unique_GoFlag_target_scaffolds.fasta --prefix I_septentrionalis_WE10 --bwa
/Applications/Phylogenetics/HybPiper/HybPiper/reads_first.py -r ../Schafran_GoFlag/A01_trimmedReads/*WE11*.fq -b ./Isoetes_Unique_GoFlag_target_scaffolds.fasta --prefix I_flaccida_WE11 --bwa
/Applications/Phylogenetics/HybPiper/HybPiper/reads_first.py -r ../Schafran_GoFlag/A01_trimmedReads/*WE12*.fq -b ./Isoetes_Unique_GoFlag_target_scaffolds.fasta --prefix I_tuckermanii_WE12 --bwa
/Applications/Phylogenetics/HybPiper/HybPiper/reads_first.py -r ../Schafran_GoFlag/A01_trimmedReads/*WF10*.fq -b ./Isoetes_Unique_GoFlag_target_scaffolds.fasta --prefix I_virginica_WF10 --bwa
/Applications/Phylogenetics/HybPiper/HybPiper/reads_first.py -r ../Schafran_GoFlag/A01_trimmedReads/*WF11*.fq -b ./Isoetes_Unique_GoFlag_target_scaffolds.fasta --prefix I_piedmontana_WF11 --bwa
/Applications/Phylogenetics/HybPiper/HybPiper/reads_first.py -r ../Schafran_GoFlag/A01_trimmedReads/*WF12*.fq -b ./Isoetes_Unique_GoFlag_target_scaffolds.fasta --prefix I_storkii_WF12 --bwa
/Applications/Phylogenetics/HybPiper/HybPiper/reads_first.py -r ../Schafran_GoFlag/A01_trimmedReads/*WG10*.fq -b ./Isoetes_Unique_GoFlag_target_scaffolds.fasta --prefix I_hyemalis_WG10 --bwa
/Applications/Phylogenetics/HybPiper/HybPiper/reads_first.py -r ../Schafran_GoFlag/A01_trimmedReads/*WG11*.fq -b ./Isoetes_Unique_GoFlag_target_scaffolds.fasta --prefix I_melanopoda_WG11 --bwa
for i in I_*; do cleanup.py $i; done
python /Applications/Phylogenetics/HybPiper/HybPiper/get_seq_lengths.py Isoetes_Unique_GoFlag_target_scaffolds.fasta namelist.txt dna > sequence_lengths.txt # must be called with "python get_seq_lengths.py ..." due to preserve correct sys.argv order
head sequence_lengths.txt
Species L78 L79 L72 L73 L70 L71 L76 L77 L74 L75 L321 L320 L323 L289 L325 L324 L327 L326 L329 L283 L280 L281 L286 L284 L285 L354 L355 L356 L357 L219 L218 L352 L353 L215 L214 L217 L358 L359 L213 L212 L422 L423 L420 L421 L426 L427 L425 L428 L429 L322 L130 L282 L132 L133 L134 L135 L136 L137 L138 L139 L328 L43 L42 L41 L40 L47 L46 L45 L44 L49 L48 L251 L250 L253 L252 L255 L254 L257 L256 L259 L258 L318 L319 L310 L311 L312 L315 L316 L317 L36 L37 L34 L35 L32 L33 L30 L31 L38 L39 L224 L225 L226 L227 L220 L221 L222 L223 L228 L229 L266 L6 L7 L4 L5 L2 L3 L1 L8 L9 L169 L168 L167 L166 L165 L164 L163 L162 L161 L160 L295 L294 L297 L296 L291 L290 L293 L292 L299 L298 L350 L348 L351 L211 L210 L347 L269 L345 L344 L343 L342 L341 L260 L261 L262 L263 L264 L265 L349 L267 L439 L435 L434 L437 L436 L431 L430 L433 L432 L123 L122 L121 L120 L127 L126 L125 L124 L129 L128 L196 L197 L194 L195 L192 L193 L190 L191 L198 L199 L390 L391 L392 L394 L395 L396 L397 L398 L399 L453 L452 L50 L51 L52 L53 L54 L55 L56 L57 L58 L59 L309 L308 L303 L302 L301 L300 L307 L306 L305 L304 L237 L236 L235 L234 L232 L231 L230 L239 L238 L404 L405 L406 L407 L400 L401 L402 L403 L408 L409 L158 L159 L152 L153 L150 L151 L156 L157 L154 L155 L18 L19 L14 L15 L16 L17 L10 L11 L12 L87 L86 L85 L84 L83 L82 L81 L80 L89 L88 L372 L373 L370 L278 L376 L377 L374 L375 L273 L272 L378 L270 L277 L276 L275 L274 L448 L449 L440 L441 L442 L443 L444 L445 L446 L447 L118 L119 L116 L117 L114 L115 L112 L113 L279 L371 L181 L180 L183 L182 L185 L184 L187 L186 L189 L188 L271 L383 L382 L381 L380 L387 L386 L385 L384 L389 L388 L65 L64 L67 L66 L61 L60 L63 L62 L69 L68 L336 L337 L334 L335 L332 L333 L330 L331 L338 L339 L202 L203 L200 L201 L206 L207 L204 L205 L209 L417 L416 L415 L414 L413 L412 L411 L410 L419 L418 L145 L144 L147 L146 L141 L143 L142 L149 L148 L94 L95 L96 L97 L90 L91 L92 L93 L98 L99 L246 L247 L244 L245 L242 L243 L240 L248 L249 L369 L368 L365 L364 L367 L366 L361 L360 L363 L362 L21 L20 L23 L22 L25 L24 L27 L26 L29 L28 L451 L450 L109 L108 L101 L100 L103 L102 L105 L107 L106 L379 L178 L179 L174 L175 L176 L177 L170 L171 L172 L173 L268 L346
MeanLength 2832 2865 1202 926 1161 878 1381 1381 1009 1175 1586 1088 1873 1526 982 1775 693 752 1717 1976 738 3678 1598 1951 1480 1160 1097 4205 1823 1504 2368 2476 2731 2573 2573 2022 2128 2378 2573 2589 1013 419 1607 1704 1134 2613 943 1138 1028 1624 1191 2304 1377 1269 993 1280 1288 1794 1620 1799 1268 2700 2340 2129 2625 2380 2599 2230 1886 1163 1379 1647 1289 2180 1673 1767 2180 1909 1761 2333 2333 1780 1780 1846 1846 1493 3174 3174 3174 1561 886 754 1188 1794 842 1630 1840 1262 1311 963 1385 1317 1229 1825 2045 2045 963 591 2257 1691 2622 1490 2427 2622 2439 2431 1492 1692 1230 522 1793 1280 1757 674 1603 1792 1985 1985 1087 1249 1804 910 1401 1658 1716 1449 1140 877 1016 761 1524 1007 2589 2389 1353 1156 1011 733 815 963 909 2264 1164 987 1826 1771 1771 1822 1742 3019 1676 1110 1358 1722 926 456 1227 1281 1350 699 687 868 1189 612 769 1302 1202 880 1833 2235 1799 3101 1193 730 1782 1203 998 1423 1325 1857 1669 648 597 839 420 983 1166 1789 1460 1016 1198 1238 1859 792 944 1097 1770 1723 1309 1337 1590 1448 1644 1201 671 1370 1525 1465 1980 799 873 703 1570 1114 1525 1422 827 827 1049 1295 1293 1558 957 848 907 939 1326 1587 1832 1039 1390 1134 1189 1390 1414 1801 1307 1446 1849 1258 2053 1046 1180 1443 1310 1058 1595 1022 1492 907 1457 1710 1248 1123 1097 2703 1110 1073 1262 1396 1143 1335 1469 1445 1768 706 762 1194 1236 803 1232 1176 1964 1704 1704 1710 913 1088 863 797 859 809 923 821 865 3732 1063 1451 1535 1412 1450 711 934 1776 2214 1912 1982 1926 1774 995 1416 1782 1687 1477 742 836 924 1005 1654 1245 925 1150 1530 1361 1685 1834 1185 1220 1449 1663 1654 2082 891 1597 1410 1059 1412 1409 999 1020 1509 1509 1600 1055 2443 2466 2036 1235 1088 544 2331 1157 3076 2736 2736 883 1140 1282 1254 1089 1549 1590 1889 3369 1628 3513 3513 3722 1675 1926 1189 1739 2190 1867 781 1752 3218 2962 2962 1880 1542 1014 2675 2949 1318 1250 2026 1348 1391 1198 1468 2105 1837 920 1587 721 1215 1677 667 1585 1189 1418 1257 1485 1072 990 2058 1239 1262 1630 1041 1123 1946 1949 1797 1881 1678 1556 1642 969 2012 1640 1245 2042 2289 1780 2736 2736 2035 1588 1554 1432 1653 1532 1308
I_engelmannii_WA10 1224 0 792 303 465 393 1392 1392 486 822 609 519 954 594 837 843 537 378 909 336 759 240 447 1146 414 594 561 414 309 957 600 2688 2724 684 681 870 1371 792 684 870 693 207 303 357 393 0 351 708 708 825 516 1155 837 414 0 606 0 1737 1722 1722 849 1293 876 876 1947 435 798 351 165 399 678 1113 765 1221 1119 765 1236 885 603 888 597 789 747 1323 1341 189 945 945 945 1074 735 321 1293 1278 336 912 1299 906 72 333 543 651 414 501 543 960 264 429 1062 1266 564 1197 438 693 522 597 528 1698 1035 297 1014 345 858 363 687 555 801 948 381 372 561 444 474 573 252 426 573 645 228 708 501 339 870 825 909 321 876 876 162 759 423 1035 450 375 792 612 702 306 831 876 981 468 723 1050 582 453 855 729 261 624 624 390 738 417 384 516 912 0 720 834 516 0 0 441 786 147 510 279 480 249 525 537 813 369 477 366 0 342 723 417 447 345 1188 831 453 261 387 372 1173 117 381 0 324 855 948 333 663 633 723 711 438 1779 1005 465 699 621 234 360 534 906 903 534 93 261 276 1158 915 540 1170 807 591 807 330 630 279 237 825 906 408 870 561 294 1452 714 456 726 525 411 762 1035 0 534 552 2769 0 855 726 549 1086 135 606 612 711 240 318 0 294 1095 579 705 453 822 981 405 642 1035 471 0 0 0 939 345 180 465 843 363 618 195 189 819 0 813 312 720 405 783 1302 318 459 951 378 936 573 291 459 666 1383 528 1065 1059 1104 369 300 411 531 1035 540 759 354 1260 474 819 576 714 546 375 399 987 393 393 573 291 1638 1719 600 858 0 231 2142 0 540 1701 1692 333 612 738 1263 609 318 780 411 750 750 1458 1395 981 750 750 780 375 612 504 0 345 2847 2802 2721 669 1119 402 462 687 651 540 0 702 1188 807 297 624 477 627 0 339 441 0 0 432 633 258 0 456 852 270 909 804 540 0 672 1038 1365 351 486 594 255 414 1698 1293 660 0 777 288 774 768 1290 717 540 579 735 921 993
I_boomii_WA11 1005 0 792 303 465 393 1143 1392 1392 384 675 693 732 675 786 738 606 378 966 327 828 282 459 531 498 438 480 507 396 501 1200 2121 2277 1122 756 852 588 852 756 837 375 189 294 279 393 0 375 198 837 939 417 573 345 414 0 534 1677 1746 1491 1704 909 1485 2184 753 1476 525 1473 1512 165 399 717 975 537 1041 975 720 975 1122 495 1077 1023 657 960 1431 1158 189 957 876 957 1299 570 354 798 1026 336 789 1119 771 819 372 468 651 129 501 1011 1065 351 339 954 1350 594 1161 744 537 516 576 588 1206 1227 159 1308 345 612 426 579 558 825 888 897 369 549 789 534 519 252 429 579 1161 345 591 480 375 873 861 906 321 150 873 162 162 0 1056 357 372 708 480 585 165 870 810 831 555 549 1098 522 270 936 729 258 333 333 390 1026 417 384 516 657 0 888 705 504 0 132 633 366 150 510 204 480 444 468 525 813 588 477 537 0 0 342 954 420 363 348 717 552 456 345 375 372 1065 90 471 0 324 858 1134 333 351 330 639 804 429 351 414 465 345 708 441 300 534 894 933 630 234 360 276 981 912 210 1311 1272 702 849 324 630 408 330 954 939 594 870 870 237 705 690 492 621 507 738 867 1032 0 954 564 2790 0 795 864 531 456 399 534 666 717 240 318 0 0 327 732 456 561 891 648 354 759 609 240 0 0 0 987 345 180 285 492 363 585 0 513 822 0 399 312 546 726 510 552 318 501 831 348 1806 573 261 453 552 1212 489 885 885 1062 369 366 411 483 447 462 630 354 408 564 651 714 483 546 372 411 966 393 528 663 441 1848 1509 357 903 0 435 225 0 873 1266 1446 327 552 249 1056 453 405 816 303 708 372 996 1296 1158 834 531 678 135 369 501 0 345 2475 2826 2544 681 891 387 621 699 651 486 873 612 936 735 228 840 489 759 0 900 438 0 0 414 573 159 1548 495 855 291 504 786 615 0 759 1827 1572 186 0 594 246 357 1023 3366 441 0 1281 357 735 843 792 663 540 591 708 267 930
I_butleri_WA12 1365 0 684 303 465 1134 1323 1323 1389 507 186 1167 615 717 282 114 480 339 816 330 600 138 444 576 378 486 558 549 504 1095 1041 2013 2007 693 1014 1398 789 789 681 1200 597 255 303 279 516 0 375 438 825 699 456 1416 867 414 0 549 1635 1536 1575 1635 837 1224 873 1899 1926 603 1461 351 351 399 636 1029 333 939 1032 555 1044 1137 510 1041 1086 894 726 1437 1449 522 879 870 879 1107 441 321 1266 1512 714 660 1122 1032 708 321 543 636 210 504 972 927 267 429 864 354 666 1143 0 723 612 624 588 1179 1005 225 1107 345 819 693 429 606 876 780 375 486 735 0 573 315 252 477 543 951 579 576 426 375 870 993 879 657 243 864 846 885 426 1110 357 372 864 489 831 330 801 843 846 405 708 1365 549 387 747 729 258 705 333 384 261 351 384 516 891 0 771 747 513 0 132 612 1428 204 489 603 480 447 825 531 807 477 477 393 0 0 342 924 384 363 399 1053 585 588 345 396 372 1083 90 486 0 324 1212 273 366 435 957 723 714 444 351 1095 468 345 345 234 357 363 1326 1236 534 93 840 276 1836 1008 414 1395 1170 675 1008 330 675 435 210 747 897 537 873 873 300 714 534 492 777 531 765 930 1164 0 900 636 2547 0 531 798 729 1041 612 597 588 717 240 315 0 300 717 525 393 462 876 822 360 501 930 705 0 0 0 1296 360 180 285 873 354 606 195 189 801 0 462 312 738 399 852 774 300 408 1458 558 705 576 291 744 645 1269 585 699 1119 1395 366 342 1200 540 447 594 438 354 1125 621 888 546 597 546 351 345 990 564 564 411 441 594 1791 510 873 0 435 1143 0 852 1524 1602 588 606 483 1047 780 402 801 711 594 594 1497 1227 873 618 774 630 135 513 504 0 345 2667 2709 2427 711 963 408 978 693 651 534 879 990 948 732 228 723 486 666 0 939 498 0 0 396 507 258 1227 357 909 309 543 789 687 0 672 2010 1830 402 546 363 507 336 1905 2391 441 1206 663 636 711 711 1278 672 543 498 681 585 987
I_georgiana_WB10 972 0 588 537 465 393 1257 1377 1356 375 513 582 192 510 219 462 510 393 783 336 621 0 462 1182 399 108 528 0 306 501 330 2079 2079 0 366 0 981 585 267 354 381 207 303 279 468 0 534 186 828 744 405 543 564 216 0 579 1269 1473 1434 1143 681 1347 876 630 1311 465 1170 351 165 399 720 990 660 801 891 525 780 732 519 204 927 99 441 1227 1107 189 945 945 876 978 498 330 1095 960 336 564 1089 774 741 240 354 486 447 501 684 912 267 243 849 1314 516 912 480 585 516 495 570 1182 750 252 1074 342 501 354 621 411 657 624 210 372 396 0 441 534 252 510 519 855 228 567 447 117 483 0 861 333 882 843 708 759 261 930 288 351 615 423 459 246 837 0 795 360 699 1242 522 276 522 564 258 480 630 453 300 411 384 516 570 0 834 516 522 0 0 432 501 147 387 462 480 399 564 537 813 459 477 537 0 342 705 420 492 345 783 753 453 345 456 369 342 117 378 0 324 741 948 333 435 612 882 507 438 1785 498 405 345 699 234 0 294 1026 819 441 0 204 276 1029 870 423 891 807 465 807 330 456 279 330 609 666 528 798 867 225 393 660 591 603 582 567 507 864 0 606 729 2226 0 843 747 531 456 135 606 630 408 240 285 0 285 321 585 498 300 552 621 405 360 264 570 0 0 0 1017 342 180 285 789 459 426 0 189 735 0 399 312 657 618 303 528 318 408 555 228 1608 900 291 453 540 411 483 636 1131 1293 369 0 231 483 504 381 234 354 600 366 651 390 417 546 375 351 669 273 522 501 294 657 819 453 495 0 435 813 0 483 780 825 516 441 249 1158 582 354 750 777 645 453 1269 1401 927 333 210 387 135 420 0 0 345 2445 2589 2121 711 972 387 537 699 651 381 756 672 1398 882 228 711 480 708 0 339 360 0 0 414 561 192 972 366 948 102 282 657 423 0 657 651 654 273 219 594 192 336 1668 2034 441 777 690 438 534 777 1125 453 540 498 546 318 786
I_mattaponica_WB11 1065 1212 657 375 465 393 1392 1383 1392 408 744 636 894 522 327 417 516 378 840 225 759 294 432 780 498 381 480 111 357 456 330 2130 2130 612 405 1110 852 852 759 465 528 168 300 354 240 0 375 408 750 870 591 369 792 372 0 531 1398 1590 1506 1680 849 1428 1764 870 1359 600 1299 1521 1494 399 717 909 522 975 1023 648 969 630 519 942 960 804 849 1152 1077 189 939 957 939 1206 738 651 996 1419 336 654 1116 954 657 333 462 486 345 1053 948 1104 267 243 876 1410 396 1083 489 510 0 585 621 1491 1236 219 1008 345 834 423 429 561 804 756 333 375 504 0 0 459 252 402 513 492 330 732 417 324 483 363 867 321 876 876 162 852 0 1107 291 375 810 534 837 249 825 786 687 480 702 783 168 282 618 621 267 330 333 549 483 432 384 105 588 0 873 723 516 0 0 438 813 0 387 795 480 444 351 537 810 510 468 378 0 336 918 372 447 213 906 693 456 345 279 366 783 90 441 0 318 645 1032 333 357 426 816 495 438 348 507 384 345 696 441 300 429 759 1005 495 234 717 276 1005 1020 483 795 1056 441 795 330 582 279 210 798 873 408 558 870 0 462 720 714 654 534 552 528 1041 0 597 930 2445 0 492 810 531 834 636 471 900 447 240 306 0 0 327 0 453 381 897 1008 330 945 561 240 0 0 0 858 345 180 285 447 363 489 189 189 795 0 816 312 552 690 846 552 318 405 915 0 1971 1041 288 453 444 1065 687 810 900 1269 363 417 1212 483 690 591 420 354 564 576 867 726 540 276 333 660 813 483 618 582 348 1326 1776 489 858 0 435 1248 0 483 1212 1272 267 495 417 1071 672 399 699 687 759 570 1305 1395 990 708 789 657 228 423 0 0 711 2577 2652 2718 732 1236 408 666 699 651 624 906 741 936 840 228 375 447 690 0 900 441 0 0 414 573 156 891 297 723 213 852 765 753 0 840 1464 930 258 0 675 498 402 1941 1722 441 0 618 252 717 777 690 705 537 351 564 216 957
I_lithophila_WB12 1152 0 546 303 450 918 1404 1188 1308 594 969 639 945 609 282 930 564 378 708 336 723 150 465 1788 498 468 420 0 354 501 735 2028 2037 0 0 363 1545 1362 315 519 564 168 300 342 486 0 450 411 645 627 333 228 516 408 0 576 1479 1572 1428 1686 801 1359 1119 579 1440 0 1266 1437 165 399 720 915 555 1185 771 858 1035 414 543 264 702 810 810 690 348 189 876 930 930 876 813 324 1329 1053 351 657 1053 927 735 336 543 654 210 501 831 1026 267 339 897 1212 516 1089 624 288 489 594 519 1404 696 0 0 345 315 351 426 471 768 654 198 372 642 114 216 399 252 501 543 597 279 573 381 375 0 0 906 321 228 876 759 732 345 1764 288 375 630 624 456 309 858 966 918 606 702 1188 465 225 645 597 261 468 636 390 447 420 384 447 789 0 771 549 507 0 147 591 954 147 387 774 393 447 816 339 807 474 477 399 0 342 726 417 315 339 891 669 459 333 342 312 942 81 489 0 321 744 837 321 306 828 723 855 435 1740 909 447 345 345 234 144 468 900 885 534 0 435 276 1098 705 414 795 873 846 858 330 615 336 330 795 915 333 504 873 321 699 882 372 792 486 561 609 1014 0 783 945 2544 0 762 849 606 456 579 480 960 714 240 312 0 285 324 612 429 255 789 747 0 768 978 672 0 0 0 1050 312 180 285 549 456 552 0 558 852 0 579 312 735 690 897 630 0 501 909 165 1749 780 291 450 477 1278 510 687 1806 1305 369 96 714 486 948 531 435 354 0 357 807 576 480 546 372 402 846 375 393 591 345 738 1110 435 657 0 435 972 0 0 1002 1041 249 483 351 744 618 336 813 591 795 627 1338 1482 1185 876 402 639 324 417 504 0 345 2370 2712 2586 771 1278 375 708 696 651 363 807 813 1494 765 228 690 303 666 0 1002 438 0 0 417 570 258 1134 387 852 189 870 645 654 0 708 1392 1284 186 579 699 192 336 1332 870 441 168 1473 576 627 696 1269 831 540 528 354 519 324
I_appalachiana_WC10 1236 0 684 303 465 393 1398 1374 1383 1221 597 516 693 594 786 534 549 363 873 315 771 345 444 1989 486 483 759 600 303 687 1200 1431 2145 1791 1389 1533 792 1476 1791 1437 660 168 303 357 357 0 375 432 714 747 342 864 642 414 0 606 1722 1647 1722 1533 777 1758 2145 2136 2340 600 1746 351 1734 399 720 897 600 1221 948 657 792 915 552 1077 981 810 1005 1209 1344 189 945 945 876 1296 567 663 975 1053 336 765 1053 753 735 288 633 540 213 501 996 1188 234 243 702 1311 987 837 507 915 1008 648 699 1254 1311 300 1002 345 1362 489 429 663 840 780 978 375 438 315 690 459 252 432 468 795 474 573 336 360 1671 1512 744 321 216 936 162 162 423 1071 348 375 678 504 513 198 801 855 831 363 696 1029 522 444 1080 645 264 627 564 450 759 429 384 606 906 0 744 576 516 0 192 618 1212 213 387 282 393 387 465 537 813 531 477 402 0 342 780 417 363 279 882 672 453 258 129 372 897 117 489 0 321 861 948 333 300 723 741 705 438 345 696 444 345 621 234 360 378 948 1014 396 222 627 276 1311 843 423 1278 1170 375 1212 330 615 315 0 174 828 519 522 870 342 630 729 393 486 534 567 726 1032 0 543 633 2754 0 936 834 555 840 615 492 390 573 240 318 0 579 321 507 501 372 900 900 210 474 687 744 0 0 0 1017 345 177 285 615 1014 471 192 465 840 0 609 312 735 666 993 789 318 486 1098 348 2196 573 291 420 450 1245 450 1116 633 1209 366 300 411 483 297 537 699 354 1665 579 786 705 447 552 375 402 957 453 453 609 294 1689 1770 465 540 0 435 1548 0 1728 1440 1425 333 483 249 1341 570 417 708 750 708 477 1452 1473 1020 876 648 849 135 531 477 0 345 2481 120 2760 651 1779 396 669 702 630 381 939 792 1068 606 228 768 471 600 0 930 486 0 0 417 636 198 1206 168 1089 273 801 732 585 0 774 1122 1365 489 501 591 414 339 1395 1506 441 0 1434 417 675 774 1326 858 540 471 729 702 885
I_valida_WC11 1371 1095 804 303 465 948 1398 1398 1398 714 513 645 570 702 741 831 618 378 1008 336 768 459 462 2106 495 486 786 468 477 1203 1353 2007 2055 756 1434 1200 822 1419 1611 870 570 228 303 357 0 0 363 384 837 903 498 1188 867 414 0 531 1677 1677 1584 1704 885 1779 876 2040 2370 600 1296 351 165 399 1548 1152 597 1050 1215 930 1188 972 603 972 1134 1155 546 1482 1503 189 876 876 876 1272 645 618 975 1167 336 801 1260 1326 912 333 546 474 213 504 1029 930 234 393 1083 1452 987 1074 156 969 1080 537 660 1248 1341 369 759 345 879 477 972 852 819 747 876 372 627 0 609 618 252 510 444 870 384 732 468 360 870 840 990 321 216 1002 873 903 384 1194 402 375 807 591 894 279 912 912 990 513 693 1119 597 435 678 732 261 333 642 513 807 444 450 1173 768 0 780 546 516 0 228 636 1443 228 369 858 480 447 651 534 813 486 477 423 0 0 342 903 417 363 339 1122 732 459 321 396 372 1113 309 489 0 324 852 1137 333 459 783 906 816 438 345 1074 468 345 345 234 360 462 741 516 597 276 873 267 1467 807 372 1278 1278 909 1278 330 630 405 210 891 969 600 867 867 264 699 957 492 498 567 552 822 1110 0 1020 654 2577 0 1020 753 678 456 531 612 516 447 240 318 0 747 324 0 519 336 933 915 210 591 825 852 0 0 0 954 345 180 285 855 360 657 192 189 852 0 729 312 738 396 705 726 315 501 1941 348 984 573 291 732 678 1440 690 855 984 1356 369 291 411 828 816 471 828 354 1581 684 798 753 627 375 375 402 918 621 501 609 441 1989 2088 417 813 0 435 1947 0 870 1680 1599 249 618 249 1365 636 405 816 528 792 477 1431 1389 990 876 687 801 135 576 504 0 345 2673 54 2589 705 699 408 924 699 648 381 867 957 957 798 228 975 246 741 0 951 429 0 0 417 498 258 1341 216 1140 252 603 720 552 0 756 1467 1665 141 864 381 711 414 1464 2211 441 0 1494 690 735 735 1317 795 540 480 750 636 957
python /Applications/Phylogenetics/HybPiper/HybPiper/hybpiper_stats.py sequence_lengths.txt namelist.txt > statistics.txt
less statistics.txt
Name NumReads ReadsMapped PctOnTarget GenesMapped GenesWithContigs GenesWithSeqs GenesAt25pct GenesAt50pct GenesAt75pct Genesat150pct ParalogWarnings
I_melanopoda_WG11 902316 451481 0.500 422 415 409 356 181 79 1 20
I_microvela_WD10 1213694 607293 0.500 426 414 408 353 195 91 2 24
I_septentrionalis_WE10 812661 406427 0.500 422 413 408 352 166 80 1 24
I_appalachiana_WC10 1417393 708861 0.500 425 414 408 346 161 69 0 24
I_storkii_WF12 649777 325088 0.500 421 410 400 324 143 58 1 8
I_lithophila_WB12 790980 395595 0.500 424 405 396 321 142 54 2 8
I_engelmannii_WA10 406510 203355 0.500 421 409 406 341 161 62 1 11
I_junciformis_WH10 656061 328141 0.500 420 414 405 329 137 65 2 14
I_butleri_WA12 696404 348321 0.500 422 414 408 351 155 79 0 14
I_boliviensis_WD12 914794 457685 0.500 424 414 406 324 127 44 2 11
I_boomii_WA11 532021 266105 0.500 420 413 405 339 141 61 1 5
I_melanopoda_WH11 923495 462018 0.500 421 414 408 358 183 83 1 12
I_flaccida_WD11 534957 267729 0.500 420 413 409 352 181 86 2 15
I_flaccida_WE11 1396498 698613 0.500 426 416 406 360 199 92 1 15
I_valida_WC11 985647 493162 0.500 422 413 408 350 183 86 0 11
I_virginica_WF10 627113 313586 0.500 424 412 408 343 155 61 3 24
I_hyemalis_WG10 1009503 504953 0.500 423 414 407 356 185 81 2 17
I_georgiana_WB10 744855 372386 0.500 422 410 398 300 114 45 1 9
I_louisianensis_WC12 493236 246779 0.500 418 413 404 317 126 50 1 14
I_piedmontana_WF11 827266 413804 0.500 420 415 408 356 185 73 2 19
I_mattaponica_WB11 620550 310353 0.500 424 411 400 330 139 57 0 10
I_tuckermanii_WE12 1585951 793293 0.500 427 415 408 360 209 107 3 15
for i in I_*; do python /Applications/Phylogenetics/HybPiper/HybPiper/intronerate.py --prefix $i; done
Below:
python /Applications/Phylogenetics/HybPiper/HybPiper/retrieve_sequences.py Isoetes_Unique_GoFlag_target_scaffolds.fasta . dna
python /Applications/Phylogenetics/HybPiper/HybPiper/retrieve_sequences.py Isoetes_Unique_GoFlag_target_scaffolds.fasta . intron
python /Applications/Phylogenetics/HybPiper/HybPiper/retrieve_sequences.py Isoetes_Unique_GoFlag_target_scaffolds.fasta . supercontig
while read i; do python /Applications/Phylogenetics/HybPiper/HybPiper/paralog_investigator.py $i; done < namelist.txt
while read i; do python /Applications/Phylogenetics/HybPiper/HybPiper/paralog_retriever.py namelist.txt $i > $i.paralogs.fasta; done < all_loci.txt
for i in *.paralogs.fasta; do mafft --auto $i > MAFFT.$i; done
for i in MAFFT*.paralogs.fasta; do /Applications/Phylogenetics/trimAl/source/trimal -automated1 -in $i -out TRIM.$i; done
for i in TRIM*.paralogs.fasta; do raxmlHPC -s $i -n $i -m GTRGAMMA -f a -x 1 -N 100 -d -p 3556; done
#AUTO
/Applications/Phylogenetics/trimAl/source/trimal -in $i -out AUTO.TRIM.$i -automated1
#STRICT
/Applications/Phylogenetics/trimAl/source/trimal -in $i -out STRICT.TRIM.$i -strict
#SUPERSTRICT
/Applications/Phylogenetics/trimAl/source/trimal -in $i -out SUPERSTRICT.TRIM.$i -gt 0.8 -st 0.01 -resoverlap 0.9 -seqoverlap 80
for i in FILES; do num=$(grep -c ">" $i); if [ $num = 22 ] ; then COMMAND; fi; done
#!/bin/bash -l
#SBATCH --mail-user=pscha005@odu.edu
#SBATCH --mail-type=END
#SBATCH --job-name=IQTREE
for i in SUPER*.FNA ; do num=$(grep -c ">" $i ) ; if [ $num = 22 ] ; then /scratch-lustre/pscha005/scripts/iqtree-1.6.11-Linux/bin/iqtree -s $i -m MFP -bb 5000 -alrt 5000 -nm 5000 ; fi ; done
Working Directory: /Volumes/Samsung_T5/IsoetesDNA/GoFlag/Phyluce/
phyluce_assembly_get_match_counts --locus-db ./noSinensisRef/target_loci/probe.matches.sqlite --taxon-list-config polyploids.txt --taxon-group 'polyploids' --incomplete-matrix --output ./polyploids/polyploids-incomplete.conf
phyluce_assembly_get_fastas_from_match_counts --contigs ./noSinensisRef/assembly/contigs/ --locus-db ./noSinensisRef/target_loci/probe.matches.sqlite --match-count-output ./polyploids/polyploids-incomplete.conf --output ./polyploids/polyploids.incomplete.fasta --incomplete-matrix polyploids-incomplete.incomplete --log-path ./polyploids/
phyluce_align_seqcap_align --fasta ./polyploids/polyploids.incomplete.fasta --output ./polyploids/mafft-incomplete/ --taxa 11 --incomplete-matrix --output-format fasta --log-path ./polyploids/
phyluce_align_seqcap_align --fasta ./polyploids/polyploids.incomplete.fasta --output ./polyploids/mafft-incomplete-internal-trimmed/ --taxa 11 --log-path ./polyploids/ --output-format fasta --no-trim --incomplete-matrix
phyluce_align_get_gblocks_trimmed_alignments_from_untrimmed --alignments ./polyploids/mafft-incomplete-internal-trimmed/ --output ./polyploids/mafft-incomplete-internal-trimmed-gblocks/ --output-format fasta --log ./polyploids/
phyluce_align_remove_locus_name_from_nexus_lines --alignments ./polyploids/mafft-incomplete-internal-trimmed-gblocks/ --output ./polyploids/mafft-incomplete-internal-trimmed-gblocks-cleaned/ --output-format fasta --log-path ./polyploids/
for i in Isoetes_* ; do spades.py -1 $i/split-adapter-quality-trimmed/*READ1* -2 $i/split-adapter-quality-trimmed/*READ2* --careful -o $i/split-adapter-quality-trimmed/spades/ ; done
for i in Isoetes_* ; do cp $i/split-adapter-quality-trimmed/spades_filtered/scaffolds.fasta $i/split-adapter-quality-trimmed/ ; bwa index $i/split-adapter-quality-trimmed/scaffolds.fasta ; bwa mem $i/split-adapter-quality-trimmed/scaffolds.fasta $i/split-adapter-quality-trimmed/*R1* $i/split-adapter-quality-trimmed/*R2* > $i/split-adapter-quality-trimmed/$i.sam ; samtools view -S -b $i/split-adapter-quality-trimmed/$i.sam > $i/split-adapter-quality-trimmed/$i.bam ; samtools sort $i/split-adapter-quality-trimmed/$i.bam $i/split-adapter-quality-trimmed/$i.sorted ; samtools phase -AF -b $i $i/split-adapter-quality-trimmed/$i.sorted.bam ; done
### BWA
### Loops through all sample directories, build BWA index from SPAdes contigs, build SAM alignment of samples fastq reads against contigs
for i in Isoetes_* ; do bwa index $i/split-adapter-quality-trimmed/spades/contigs.fasta ; bwa mem $i/split-adapter-quality-trimmed/spades/contigs.fasta $i/split-adapter-quality-trimmed/*READ1* $i/split-adapter-quality-trimmed/*READ2* > $i/split-adapter-quality-trimmed/spades/$i.sam; done
### Convert SAM to BAM files
for i in Isoetes_* ; do samtools view -bS $i/split-adapter-quality-trimmed/spades/$i.sam > $i/split-adapter-quality-trimmed/spades/$i.bam ; done
### Sort BAM files prior to phaseing
for i in Isoetes_* ; do samtools sort -o $i/split-adapter-quality-trimmed/spades/$i.sorted.bam $i/split-adapter-quality-trimmed/spades/$i.bam ; done
### Phase BAM files
for i in Isoetes_* ; do samtools phase -b $i/split-adapter-quality-trimmed/spades/$i $i/split-adapter-quality-trimmed/spades/$i.sorted.bam ; done
### Extract reads from phased BAM files
for i in Isoetes_* ; do samtools sort -n -o $i/split-adapter-quality-trimmed/spades/$i.0.sort.bam $i/split-adapter-quality-trimmed/spades/$i.0.bam && samtools fixmate -m $i/split-adapter-quality-trimmed/spades/$i.0.sort.bam $i/split-adapter-quality-trimmed/spades/$i.0.fixMate.bam && samtools sort -o $i/split-adapter-quality-trimmed/spades/$i.0.sort.bam $i/split-adapter-quality-trimmed/spades/$i.0.fixMate.bam && samtools markdup -r $i/split-adapter-quality-trimmed/spades/$i.0.sort.bam $i/split-adapter-quality-trimmed/spades/$i.0.rmDup.bam && bam2fastq $i/split-adapter-quality-trimmed/spades/$i.0.rmDup.bam -o $i/split-adapter-quality-trimmed/spades/Phase0_READ#.fastq ; done
for i in Isoetes_* ; do samtools sort -n -o $i/split-adapter-quality-trimmed/spades/$i.1.sort.bam $i/split-adapter-quality-trimmed/spades/$i.1.bam && samtools fixmate -m $i/split-adapter-quality-trimmed/spades/$i.1.sort.bam $i/split-adapter-quality-trimmed/spades/$i.1.fixMate.bam && samtools sort -o $i/split-adapter-quality-trimmed/spades/$i.1.sort.bam $i/split-adapter-quality-trimmed/spades/$i.1.fixMate.bam && samtools markdup -r $i/split-adapter-quality-trimmed/spades/$i.1.sort.bam $i/split-adapter-quality-trimmed/spades/$i.1.rmDup.bam && bam2fastq $i/split-adapter-quality-trimmed/spades/$i.1.rmDup.bam -o $i/split-adapter-quality-trimmed/spades/Phase1_READ#.fastq ; done
samtools sort -n -o Isoetes_septentrionalis.0.sort.bam Isoetes_septentrionalis.0.bam
samtools fixmate -m Isoetes_septentrionalis.0.sort.bam Isoetes_septentrionalis.fixMate.bam
samtools sort -o Isoetes_septentrionalis.0.sort.bam Isoetes_septentrionalis.fixMate.bam
samtools markdup -r Isoetes_septentrionalis.0.sort.bam Isoetes_septentrionalis.rmDup.bam
bam2fastq Isoetes_septentrionalis.rmDup.bam -o Isoetes_septPhase0-R#.fastq
samtools sort -n -o sort.bam Isoetes_septentrionalis.0.bam && samtools fixmate -m sort.bam fixMate.bam && samtools sort -o sort.bam fixMate.bam && samtools markdup -r sort.bam rmDup.bam && bam2fastq rmDup.bam -o Isoetes_septPhase0-R#.fastq
phasedSamples.txt
[samples] Isoetes_appPhase0:/Volumes/Samsung_T5/IsoetesDNA/GoFlag/Phyluce/Isoetes_appPhase0/split-adapter-quality-trimmed/ Isoetes_appPhase1:/Volumes/Samsung_T5/IsoetesDNA/GoFlag/Phyluce/Isoetes_appPhase1/split-adapter-quality-trimmed/ Isoetes_boliPhase0:/Volumes/Samsung_T5/IsoetesDNA/GoFlag/Phyluce/Isoetes_boliPhase0/ Isoetes_boliPhase1:/Volumes/Samsung_T5/IsoetesDNA/GoFlag/Phyluce/Isoetes_boliPhase1/ Isoetes_hyemPhase0:/Volumes/Samsung_T5/IsoetesDNA/GoFlag/Phyluce/Isoetes_hyemPhase0/ Isoetes_hyemPhase1:/Volumes/Samsung_T5/IsoetesDNA/GoFlag/Phyluce/Isoetes_hyemPhase1/ Isoetes_juncPhase0:/Volumes/Samsung_T5/IsoetesDNA/GoFlag/Phyluce/Isoetes_juncPhase0/ Isoetes_juncPhase1:/Volumes/Samsung_T5/IsoetesDNA/GoFlag/Phyluce/Isoetes_juncPhase1/ Isoetes_louiPhase0:/Volumes/Samsung_T5/IsoetesDNA/GoFlag/Phyluce/Isoetes_louiPhase0/ Isoetes_louiPhase1:/Volumes/Samsung_T5/IsoetesDNA/GoFlag/Phyluce/Isoetes_louiPhase1/ Isoetes_septPhase0:/Volumes/Samsung_T5/IsoetesDNA/GoFlag/Phyluce/Isoetes_septPhase0/split-adapter-quality-trimmed/ Isoetes_septPhase1:/Volumes/Samsung_T5/IsoetesDNA/GoFlag/Phyluce/Isoetes_septPhase1/split-adapter-quality-trimmed/ Isoetes_tuckPhase0:/Volumes/Samsung_T5/IsoetesDNA/GoFlag/Phyluce/Isoetes_tuckPhase0/split-adapter-quality-trimmed/ Isoetes_tuckPhase1:/Volumes/Samsung_T5/IsoetesDNA/GoFlag/Phyluce/Isoetes_tuckPhase1/split-adapter-quality-trimmed/ Isoetes_virgPhase0:/Volumes/Samsung_T5/IsoetesDNA/GoFlag/Phyluce/Isoetes_virgPhase0/ Isoetes_virgPhase1:/Volumes/Samsung_T5/IsoetesDNA/GoFlag/Phyluce/Isoetes_virgPhase1/
phased-dataset.txt
[phased-dataset] Isoetes_appPhase0 Isoetes_appPhase1 Isoetes_boliPhase0 Isoetes_boliPhase1 Isoetes_hyemPhase0 Isoetes_hyemPhase1 Isoetes_juncPhase0 Isoetes_juncPhase1 Isoetes_louiPhase0 Isoetes_louiPhase1 Isoetes_septPhase0 Isoetes_septPhase1 Isoetes_tuckPhase0 Isoetes_tuckPhase1 Isoetes_virgPhase0 Isoetes_virgPhase1
phyluce_assembly_assemblo_spades --config phasedSamples.txt --output ./phased-assemblies/
phyluce_assembly_match_contigs_to_probes --contigs ./phased-assemblies/contigs/ --probes Isoetes_GoFlag_probeset.fasta --output ./phased-assemblies/target_loci/ --log-path ./phased-assemblies/
phyluce_assembly_get_match_counts --locus-db ./phased-assemblies/target_loci/probe.matches.sqlite --taxon-list-config phased-dataset.txt --taxon-group 'phased-dataset' --output ./phased-assemblies/target_loci/phased-dataset.complete.conf
phyluce_assembly_get_match_counts --locus-db ./phased-assemblies/target_loci/probe.matches.sqlite --taxon-list-config phased-dataset.txt --taxon-group 'phased-dataset' --incomplete-matrix --output ./phased-assemblies/target_loci/phased-dataset.incomplete.conf
phyluce_assembly_get_fastas_from_match_counts --contigs ./phased-assemblies/contigs/ --locus-db ./phased-assemblies/target_loci/probe.matches.sqlite --match-count-output ./phased-assemblies/target_loci/phased-dataset.complete.conf --output ./phased-assemblies/target_loci/phased-dataset.complete.fasta
phyluce_assembly_get_fastas_from_match_counts --contigs ./phased-assemblies/contigs/ --locus-db ./phased-assemblies/target_loci/probe.matches.sqlite --match-count-output ./phased-assemblies/target_loci/phased-dataset.incomplete.conf --incomplete-matrix ./phased-assemblies/target_loci/phased-dataset.incomplete.taxa --output ./phased-assemblies/target_loci/phased-dataset.incomplete.fasta
for i in {1..435} ; do grep "$i" phased-dataset.complete.fasta >> completeLoci.txt; done
for i in {1..435} ; do grep "$i" phased-dataset.incomplete.fasta >> incompleteLoci.txt; done
phyluceLocusAligner.py completeLoci.txt
#! /usr/bin/python
#phyluceLocusAligner.py
#Peter W. Schafran 2019
import sys
lociFile = sys.argv[1]
openLociFile = open(lociFile, "r")
uceList = []
uceDict = {}
for line in openLociFile:
splitline = line.strip("\n").split("|")
uceList.append(splitline[1])
try:
uceDict[splitline[1]].append(splitline[0])
except:
uceDict[splitline[1]] = []
uceDict[splitline[1]].append(splitline[0])
for key in uceDict.keys():
outfile = open("%s.txt" %(key), "w")
for item in sorted(set(uceDict[key])):
outfile.write("%s|%s\n" %(item, key))
outfile.close()
openLociFile.close()
for i in uce-*txt ; do getScaffoldsFromFasta.py phased-dataset.complete.fasta $i ; done # See https://peterwschafran.com/scripts.html or below for Python script
#### getScaffoldsFromFasta.py reproduced below this line ####
#! /usr/bin/python
'''Command Line:getScaffoldsFromFasta.py scaffoldFile.fasta ScaffoldID(.txt)'''
import sys
scaffoldFile = sys.argv[1]
scaffoldID = sys.argv[2]
openInFile = open(scaffoldFile, "r")
openOutFile = open("%s.fasta" %(scaffoldID), "w")
if ".txt" in scaffoldID:
infile = open(scaffoldID, "r")
for scaffoldName in infile:
writeOut = 0
for line in openInFile:
if ">" in line and scaffoldName.strip(">\n") == line.strip(">\n"):
print "Found scaffold %s" %(line)
writeOut = 1
openOutFile.write(line)
if ">" in line and scaffoldName not in line:
writeOut = 0
if ">" not in line:
if writeOut == 1:
openOutFile.write(line)
openInFile.seek(0)
else:
writeOut = 0
for line in openInFile:
if ">" in line and scaffoldID in line:
print "Found scaffold %s" %(line)
writeOut = 1
openOutFile.write(line)
if ">" in line and scaffoldID not in line:
writeOut = 0
if ">" not in line:
if writeOut == 1:
openOutFile.write(line)
openInFile.close()
openOutFile.close()
infile.close()
for i in {1..435}; do cat ../../noSinensisRef/target_loci/aligned/uce-$i.fasta uce-$i.txt.fasta > uce-$i-combined.fasta; done
for i in uce-*-combined.fasta ; do mafft --auto $i > MAFFT_$i ; done
for i in MAFFT_uce-*-combined.fasta ; do num=$(grep -c ">" $i ) ; if [ $num > 27 ] ; then iqtree -s $i -m MFP -bb 5000 -alrt 5000 -nm 5000 ; fi ; done
grep -h "Isoetes_engelmannii" MAFFT_uce-*-combined.fasta.treefile | grep "Isoetes_valida" | grep "Isoetes_appPhase0" | grep "Isoetes_appPhase1" > MAFFT.IQTREE.AppalachianaComplex.trees
sed -E s/uce-[0-9]+_//g < MAFFT.IQTREE.AppalachianaComplex.trees > MAFFT.IQTREE.AppalachianaComplex.renamed.trees
library(ape)
library(phytools)
appTrees <- read.tree("MAFFT.IQTREE.AppalachianaComplex.renamed.trees")
appMatrix <- matrix(data = NA, ncol = 8, nrow = length(appTrees)*2)
colnames(appMatrix) <- c("eng-app0", "val-app0", "app0-app1", "val-app1", "eng-app1", "app0-app1", "App0 Min Distance", "App1 Min Distance")
j <- 1
for (i in seq(1,length(appTrees),1)) {
appMatrix[j,1] <- fastDist(appTrees[[i]], "Isoetes_engelmannii", "Isoetes_appPhase0")
appMatrix[j,2] <- fastDist(appTrees[[i]], "Isoetes_valida", "Isoetes_appPhase0")
appMatrix[j,3] <- fastDist(appTrees[[i]], "Isoetes_appPhase0", "Isoetes_appPhase1")
appMatrix[j+1,4] <- fastDist(appTrees[[i]], "Isoetes_valida", "Isoetes_appPhase1")
appMatrix[j+1,5] <- fastDist(appTrees[[i]], "Isoetes_engelmannii", "Isoetes_appPhase1")
appMatrix[j+1,6] <- fastDist(appTrees[[i]], "Isoetes_appPhase0", "Isoetes_appPhase1")
appMatrix[j,7] <- colnames(appMatrix)[which.min(appMatrix[i,])]
appMatrix[j+1,8] <- colnames(appMatrix)[which.min(appMatrix[i+1,])]
j <- j + 2
}
phaseSums <- matrix(data = NA, nrow = 1, ncol = 5)
colnames(phaseSums) <- c("eng-app0", "val-app0", "eng-app1", "val-app1", "app0-app1")
phaseSums[1,1] <- sum(appMatrix[,7] == "eng-app0", na.rm = TRUE)
phaseSums[1,2] <- sum(appMatrix[,7] == "val-app0", na.rm = TRUE)
phaseSums[1,3] <- sum(appMatrix[,8] == "eng-app1", na.rm = TRUE)
phaseSums[1,4] <- sum(appMatrix[,8] == "val-app1", na.rm = TRUE)
phaseSums[1,5] <- sum(appMatrix[,8] == "app0-app1", na.rm = TRUE)
x <- barplot(phaseSums, xaxt = "n", ylab = "Number of genes supporting relationship")
labs <- colnames(phaseSums)
text(cex=1, x=x-.25, y=-1, labs, xpd=TRUE, srt=45)
from IPython.display import Image
from IPython.core.display import HTML
Image(url = "https://peterwschafran.com/codebooks/ComparePhyluceTrees.svg")
Example command line: reads_first.py -r READ1.fastq READ2.fastq -b Isoetes_Unique_GoFlag_target_scaffolds.fasta --prefix Isoetes_spPhase0 --bwa
reads_first.py -r /Volumes/Samsung_T5/IsoetesDNA/GoFlag/Phyluce/Isoetes_appPhase0/split-adapter-quality-trimmed/Isoetes_appPhase0-READ1.fastq /Volumes/Samsung_T5/IsoetesDNA/GoFlag/Phyluce/Isoetes_appPhase0/split-adapter-quality-trimmed/Isoetes_appPhase0-READ2.fastq -b Isoetes_Unique_GoFlag_target_scaffolds.fasta --prefix I_appPhase0 --bwa
reads_first.py -r /Volumes/Samsung_T5/IsoetesDNA/GoFlag/Phyluce/Isoetes_appPhase1/split-adapter-quality-trimmed/Isoetes_appPhase1-READ1.fastq /Volumes/Samsung_T5/IsoetesDNA/GoFlag/Phyluce/Isoetes_appPhase1/split-adapter-quality-trimmed/Isoetes_appPhase1-READ2.fastq -b Isoetes_Unique_GoFlag_target_scaffolds.fasta --prefix I_appPhase1 --bwa
for i in I_app*; do cleanup.py $i; done
python /Applications/Phylogenetics/HybPiper/HybPiper/get_seq_lengths.py Isoetes_Unique_GoFlag_target_scaffolds.fasta namelist.txt dna > sequence_lengths.txt # must be called with "python get_seq_lengths.py ..." due to preserve correct sys.argv order
python /Applications/Phylogenetics/HybPiper/HybPiper/hybpiper_stats.py sequence_lengths.txt namelist.txt > statistics.txt
for i in I_*; do python /Applications/Phylogenetics/HybPiper/HybPiper/intronerate.py --prefix $i; done
python /Applications/Phylogenetics/HybPiper/HybPiper/retrieve_sequences.py Isoetes_Unique_GoFlag_target_scaffolds.fasta . dna
python /Applications/Phylogenetics/HybPiper/HybPiper/retrieve_sequences.py Isoetes_Unique_GoFlag_target_scaffolds.fasta . intron
python /Applications/Phylogenetics/HybPiper/HybPiper/retrieve_sequences.py Isoetes_Unique_GoFlag_target_scaffolds.fasta . supercontig
while read i; do python /Applications/Phylogenetics/HybPiper/HybPiper/paralog_investigator.py $i; done < namelist.txt
while read i; do python /Applications/Phylogenetics/HybPiper/HybPiper/paralog_retriever.py namelist.txt $i > $i.paralogs.fasta; done < all_loci.txt
cd ./dna
for i in *.FNA; do mafft --auto $i > MAFFT.dna.$i; done
for i in MAFFT.dna.*.FNA ; do iqtree -s $i -m MFP -bb 5000 -alrt 5000 -nm 5000 ; done
grep -h "Isoetes-engelmannii" MAFFT.dna.L*treefile | grep "Isoetes-valida" | grep "I_appPhase0" | grep "I_appPhase1" > MAFFT.IQTREE.AppalachianaComplex.trees
from IPython.display import Image
from IPython.core.display import HTML
Image(url = "https://peterwschafran.com/codebooks/CompareHybPiperTrees.svg")
### BWA
### Loops through all sample directories, build BWA index from HybPiper contigs, build SAM alignment of samples fastq reads against contigs
cd /Volumes/Samsung_T5/IsoetesDNA/GoFlag/HybPiper/I_appalachiana_WC10
for i in L* ; do bwa index $i/I_appalachiana_WC10/sequences/FNA/$i.FNA ; bwa mem $i/I_appalachiana_WC10/sequences/FNA/$i.FNA /Volumes/Samsung_T5/IsoetesDNA/GoFlag/Schafran_GoFlag/A01_trimmedReads/RAPiD-Genomics_F065_UFG_393202_P020_WC10_i5-514_i7-47_S2461_L007_R1_001_val_1.fq /Volumes/Samsung_T5/IsoetesDNA/GoFlag/Schafran_GoFlag/A01_trimmedReads/RAPiD-Genomics_F065_UFG_393202_P020_WC10_i5-514_i7-47_S2461_L007_R2_001_val_2.fq > $i.sam; done
### Convert SAM to BAM files
for i in L* ; do samtools view -bS $i.sam > $i.bam ; done
### Sort BAM files prior to phaseing
for i in L* ; do samtools sort -o $i/$i.sorted.bam bam/$i.bam ; done
### Phase BAM files
for i in L* ; do samtools phase -Q 33 -q 5 -b $i/$i $i/$i.sorted.bam ; done
### Extract reads from phased BAM files
for i in L* ; do samtools sort -n -o $i/$i.0.sort.bam $i/$i.0.bam && samtools fixmate -m $i/$i.0.sort.bam $i/$i.0.fixMate.bam && samtools sort -o $i/$i.0.sort.bam $i/$i.0.fixMate.bam && samtools markdup -r $i/$i.0.sort.bam $i/$i.0.rmDup.bam && bam2fastq $i/$i.0.rmDup.bam -o $i/$i-Phase0-R#.fastq ; done
for i in L* ; do samtools sort -n -o $i/$i.1.sort.bam $i/$i.1.bam && samtools fixmate -m $i/$i.1.sort.bam $i/$i.1.fixMate.bam && samtools sort -o $i/$i.1.sort.bam $i/$i.1.fixMate.bam && samtools markdup -r $i/$i.1.sort.bam $i/$i.1.rmDup.bam && bam2fastq $i/$i.1.rmDup.bam -o $i/$i-Phase1-R#.fastq ; done
for i in L* ; do spades.py -1 $i/$i-Phase0-R_1.fastq -2 $i/$i-Phase0-R_2.fastq --careful -o $i/spades_Phase0 ; cp $i/spades_Phase0/scaffolds.fasta $i-Phase0-scaffolds.fasta ; done
for i in L* ; do spades.py -1 $i/$i-Phase1-R_1.fastq -2 $i/$i-Phase1-R_2.fastq --careful -o $i/spades_Phase1 ; cp $i/spades_Phase1/scaffolds.fasta $i-Phase1-scaffolds.fasta ; done
cd /Volumes/Samsung_T5/IsoetesDNA/GoFlag/HybPiper/I_appalachiana_WC10/phased_reads/
for i in {1..453} ; do for j in {0..1} ; do reads_first.py -r L$i-Phase$j-R_1.fastq L$i-Phase$j-R_2.fastq -b ../Isoetes_Unique_GoFlag_target_scaffolds.fasta --prefix L$i-Phase$j --bwa ; done ; done
for i in {1..453} ; do for j in {0..1} ; do cleanup.py L$i-Phase$j; done ; done
python /Applications/Phylogenetics/HybPiper/HybPiper/get_seq_lengths.py Isoetes_Unique_GoFlag_target_scaffolds.fasta namelist.txt dna > sequence_lengths.txt # must be called with "python get_seq_lengths.py ..." due to preserve correct sys.argv order
for i in L* ; do echo $i >> namelist.txt ; done
python /Applications/Phylogenetics/HybPiper/HybPiper/get_seq_lengths.py ../../Isoetes_Unique_GoFlag_target_scaffolds.fasta namelist.txt dna > sequence_lengths.txt # must be called with "python get_seq_lengths.py ..." due to preserve correct sys.argv order
python /Applications/Phylogenetics/HybPiper/HybPiper/hybpiper_stats.py sequence_lengths.txt namelist.txt > statistics.txt
#### This fails for some reason, possibly b/c there are loci with 0 reads
Traceback (most recent call last):
File "/Applications/Phylogenetics/HybPiper/HybPiper/hybpiper_stats.py", line 153, in <module>
if __name__ == "__main__":main()
File "/Applications/Phylogenetics/HybPiper/HybPiper/hybpiper_stats.py", line 130, in main
stats_dict[name] += enrich_efficiency_bwa(bamfile)
File "/Applications/Phylogenetics/HybPiper/HybPiper/hybpiper_stats.py", line 51, in enrich_efficiency_bwa
return str(int(numReads)),str(int(mappedReads)),"{0:.3f}".format(mappedReads/numReads)
ZeroDivisionError: float division by zero
python /Applications/Phylogenetics/HybPiper/HybPiper/retrieve_sequences.py Isoetes_Unique_GoFlag_target_scaffolds.fasta . dna
for i in *.FNA ; do num=$(grep -c ">" $i ) ; if [ $num = 2 ] ; then echo $i >> twoPhaseLoci.txt ; fi ; done
while read i ; do num1=$(grep -c "Phase0" $i) ; num2=$(grep -c "Phase1" $i) ; if [[ num1 -eq 1 || num2 -eq 1 ]] ; then echo $i > goodPhasedLoci.txt ; fi ; done < twoPhaseLoci.txt
while read i ; do cat /Volumes/Samsung_T5/IsoetesDNA/GoFlag/HybPiper/I_appalachiana_WC10/phased_reads/$i /Volumes/Samsung_T5/IsoetesDNA/GoFlag/HybPiper/wSinensisRef/diploidsOnly/targetRegions/seqs/$i > Diploids-appPhases-combined-$i ; done < ../../../../I_appalachiana_WC10/phased_reads/goodPhasedLoci.txt
for i in Diploids-appPhases-combined-L* ; do sed -E s/L[0-9]+-/I_app/g < $i > $i.renamed.fasta ; done
for i in Diploids-appPhases-combined-L*.renamed.fasta ; do mafft --auto $i > MAFFT_$i ; done
for i in MAFFT_Diploids-appPhases-combined-L*.renamed.fasta ; do iqtree -s $i -m MFP -bb 5000 -alrt 5000 -nm 5000 ; done
grep -h "I_engelmannii" MAFFT_uce-*-combined.fasta.treefile | grep "I_valida" | grep "I_appPhase0" | grep "I_appPhase1" > MAFFT.IQTREE.AppalachianaComplex.trees
from IPython.display import Image
from IPython.core.display import HTML
Image(url = "https://peterwschafran.com/codebooks/CompareHybPiperTrees2.svg")
library(ape)
library(phytools)
appTrees <- read.tree(file.choose())
View(appTrees)
appMatrix <- matrix(data = NA, ncol = (length(appTrees[[1]]$tip.label)*2)-2, nrow = length(appTrees)*2)
colnames(appMatrix) <- c("I_valida_WC11","I_engelmannii_WA10","I_butleri_WA12", "I_flaccida_WD11","I_storkii_WF12","I_flaccida_WE11", "I_mattaponica_WB11", "I_melanopoda_WG11" ,"I_piedmontana_WF11", "I_lithophila_WB12", "I_melanopoda_WH11", "I_valida_WC11","I_engelmannii_WA10","I_butleri_WA12", "I_flaccida_WD11","I_storkii_WF12","I_flaccida_WE11", "I_mattaponica_WB11", "I_melanopoda_WG11" ,"I_piedmontana_WF11", "I_lithophila_WB12", "I_melanopoda_WH11" ,"App0 Min Distance", "App1 Min Distance")
j <- 1
engelmanniiPhase0_validaPhase1_Trees <- list()
eng0val1_counter <- 1
engelmanniiPhase1_validaPhase0_Trees <- list()
eng1val0_counter <- 1
for (i in seq(1,length(appTrees),1)) {
try(appMatrix[j,1] <- fastDist(appTrees[[i]], "I_valida_WC11", "I_appPhase0"))
try(appMatrix[j,2] <- fastDist(appTrees[[i]], "I_engelmannii_WA10", "I_appPhase0"))
try(appMatrix[j,3] <- fastDist(appTrees[[i]], "I_butleri_WA12", "I_appPhase0"))
try(appMatrix[j,4] <- fastDist(appTrees[[i]], "I_flaccida_WD11", "I_appPhase0"))
try(appMatrix[j,5] <- fastDist(appTrees[[i]], "I_storkii_WF12", "I_appPhase0"))
try(appMatrix[j,6] <- fastDist(appTrees[[i]], "I_flaccida_WE11", "I_appPhase0"))
try(appMatrix[j,7] <- fastDist(appTrees[[i]], "I_mattaponica_WB11", "I_appPhase0"))
try(appMatrix[j,8] <- fastDist(appTrees[[i]], "I_melanopoda_WG11", "I_appPhase0"))
try(appMatrix[j,9] <- fastDist(appTrees[[i]], "I_piedmontana_WF11", "I_appPhase0"))
try(appMatrix[j,10] <- fastDist(appTrees[[i]], "I_lithophila_WB12", "I_appPhase0"))
try(appMatrix[j,11] <- fastDist(appTrees[[i]], "I_melanopoda_WH11", "I_appPhase0"))
try(appMatrix[j+1,12] <- fastDist(appTrees[[i]], "I_valida_WC11", "I_appPhase1"))
try(appMatrix[j+1,13] <- fastDist(appTrees[[i]], "I_engelmannii_WA10", "I_appPhase1"))
try(appMatrix[j+1,14] <- fastDist(appTrees[[i]], "I_butleri_WA12", "I_appPhase1"))
try(appMatrix[j+1,15] <- fastDist(appTrees[[i]], "I_flaccida_WD11", "I_appPhase1"))
try(appMatrix[j+1,16] <- fastDist(appTrees[[i]], "I_storkii_WF12", "I_appPhase1"))
try(appMatrix[j+1,17] <- fastDist(appTrees[[i]], "I_flaccida_WE11", "I_appPhase1"))
try(appMatrix[j+1,18] <- fastDist(appTrees[[i]], "I_mattaponica_WB11", "I_appPhase1"))
try(appMatrix[j+1,19] <- fastDist(appTrees[[i]], "I_melanopoda_WG11", "I_appPhase1"))
try(appMatrix[j+1,20] <- fastDist(appTrees[[i]], "I_piedmontana_WF11", "I_appPhase1"))
try(appMatrix[j+1,21] <- fastDist(appTrees[[i]], "I_lithophila_WB12", "I_appPhase1"))
try(appMatrix[j+1,22] <- fastDist(appTrees[[i]], "I_melanopoda_WH11", "I_appPhase1"))
appMatrix[j,23] <- colnames(appMatrix)[which.min(appMatrix[i,])]
appMatrix[j+1,24] <- colnames(appMatrix)[which.min(appMatrix[i+1,])]
j <- j + 2
}
par(mfrow = c(2,1))
phase0table <- table(appMatrix[,23])
x <- barplot(phase0table, xaxt = "n", ylab = "Number of loci with closest diploid", main = "Isoetes appalachiana Phase 0")
labs <- names(phase0table)
phase1table <- table(appMatrix[,24])
x <- barplot(phase1table, xaxt = "n", ylab = "Number of loci with closest diploid", main = "Isoetes appalachiana Phase 1")
text(cex=0.75, x=x, y=-1.25, labs, xpd=TRUE, srt=45, adj = c(1,0))
### This plot posted below
par(mfrow = c(1,1))
bothphaseTable <- table(appMatrix[,23:24])
labs <- names(bothphaseTable)
x <- barplot(bothphaseTable, xaxt = "n", ylab = "Number of loci with closest diploid", main = "Isoetes appalachiana")
text(cex=0.75, x=x, y=-1.25, labs, xpd=TRUE, srt=45, adj = c(1,0))
from IPython.display import Image
from IPython.core.display import HTML
Image(url = "https://peterwschafran.com/codebooks/CompareHybPiperAllDiploids.svg")