Local Directory: /Volumes/Seagate_Blue/IsoetesDNA/PacBio/
sftp zimmer_5478@dnaseq2.igsp.duke.edu
Password: [REDACTED - do not keep credentials in notes; store this in a password manager and rotate it]
cd ./zimmer_5478/Zimmer_5478_20190219/
sftp> ls -lah
drwxr-xr-x ? 0 501 194B Feb 19 07:45 .
dr-x------ ? 2734 501 38B Feb 19 07:07 ..
drwxr-xr-x ? 0 501 638B Feb 19 07:30 2_B01
drwxr-xr-x ? 0 501 638B Feb 19 07:17 3_C01
drwxr-xr-x ? 0 501 638B Feb 19 07:26 4_D01
drwxr-xr-x ? 0 501 638B Feb 19 07:45 5_E01
drwxr-xr-x ? 0 501 408B Feb 19 07:45 pbcoretools.tasks.bam2fasta_ccs-0
drwxr-xr-x ? 0 501 408B Feb 19 07:17 pbcoretools.tasks.bam2fastq_ccs-0
sftp> ls -lah ./*/*
-rw-r--r-- ? 0 501 60B Feb 19 07:26 ./2_B01/m54089_190216_063434.adapters.fasta
-rw-r--r-- ? 0 501 898K Feb 19 07:26 ./2_B01/m54089_190216_063434.baz2bam_1.log
-rw-r--r-- ? 0 501 36.0G Feb 19 07:35 ./2_B01/m54089_190216_063434.scraps.bam
-rw-r--r-- ? 0 501 145M Feb 19 07:30 ./2_B01/m54089_190216_063434.scraps.bam.pbi
-rw-r--r-- ? 0 501 103K Feb 19 07:26 ./2_B01/m54089_190216_063434.sts.xml
-rw-r--r-- ? 0 501 27.0G Feb 19 07:30 ./2_B01/m54089_190216_063434.subreads.bam
-rw-r--r-- ? 0 501 111M Feb 19 07:26 ./2_B01/m54089_190216_063434.subreads.bam.pbi
-rw-r--r-- ? 0 501 16.4K Feb 19 07:26 ./2_B01/m54089_190216_063434.subreadset.xml
-rw-r--r-- ? 0 501 535B Feb 19 07:30 ./2_B01/m54089_190216_063434.transferdone
-rw-r--r-- ? 0 501 116K Feb 19 07:26 ./2_B01/tmp-file-3f735aa3-d04a-4ee4-bcb7-0b8f998535f5.txt
-rw-r--r-- ? 0 501 60B Feb 19 07:12 ./3_C01/m54089_190216_165938.adapters.fasta
-rw-r--r-- ? 0 501 931K Feb 19 07:17 ./3_C01/m54089_190216_165938.baz2bam_1.log
-rw-r--r-- ? 0 501 34.9G Feb 19 07:12 ./3_C01/m54089_190216_165938.scraps.bam
-rw-r--r-- ? 0 501 153M Feb 19 07:17 ./3_C01/m54089_190216_165938.scraps.bam.pbi
-rw-r--r-- ? 0 501 103K Feb 19 07:12 ./3_C01/m54089_190216_165938.sts.xml
-rw-r--r-- ? 0 501 28.9G Feb 19 07:17 ./3_C01/m54089_190216_165938.subreads.bam
-rw-r--r-- ? 0 501 120M Feb 19 07:17 ./3_C01/m54089_190216_165938.subreads.bam.pbi
-rw-r--r-- ? 0 501 16.4K Feb 19 07:07 ./3_C01/m54089_190216_165938.subreadset.xml
-rw-r--r-- ? 0 501 535B Feb 19 07:17 ./3_C01/m54089_190216_165938.transferdone
-rw-r--r-- ? 0 501 116K Feb 19 07:12 ./3_C01/tmp-file-d97896fe-6156-4627-879e-1a8761c02a91.txt
-rw-r--r-- ? 0 501 60B Feb 19 07:22 ./4_D01/m54089_190217_032517.adapters.fasta
-rw-r--r-- ? 0 501 897K Feb 19 07:22 ./4_D01/m54089_190217_032517.baz2bam_1.log
-rw-r--r-- ? 0 501 35.5G Feb 19 07:22 ./4_D01/m54089_190217_032517.scraps.bam
-rw-r--r-- ? 0 501 148M Feb 19 07:22 ./4_D01/m54089_190217_032517.scraps.bam.pbi
-rw-r--r-- ? 0 501 103K Feb 19 07:17 ./4_D01/m54089_190217_032517.sts.xml
-rw-r--r-- ? 0 501 27.2G Feb 19 07:26 ./4_D01/m54089_190217_032517.subreads.bam
-rw-r--r-- ? 0 501 113M Feb 19 07:22 ./4_D01/m54089_190217_032517.subreads.bam.pbi
-rw-r--r-- ? 0 501 16.4K Feb 19 07:26 ./4_D01/m54089_190217_032517.subreadset.xml
-rw-r--r-- ? 0 501 535B Feb 19 07:26 ./4_D01/m54089_190217_032517.transferdone
-rw-r--r-- ? 0 501 116K Feb 19 07:26 ./4_D01/tmp-file-1059bbc6-4f3b-46e7-bf03-dd9c4a20ce74.txt
-rw-r--r-- ? 0 501 60B Feb 19 07:35 ./5_E01/m54089_190217_135053.adapters.fasta
-rw-r--r-- ? 0 501 761K Feb 19 07:41 ./5_E01/m54089_190217_135053.baz2bam_1.log
-rw-r--r-- ? 0 501 37.0G Feb 19 07:41 ./5_E01/m54089_190217_135053.scraps.bam
-rw-r--r-- ? 0 501 139M Feb 19 07:41 ./5_E01/m54089_190217_135053.scraps.bam.pbi
-rw-r--r-- ? 0 501 102K Feb 19 07:41 ./5_E01/m54089_190217_135053.sts.xml
-rw-r--r-- ? 0 501 24.6G Feb 19 07:45 ./5_E01/m54089_190217_135053.subreads.bam
-rw-r--r-- ? 0 501 101M Feb 19 07:41 ./5_E01/m54089_190217_135053.subreads.bam.pbi
-rw-r--r-- ? 0 501 16.4K Feb 19 07:41 ./5_E01/m54089_190217_135053.subreadset.xml
-rw-r--r-- ? 0 501 535B Feb 19 07:41 ./5_E01/m54089_190217_135053.transferdone
-rw-r--r-- ? 0 501 116K Feb 19 07:35 ./5_E01/tmp-file-7a16764d-48de-4b4e-b3fb-14f4825edcc7.txt
-rw-r--r-- ? 0 501 239M Feb 19 07:45 ./pbcoretools.tasks.bam2fasta_ccs-0/ccs.fasta.zip
-rw-r--r-- ? 0 501 742B Feb 19 07:45 ./pbcoretools.tasks.bam2fasta_ccs-0/cluster.sh
-rw-r--r-- ? 0 501 0B Feb 19 07:45 ./pbcoretools.tasks.bam2fasta_ccs-0/cluster.stderr
-rw-r--r-- ? 0 501 362B Feb 19 07:45 ./pbcoretools.tasks.bam2fasta_ccs-0/cluster.stdout
-rw-r--r-- ? 0 501 1.2K Feb 19 07:45 ./pbcoretools.tasks.bam2fasta_ccs-0/resolved-tool-contract.json
-rw-r--r-- ? 0 501 1.0K Feb 19 07:45 ./pbcoretools.tasks.bam2fasta_ccs-0/run.sh
-rw-r--r-- ? 0 501 2.1K Feb 19 07:45 ./pbcoretools.tasks.bam2fasta_ccs-0/runnable-task.json
-rw-r--r-- ? 0 501 0B Feb 19 07:45 ./pbcoretools.tasks.bam2fasta_ccs-0/stderr
-rw-r--r-- ? 0 501 2.6K Feb 19 07:45 ./pbcoretools.tasks.bam2fasta_ccs-0/stdout
-rw-r--r-- ? 0 501 1.2K Feb 19 07:45 ./pbcoretools.tasks.bam2fasta_ccs-0/task-report.json
-rw-r--r-- ? 0 501 1.6K Feb 19 07:45 ./pbcoretools.tasks.bam2fasta_ccs-0/tool-contract.json
-rw-r--r-- ? 0 501 505M Feb 19 07:17 ./pbcoretools.tasks.bam2fastq_ccs-0/ccs.fastq.zip
-rw-r--r-- ? 0 501 742B Feb 19 07:17 ./pbcoretools.tasks.bam2fastq_ccs-0/cluster.sh
-rw-r--r-- ? 0 501 0B Feb 19 07:17 ./pbcoretools.tasks.bam2fastq_ccs-0/cluster.stderr
-rw-r--r-- ? 0 501 362B Feb 19 07:17 ./pbcoretools.tasks.bam2fastq_ccs-0/cluster.stdout
-rw-r--r-- ? 0 501 1.2K Feb 19 07:17 ./pbcoretools.tasks.bam2fastq_ccs-0/resolved-tool-contract.json
-rw-r--r-- ? 0 501 1.0K Feb 19 07:17 ./pbcoretools.tasks.bam2fastq_ccs-0/run.sh
-rw-r--r-- ? 0 501 2.1K Feb 19 07:17 ./pbcoretools.tasks.bam2fastq_ccs-0/runnable-task.json
-rw-r--r-- ? 0 501 0B Feb 19 07:17 ./pbcoretools.tasks.bam2fastq_ccs-0/stderr
-rw-r--r-- ? 0 501 2.6K Feb 19 07:17 ./pbcoretools.tasks.bam2fastq_ccs-0/stdout
-rw-r--r-- ? 0 501 1.2K Feb 19 07:17 ./pbcoretools.tasks.bam2fastq_ccs-0/task-report.json
-rw-r--r-- ? 0 501 1.6K Feb 19 07:17 ./pbcoretools.tasks.bam2fastq_ccs-0/tool-contract.json
sftp> pwd
Remote working directory: /zimmer_5478/Zimmer_5478_20190219/pbcoretools.tasks.bam2fasta_ccs-0
sftp> get -r ./
Fetching /zimmer_5478/Zimmer_5478_20190219/pbcoretools.tasks.bam2fasta_ccs-0/./ to .
Retrieving /zimmer_5478/Zimmer_5478_20190219/pbcoretools.tasks.bam2fasta_ccs-0
/zimmer_5478/Zimmer_5478_20190219/pbcoretools.tasks.bam2fasta_ccs-0/cluster.sh 100% 742 13.4KB/s 00:00
/zimmer_5478/Zimmer_5478_20190219/pbcoretools.tasks.bam2fasta_ccs-0/resolved-tool-contract.json 100% 1271 22.9KB/s 00:00
/zimmer_5478/Zimmer_5478_20190219/pbcoretools.tasks.bam2fasta_ccs-0/runnable-task.json 100% 2193 39.2KB/s 00:00
/zimmer_5478/Zimmer_5478_20190219/pbcoretools.tasks.bam2fasta_ccs-0/ccs.fasta.zip 100% 239MB 1.2MB/s 03:22
/zimmer_5478/Zimmer_5478_20190219/pbcoretools.tasks.bam2fasta_ccs-0/.cluster-env.json 100% 12KB 254.8KB/s 00:00
/zimmer_5478/Zimmer_5478_20190219/pbcoretools.tasks.bam2fasta_ccs-0/cluster.stdout 100% 362 8.2KB/s 00:00
/zimmer_5478/Zimmer_5478_20190219/pbcoretools.tasks.bam2fasta_ccs-0/task-report.json 100% 1268 27.8KB/s 00:00
/zimmer_5478/Zimmer_5478_20190219/pbcoretools.tasks.bam2fasta_ccs-0/stdout 100% 2668 55.1KB/s 00:00
/zimmer_5478/Zimmer_5478_20190219/pbcoretools.tasks.bam2fasta_ccs-0/tool-contract.json 100% 1603 30.0KB/s 00:00
/zimmer_5478/Zimmer_5478_20190219/pbcoretools.tasks.bam2fasta_ccs-0/run.sh 100% 1055 19.9KB/s 00:00
/zimmer_5478/Zimmer_5478_20190219/pbcoretools.tasks.bam2fasta_ccs-0/.env.json
sftp> cd ../pbcoretools.tasks.bam2fastq_ccs-0/
sftp> pwd
Remote working directory: /zimmer_5478/Zimmer_5478_20190219/pbcoretools.tasks.bam2fastq_ccs-0
sftp> get -r ./
Fetching /zimmer_5478/Zimmer_5478_20190219/pbcoretools.tasks.bam2fastq_ccs-0/./ to .
Retrieving /zimmer_5478/Zimmer_5478_20190219/pbcoretools.tasks.bam2fastq_ccs-0
/zimmer_5478/Zimmer_5478_20190219/pbcoretools.tasks.bam2fastq_ccs-0/ccs.fastq.zip 100% 505MB 1.4MB/s 06:14
/zimmer_5478/Zimmer_5478_20190219/pbcoretools.tasks.bam2fastq_ccs-0/cluster.sh 100% 742 15.8KB/s 00:00
/zimmer_5478/Zimmer_5478_20190219/pbcoretools.tasks.bam2fastq_ccs-0/resolved-tool-contract.json 100% 1271 35.3KB/s 00:00
/zimmer_5478/Zimmer_5478_20190219/pbcoretools.tasks.bam2fastq_ccs-0/runnable-task.json 100% 2193 59.1KB/s 00:00
/zimmer_5478/Zimmer_5478_20190219/pbcoretools.tasks.bam2fastq_ccs-0/.cluster-env.json 100% 12KB 238.2KB/s 00:00
/zimmer_5478/Zimmer_5478_20190219/pbcoretools.tasks.bam2fastq_ccs-0/cluster.stdout 100% 362 7.7KB/s 00:00
/zimmer_5478/Zimmer_5478_20190219/pbcoretools.tasks.bam2fastq_ccs-0/task-report.json 100% 1267 25.8KB/s 00:00
/zimmer_5478/Zimmer_5478_20190219/pbcoretools.tasks.bam2fastq_ccs-0/stdout 100% 2668 60.1KB/s 00:00
/zimmer_5478/Zimmer_5478_20190219/pbcoretools.tasks.bam2fastq_ccs-0/tool-contract.json 100% 1603 27.7KB/s 00:00
/zimmer_5478/Zimmer_5478_20190219/pbcoretools.tasks.bam2fastq_ccs-0/run.sh 100% 1055 24.7KB/s 00:00
/zimmer_5478/Zimmer_5478_20190219/pbcoretools.tasks.bam2fastq_ccs-0/.env.json 100% 8922 183.9KB/s 00:00
CWD: /scratch-lustre/pscha005/Pacbio/2019/
[pscha005@turing1 2019] sftp zimmer_5478@dnaseq2.igsp.duke.edu
Connecting to dnaseq2.igsp.duke.edu...
sftp> cd zimmer_5478/Zimmer_5478_20190219/2_B01/
sftp> get *
Fetching /zimmer_5478/Zimmer_5478_20190219/2_B01/m54089_190216_063434.adapters.fasta to m54089_190216_063434.adapters.fasta
/zimmer_5478/Zimmer_5478_20190219/2_B01/m54089_190216_063434.adapters.fasta 100% 60 0.1KB/s 00:01
Fetching /zimmer_5478/Zimmer_5478_20190219/2_B01/m54089_190216_063434.baz2bam_1.log to m54089_190216_063434.baz2bam_1.log
/zimmer_5478/Zimmer_5478_20190219/2_B01/m54089_190216_063434.baz2bam_1.log 100% 898KB 897.7KB/s 00:00
Fetching /zimmer_5478/Zimmer_5478_20190219/2_B01/m54089_190216_063434.scraps.bam to m54089_190216_063434.scraps.bam
/zimmer_5478/Zimmer_5478_20190219/2_B01/m54089_190216_063434.scraps.bam 100% 36GB 45.5MB/s 13:31
Fetching /zimmer_5478/Zimmer_5478_20190219/2_B01/m54089_190216_063434.scraps.bam.pbi to m54089_190216_063434.scraps.bam.pbi
/zimmer_5478/Zimmer_5478_20190219/2_B01/m54089_190216_063434.scraps.bam.pbi 100% 144MB 48.2MB/s 00:03
Fetching /zimmer_5478/Zimmer_5478_20190219/2_B01/m54089_190216_063434.sts.xml to m54089_190216_063434.sts.xml
/zimmer_5478/Zimmer_5478_20190219/2_B01/m54089_190216_063434.sts.xml 100% 103KB 103.0KB/s 00:00
Fetching /zimmer_5478/Zimmer_5478_20190219/2_B01/m54089_190216_063434.subreads.bam to m54089_190216_063434.subreads.bam
/zimmer_5478/Zimmer_5478_20190219/2_B01/m54089_190216_063434.subreads.bam 100% 27GB 47.3MB/s 09:44
Fetching /zimmer_5478/Zimmer_5478_20190219/2_B01/m54089_190216_063434.subreads.bam.pbi to m54089_190216_063434.subreads.bam.pbi
/zimmer_5478/Zimmer_5478_20190219/2_B01/m54089_190216_063434.subreads.bam.pbi 100% 111MB 55.3MB/s 00:02
Fetching /zimmer_5478/Zimmer_5478_20190219/2_B01/m54089_190216_063434.subreadset.xml to m54089_190216_063434.subreadset.xml
/zimmer_5478/Zimmer_5478_20190219/2_B01/m54089_190216_063434.subreadset.xml 100% 16KB 16.4KB/s 00:01
Fetching /zimmer_5478/Zimmer_5478_20190219/2_B01/m54089_190216_063434.transferdone to m54089_190216_063434.transferdone
/zimmer_5478/Zimmer_5478_20190219/2_B01/m54089_190216_063434.transferdone 100% 535 0.5KB/s 00:00
Fetching /zimmer_5478/Zimmer_5478_20190219/2_B01/tmp-file-3f735aa3-d04a-4ee4-bcb7-0b8f998535f5.txt to tmp-file-3f735aa3-d04a-4ee4-bcb7-0b8f998535f5.txt
/zimmer_5478/Zimmer_5478_20190219/2_B01/tmp-file-3f735aa3-d04a-4ee4-bcb7-0b8f998535f5.txt 100% 116KB 116.2KB/s 00:00
sftp> cd ../3_C01
sftp> get *
Fetching /zimmer_5478/Zimmer_5478_20190219/3_C01/m54089_190216_165938.adapters.fasta to m54089_190216_165938.adapters.fasta
/zimmer_5478/Zimmer_5478_20190219/3_C01/m54089_190216_165938.adapters.fasta 100% 60 0.1KB/s 00:00
Fetching /zimmer_5478/Zimmer_5478_20190219/3_C01/m54089_190216_165938.baz2bam_1.log to m54089_190216_165938.baz2bam_1.log
/zimmer_5478/Zimmer_5478_20190219/3_C01/m54089_190216_165938.baz2bam_1.log 100% 931KB 930.6KB/s 00:00
Fetching /zimmer_5478/Zimmer_5478_20190219/3_C01/m54089_190216_165938.scraps.bam to m54089_190216_165938.scraps.bam
/zimmer_5478/Zimmer_5478_20190219/3_C01/m54089_190216_165938.scraps.bam 100% 35GB 55.4MB/s 10:45
Fetching /zimmer_5478/Zimmer_5478_20190219/3_C01/m54089_190216_165938.scraps.bam.pbi to m54089_190216_165938.scraps.bam.pbi
/zimmer_5478/Zimmer_5478_20190219/3_C01/m54089_190216_165938.scraps.bam.pbi 100% 152MB 50.8MB/s 00:03
Fetching /zimmer_5478/Zimmer_5478_20190219/3_C01/m54089_190216_165938.sts.xml to m54089_190216_165938.sts.xml
/zimmer_5478/Zimmer_5478_20190219/3_C01/m54089_190216_165938.sts.xml 100% 103KB 103.2KB/s 00:00
Fetching /zimmer_5478/Zimmer_5478_20190219/3_C01/m54089_190216_165938.subreads.bam to m54089_190216_165938.subreads.bam
/zimmer_5478/Zimmer_5478_20190219/3_C01/m54089_190216_165938.subreads.bam 100% 29GB 57.5MB/s 08:34
Fetching /zimmer_5478/Zimmer_5478_20190219/3_C01/m54089_190216_165938.subreads.bam.pbi to m54089_190216_165938.subreads.bam.pbi
/zimmer_5478/Zimmer_5478_20190219/3_C01/m54089_190216_165938.subreads.bam.pbi 100% 120MB 60.0MB/s 00:02
Fetching /zimmer_5478/Zimmer_5478_20190219/3_C01/m54089_190216_165938.subreadset.xml to m54089_190216_165938.subreadset.xml
/zimmer_5478/Zimmer_5478_20190219/3_C01/m54089_190216_165938.subreadset.xml 100% 16KB 16.4KB/s 00:00
Fetching /zimmer_5478/Zimmer_5478_20190219/3_C01/m54089_190216_165938.transferdone to m54089_190216_165938.transferdone
/zimmer_5478/Zimmer_5478_20190219/3_C01/m54089_190216_165938.transferdone 100% 535 0.5KB/s 00:00
Fetching /zimmer_5478/Zimmer_5478_20190219/3_C01/tmp-file-d97896fe-6156-4627-879e-1a8761c02a91.txt to tmp-file-d97896fe-6156-4627-879e-1a8761c02a91.txt
/zimmer_5478/Zimmer_5478_20190219/3_C01/tmp-file-d97896fe-6156-4627-879e-1a8761c02a91.txt 100% 116KB 116.2KB/s 00:01
sftp> cd ../4_D01
sftp> get *
Fetching /zimmer_5478/Zimmer_5478_20190219/4_D01/m54089_190217_032517.adapters.fasta to m54089_190217_032517.adapters.fasta
/zimmer_5478/Zimmer_5478_20190219/4_D01/m54089_190217_032517.adapters.fasta 100% 60 0.1KB/s 00:00
Fetching /zimmer_5478/Zimmer_5478_20190219/4_D01/m54089_190217_032517.baz2bam_1.log to m54089_190217_032517.baz2bam_1.log
/zimmer_5478/Zimmer_5478_20190219/4_D01/m54089_190217_032517.baz2bam_1.log 100% 897KB 897.4KB/s 00:01
Fetching /zimmer_5478/Zimmer_5478_20190219/4_D01/m54089_190217_032517.scraps.bam to m54089_190217_032517.scraps.bam
/zimmer_5478/Zimmer_5478_20190219/4_D01/m54089_190217_032517.scraps.bam 100% 35GB 55.4MB/s 10:56
Fetching /zimmer_5478/Zimmer_5478_20190219/4_D01/m54089_190217_032517.scraps.bam.pbi to m54089_190217_032517.scraps.bam.pbi
/zimmer_5478/Zimmer_5478_20190219/4_D01/m54089_190217_032517.scraps.bam.pbi 100% 148MB 49.5MB/s 00:03
Fetching /zimmer_5478/Zimmer_5478_20190219/4_D01/m54089_190217_032517.sts.xml to m54089_190217_032517.sts.xml
/zimmer_5478/Zimmer_5478_20190219/4_D01/m54089_190217_032517.sts.xml 100% 103KB 103.3KB/s 00:00
Fetching /zimmer_5478/Zimmer_5478_20190219/4_D01/m54089_190217_032517.subreads.bam to m54089_190217_032517.subreads.bam
/zimmer_5478/Zimmer_5478_20190219/4_D01/m54089_190217_032517.subreads.bam 100% 27GB 57.0MB/s 08:08
Fetching /zimmer_5478/Zimmer_5478_20190219/4_D01/m54089_190217_032517.subreads.bam.pbi to m54089_190217_032517.subreads.bam.pbi
/zimmer_5478/Zimmer_5478_20190219/4_D01/m54089_190217_032517.subreads.bam.pbi 100% 113MB 56.7MB/s 00:02
Fetching /zimmer_5478/Zimmer_5478_20190219/4_D01/m54089_190217_032517.subreadset.xml to m54089_190217_032517.subreadset.xml
/zimmer_5478/Zimmer_5478_20190219/4_D01/m54089_190217_032517.subreadset.xml 100% 16KB 16.4KB/s 00:00
Fetching /zimmer_5478/Zimmer_5478_20190219/4_D01/m54089_190217_032517.transferdone to m54089_190217_032517.transferdone
/zimmer_5478/Zimmer_5478_20190219/4_D01/m54089_190217_032517.transferdone 100% 535 0.5KB/s 00:00
Fetching /zimmer_5478/Zimmer_5478_20190219/4_D01/tmp-file-1059bbc6-4f3b-46e7-bf03-dd9c4a20ce74.txt to tmp-file-1059bbc6-4f3b-46e7-bf03-dd9c4a20ce74.txt
/zimmer_5478/Zimmer_5478_20190219/4_D01/tmp-file-1059bbc6-4f3b-46e7-bf03-dd9c4a20ce74.txt 100% 116KB 116.2KB/s 00:00
sftp> cd ../5_E01
sftp> get *
Fetching /zimmer_5478/Zimmer_5478_20190219/5_E01/m54089_190217_135053.adapters.fasta to m54089_190217_135053.adapters.fasta
/zimmer_5478/Zimmer_5478_20190219/5_E01/m54089_190217_135053.adapters.fasta 100% 60 0.1KB/s 00:00
Fetching /zimmer_5478/Zimmer_5478_20190219/5_E01/m54089_190217_135053.baz2bam_1.log to m54089_190217_135053.baz2bam_1.log
/zimmer_5478/Zimmer_5478_20190219/5_E01/m54089_190217_135053.baz2bam_1.log 100% 761KB 761.2KB/s 00:00
Fetching /zimmer_5478/Zimmer_5478_20190219/5_E01/m54089_190217_135053.scraps.bam to m54089_190217_135053.scraps.bam
/zimmer_5478/Zimmer_5478_20190219/5_E01/m54089_190217_135053.scraps.bam 100% 37GB 55.0MB/s 11:28
Fetching /zimmer_5478/Zimmer_5478_20190219/5_E01/m54089_190217_135053.scraps.bam.pbi to m54089_190217_135053.scraps.bam.pbi
/zimmer_5478/Zimmer_5478_20190219/5_E01/m54089_190217_135053.scraps.bam.pbi 100% 139MB 46.3MB/s 00:03
Fetching /zimmer_5478/Zimmer_5478_20190219/5_E01/m54089_190217_135053.sts.xml to m54089_190217_135053.sts.xml
/zimmer_5478/Zimmer_5478_20190219/5_E01/m54089_190217_135053.sts.xml 100% 102KB 101.8KB/s 00:00
Fetching /zimmer_5478/Zimmer_5478_20190219/5_E01/m54089_190217_135053.subreads.bam to m54089_190217_135053.subreads.bam
/zimmer_5478/Zimmer_5478_20190219/5_E01/m54089_190217_135053.subreads.bam 100% 25GB 53.7MB/s 07:50
Fetching /zimmer_5478/Zimmer_5478_20190219/5_E01/m54089_190217_135053.subreads.bam.pbi to m54089_190217_135053.subreads.bam.pbi
/zimmer_5478/Zimmer_5478_20190219/5_E01/m54089_190217_135053.subreads.bam.pbi 100% 101MB 50.4MB/s 00:02
Fetching /zimmer_5478/Zimmer_5478_20190219/5_E01/m54089_190217_135053.subreadset.xml to m54089_190217_135053.subreadset.xml
/zimmer_5478/Zimmer_5478_20190219/5_E01/m54089_190217_135053.subreadset.xml 100% 16KB 16.4KB/s 00:00
Fetching /zimmer_5478/Zimmer_5478_20190219/5_E01/m54089_190217_135053.transferdone to m54089_190217_135053.transferdone
/zimmer_5478/Zimmer_5478_20190219/5_E01/m54089_190217_135053.transferdone 100% 535 0.5KB/s 00:00
Fetching /zimmer_5478/Zimmer_5478_20190219/5_E01/tmp-file-7a16764d-48de-4b4e-b3fb-14f4825edcc7.txt to tmp-file-7a16764d-48de-4b4e-b3fb-14f4825edcc7.txt
/zimmer_5478/Zimmer_5478_20190219/5_E01/tmp-file-7a16764d-48de-4b4e-b3fb-14f4825edcc7.txt 100% 116KB 116.2KB/s 00:00
sftp> cd pbcoretools.tasks.bam2fasta_ccs-0
sftp> get *
Fetching /zimmer_5478/Zimmer_5478_20190219/pbcoretools.tasks.bam2fasta_ccs-0/ccs.fasta.zip to ccs.fasta.zip
/zimmer_5478/Zimmer_5478_20190219/pbcoretools.tasks.bam2fasta_ccs-0/ccs.fasta.zip 100% 239MB 39.8MB/s 00:06
Fetching /zimmer_5478/Zimmer_5478_20190219/pbcoretools.tasks.bam2fasta_ccs-0/cluster.sh to cluster.sh
/zimmer_5478/Zimmer_5478_20190219/pbcoretools.tasks.bam2fasta_ccs-0/cluster.sh 100% 742 0.7KB/s 00:00
Fetching /zimmer_5478/Zimmer_5478_20190219/pbcoretools.tasks.bam2fasta_ccs-0/cluster.stderr to cluster.stderr
Fetching /zimmer_5478/Zimmer_5478_20190219/pbcoretools.tasks.bam2fasta_ccs-0/cluster.stdout to cluster.stdout
/zimmer_5478/Zimmer_5478_20190219/pbcoretools.tasks.bam2fasta_ccs-0/cluster.stdout 100% 362 0.4KB/s 00:00
Fetching /zimmer_5478/Zimmer_5478_20190219/pbcoretools.tasks.bam2fasta_ccs-0/resolved-tool-contract.json to resolved-tool-contract.json
/zimmer_5478/Zimmer_5478_20190219/pbcoretools.tasks.bam2fasta_ccs-0/resolved-tool-contract.json 100% 1271 1.2KB/s 00:00
Fetching /zimmer_5478/Zimmer_5478_20190219/pbcoretools.tasks.bam2fasta_ccs-0/run.sh to run.sh
/zimmer_5478/Zimmer_5478_20190219/pbcoretools.tasks.bam2fasta_ccs-0/run.sh 100% 1055 1.0KB/s 00:00
Fetching /zimmer_5478/Zimmer_5478_20190219/pbcoretools.tasks.bam2fasta_ccs-0/runnable-task.json to runnable-task.json
/zimmer_5478/Zimmer_5478_20190219/pbcoretools.tasks.bam2fasta_ccs-0/runnable-task.json 100% 2193 2.1KB/s 00:00
Fetching /zimmer_5478/Zimmer_5478_20190219/pbcoretools.tasks.bam2fasta_ccs-0/stderr to stderr
Fetching /zimmer_5478/Zimmer_5478_20190219/pbcoretools.tasks.bam2fasta_ccs-0/stdout to stdout
/zimmer_5478/Zimmer_5478_20190219/pbcoretools.tasks.bam2fasta_ccs-0/stdout 100% 2668 2.6KB/s 00:00
Fetching /zimmer_5478/Zimmer_5478_20190219/pbcoretools.tasks.bam2fasta_ccs-0/task-report.json to task-report.json
/zimmer_5478/Zimmer_5478_20190219/pbcoretools.tasks.bam2fasta_ccs-0/task-report.json 100% 1268 1.2KB/s 00:00
Fetching /zimmer_5478/Zimmer_5478_20190219/pbcoretools.tasks.bam2fasta_ccs-0/tool-contract.json to tool-contract.json
/zimmer_5478/Zimmer_5478_20190219/pbcoretools.tasks.bam2fasta_ccs-0/tool-contract.json 100% 1603 1.6KB/s 00:00
sftp> cd ../pbcoretools.tasks.bam2fastq_ccs-0
sftp> get *
Fetching /zimmer_5478/Zimmer_5478_20190219/pbcoretools.tasks.bam2fastq_ccs-0/ccs.fastq.zip to ccs.fastq.zip
/zimmer_5478/Zimmer_5478_20190219/pbcoretools.tasks.bam2fastq_ccs-0/ccs.fastq.zip 100% 505MB 50.5MB/s 00:10
Fetching /zimmer_5478/Zimmer_5478_20190219/pbcoretools.tasks.bam2fastq_ccs-0/cluster.sh to cluster.sh
/zimmer_5478/Zimmer_5478_20190219/pbcoretools.tasks.bam2fastq_ccs-0/cluster.sh 100% 742 0.7KB/s 00:00
Fetching /zimmer_5478/Zimmer_5478_20190219/pbcoretools.tasks.bam2fastq_ccs-0/cluster.stderr to cluster.stderr
Fetching /zimmer_5478/Zimmer_5478_20190219/pbcoretools.tasks.bam2fastq_ccs-0/cluster.stdout to cluster.stdout
/zimmer_5478/Zimmer_5478_20190219/pbcoretools.tasks.bam2fastq_ccs-0/cluster.stdout 100% 362 0.4KB/s 00:00
Fetching /zimmer_5478/Zimmer_5478_20190219/pbcoretools.tasks.bam2fastq_ccs-0/resolved-tool-contract.json to resolved-tool-contract.json
/zimmer_5478/Zimmer_5478_20190219/pbcoretools.tasks.bam2fastq_ccs-0/resolved-tool-contract.json 100% 1271 1.2KB/s 00:00
Fetching /zimmer_5478/Zimmer_5478_20190219/pbcoretools.tasks.bam2fastq_ccs-0/run.sh to run.sh
/zimmer_5478/Zimmer_5478_20190219/pbcoretools.tasks.bam2fastq_ccs-0/run.sh 100% 1055 1.0KB/s 00:01
Fetching /zimmer_5478/Zimmer_5478_20190219/pbcoretools.tasks.bam2fastq_ccs-0/runnable-task.json to runnable-task.json
/zimmer_5478/Zimmer_5478_20190219/pbcoretools.tasks.bam2fastq_ccs-0/runnable-task.json 100% 2193 2.1KB/s 00:00
Fetching /zimmer_5478/Zimmer_5478_20190219/pbcoretools.tasks.bam2fastq_ccs-0/stderr to stderr
Fetching /zimmer_5478/Zimmer_5478_20190219/pbcoretools.tasks.bam2fastq_ccs-0/stdout to stdout
/zimmer_5478/Zimmer_5478_20190219/pbcoretools.tasks.bam2fastq_ccs-0/stdout 100% 2668 2.6KB/s 00:00
Fetching /zimmer_5478/Zimmer_5478_20190219/pbcoretools.tasks.bam2fastq_ccs-0/task-report.json to task-report.json
/zimmer_5478/Zimmer_5478_20190219/pbcoretools.tasks.bam2fastq_ccs-0/task-report.json 100% 1267 1.2KB/s 00:00
Fetching /zimmer_5478/Zimmer_5478_20190219/pbcoretools.tasks.bam2fastq_ccs-0/tool-contract.json to tool-contract.json
/zimmer_5478/Zimmer_5478_20190219/pbcoretools.tasks.bam2fastq_ccs-0/tool-contract.json 100% 1603 1.6KB/s 00:00
[pscha005@turing1 2019]$ pwd
/scratch-lustre/pscha005/Pacbio/2019
[pscha005@turing1 2019]$ ls -lah
total 32K
drwxr-xr-x 8 pscha005 users 4.0K Feb 19 17:32 .
drwxr-xr-x 3 pscha005 users 4.0K Feb 19 15:16 ..
drwxr-xr-x 2 pscha005 users 4.0K Feb 19 15:57 2_B01
drwxr-xr-x 2 pscha005 users 4.0K Feb 19 16:44 3_C01
drwxr-xr-x 2 pscha005 users 4.0K Feb 19 17:04 4_D01
drwxr-xr-x 2 pscha005 users 4.0K Feb 19 17:27 5_E01
drwxr-xr-x 2 pscha005 users 4.0K Feb 19 17:30 bam2fasta
drwxr-xr-x 2 pscha005 users 4.0K Feb 19 17:32 bam2fastq
The Geneious End Trim tool was used to clean sequences from the CCS fastq. The maximum number of low-quality bases (>0.05 chance of error) allowed was varied from 0 to 100. Results were exported as FASTA files and processed with fasta_len_splitter.py to extract sequences 800-1400 bp long and to count sequences in 200 bp bins.
Results:
Seq Len 0 low qual bp 1 low qual bp 2 5 10 15 20 25 30 50 75 100 raw
0-200 158032 127945 111061 81904 56565 42338 32201 24465 18506 5764 3042 3019 427
200-400 92153 72641 65321 57615 52971 48866 45842 43921 42105 34164 21608 12964 6190
400-600 103185 72873 61358 49445 43752 42007 41084 39695 38569 35898 33704 30560 12917
600-800 83449 66718 58515 48248 43389 41523 40689 40379 40072 38440 37482 36456 21160
800-1000 239633 255677 263232 274091 282329 287126 290408 293238 295519 301424 305616 308842 310932
1000-1200 199535 244529 264698 291491 308842 317881 323945 328174 331611 341065 347598 352053 366025
1200-1400 126569 147831 156941 168802 177207 181726 184950 187386 189441 194955 199251 201954 215286
1400-1600 8559 8613 8145 7479 7032 6883 6694 6782 6787 6957 7344 7599 9527
1600-1800 6991 8034 8275 8104 7930 7887 7950 7899 7925 8108 8238 8407 7919
1800-2000 10211 13892 15428 17709 19399 20207 20786 21186 21504 22354 22961 23442 24906
2000+ 17796 27360 33139 41225 46697 49669 51564 52988 54074 56984 59269 60817 70824
#! /usr/bin/python
'''Sort FASTA sequences into three length classes and tally 200 bp length bins.

For each input file NAME.fasta the sequences are written, one header line and
one unwrapped sequence line per record, to:

    NAME_toosmall.fasta    length <  MIN_RIGHT_SIZE
    NAME_rightsize.fasta   MIN_RIGHT_SIZE <= length < MAX_RIGHT_SIZE
    NAME_toobig.fasta      length >= MAX_RIGHT_SIZE

A summary of the three classes and a histogram of sequence lengths in 200 bp
bins (bin0, bin200, ..., bin1800, bin2000 = 2000 bp and longer) is printed.

Usage: ./thisscript.py files.fasta
'''
import os
import sys

# Length cut-offs (bp) for the three output files.
# NOTE(review): the earlier docstring and printed labels said 800/1400 while
# the comparisons used 700/1500.  The comparisons are kept unchanged and the
# labels are now generated from these constants - confirm which was intended.
MIN_RIGHT_SIZE = 700
MAX_RIGHT_SIZE = 1500

SIZE_CLASSES = ("toosmall", "rightsize", "toobig")

# Histogram layout: fixed-width bins, with everything >= LAST_BIN_START pooled.
BIN_WIDTH = 200
LAST_BIN_START = 2000

# Sequence counts at which a progress message is printed.
PROGRESS_MARKS = {
    1000: "1k",
    10000: "10k",
    50000: "50k",
    100000: "100k",
    250000: "250k",
    500000: "500k",
    750000: "750k",
    1000000: "1M",
}


def output_prefix(path):
    '''Return the output name prefix: the text before the first "." of the
    file name, with the directory part of `path` kept intact.'''
    directory, base = os.path.split(path)
    return os.path.join(directory, base.split(".")[0])


def size_class(length):
    '''Return "toosmall", "rightsize" or "toobig" for a sequence length.'''
    if length < MIN_RIGHT_SIZE:
        return "toosmall"
    if length < MAX_RIGHT_SIZE:
        return "rightsize"
    return "toobig"


def length_bin(length):
    '''Return the lower bound of the histogram bin that `length` falls in.'''
    return min((length // BIN_WIDTH) * BIN_WIDTH, LAST_BIN_START)


def read_fasta(lines):
    '''Yield (header, sequence) pairs from an iterable of FASTA lines.

    Wrapped sequences are joined into one string.  Records are yielded in
    input order and records that share a header are all kept.  Blank lines
    are ignored.  Raises ValueError if sequence data precedes the first
    header line.
    '''
    header = None
    chunks = []
    for raw in lines:
        line = raw.rstrip("\r\n")
        if line.startswith(">"):
            if header is not None:
                yield header, "".join(chunks)
            header = line
            chunks = []
        elif line:
            if header is None:
                raise ValueError("sequence data found before the first '>' header")
            chunks.append(line)
    if header is not None:
        yield header, "".join(chunks)


def split_fasta_by_length(path):
    '''Write the three size-class files for `path`.

    Returns (class_counts, bin_counts): dicts keyed by size-class name and by
    bin lower bound respectively.
    '''
    prefix = output_prefix(path)
    class_counts = dict.fromkeys(SIZE_CLASSES, 0)
    bin_counts = dict.fromkeys(range(0, LAST_BIN_START + 1, BIN_WIDTH), 0)
    outfiles = {}
    try:
        # Open the input first so a missing file does not leave empty outputs.
        with open(path, "r") as infile:
            for name in SIZE_CLASSES:
                outfiles[name] = open("%s_%s.fasta" % (prefix, name), "w")
            seen = 0
            for header, seq in read_fasta(infile):
                seen += 1
                if seen in PROGRESS_MARKS:
                    print("%s seqs processed..." % PROGRESS_MARKS[seen])
                name = size_class(len(seq))
                outfiles[name].write("%s\n%s\n" % (header, seq))
                class_counts[name] += 1
                bin_counts[length_bin(len(seq))] += 1
    finally:
        for handle in outfiles.values():
            handle.close()
    return class_counts, bin_counts


def print_report(class_counts, bin_counts):
    '''Print the size-class summary and the length histogram.'''
    total = sum(class_counts.values())

    def percent(count):
        # An input without sequences must not divide by zero.
        return (float(count) / float(total)) * 100 if total else 0.0

    print("============")
    print("Output Stats")
    print("============")
    print("Total Seqs Processed: %d" % total)
    print("Seqs <%dbp: %d (%.1f%%)" % (
        MIN_RIGHT_SIZE, class_counts["toosmall"],
        percent(class_counts["toosmall"])))
    print("Seqs >=%dbp and <%dbp: %d (%.1f%%)" % (
        MIN_RIGHT_SIZE, MAX_RIGHT_SIZE, class_counts["rightsize"],
        percent(class_counts["rightsize"])))
    print("Seqs >=%dbp: %d (%.1f%%)" % (
        MAX_RIGHT_SIZE, class_counts["toobig"],
        percent(class_counts["toobig"])))
    print("============")
    for start in sorted(bin_counts):
        print("bin%d\t%s" % (start, bin_counts[start]))


def main(filelist):
    '''Process every FASTA path in `filelist`; return a process exit status.'''
    if not filelist:
        sys.stderr.write("Usage: ./thisscript.py files.fasta\n")
        return 1
    for path in filelist:
        class_counts, bin_counts = split_fasta_by_length(path)
        print_report(class_counts, bin_counts)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))
>BCF1
TCAGACGATGCGTCAT
>BCF2
CTATACATGACTCTGC
>BCF3
TACTAGAGTAGCACTC
>BCF4
TGTGTATCAGTACATG
>BCF5
ACACGCATGACACACT
...
>BCR1
TCAGACGATGCGTCAT
>BCR2
TACTAGAGTAGCACTC
>BCR3
TGTGTATCAGTACATG
>BCR4
GATCTCTACTATATGC
>BCR5
ATGATGTGCTACATCT
...
BCF67 BCR46 Isoetes_sp_Schafran210_3
BCF68 BCR46 Isoetes_sp_Schafran210_4
BCF69 BCR46 Isoetes_occidentalis_WoodbridgeSN1_1
BCF70 BCR46 Isoetes_occidentalis_WoodbridgeSN1_2
BCF71 BCR46 Isoetes_occidentalis_WoodbridgeSN1_3
BCF72 BCR46 Isoetes_occidentalis_WoodbridgeSN1_4
BCF73 BCR46 Isoetes_occidentalis_WoodbridgeSN1_5
BCF74 BCR46 Isoetes_maritima_WoodbridgeSN2_
BCF67 BCR47 Isoetes_melanopoda_Ciafre728_1
BCF68 BCR47 Isoetes_melanopoda_Ciafre728_2
...
>locus=IB1/ref_taxon=Isoetes
TTTCACCTGTAACGATTTAATTGAGGATGCAAAAGACTATACAGTGGAAAACAAGTAACGATGTTTCAAACTCTCCTTAGGTTTCTGTAGCGAAATATTTTCTCAAGATGTTTCTTTCAAATGTGACACTTTATCAGAACTTCAAAAATGGTCTAGTGACCTTATTCTTTGAAAGAACTTCAAAAATGGTCTAGTGACCTTATTCTTTGTTCTGATATATTTTACTAGGTATTAGCAGCTCTAGGAATAAAAGGAGAAGTGCCAGTGCCAAAAGTATATTGCCTGTGCAAAGATTCATCCATAATTGGTACACCCTTCTATGTCATGGAATACGTTCAAGGACGAATTTTTCTGGAACCGAGTCTAGAGGTTAGAATTATTCATGATTGTTTATAAATTAGATATATTGTTCTATTTCTTCTGTTCTTAGTTGTTTGTCTGTAGGCTTTGAGCATCAATGAACGGAAAAGCATCTATGATGCAATGGCAAAGACTCTGGCGGCAATTCACACAACCAATGTTGATGAGATTGGATTGGAAAGATACGGCCGAAAGGAAAACTATTGCAAGAGGCAGGTGAAGTATTCTTACGGAAGTAACTGTATACCTTGATCAACAGCCCTTGAATTCTGTTTATAGTAAAATGTTACTTCACAATGAGGAATTCTGTTCATAGTAAAATATTACTTCACAATGTCACGATATGGTTCCAGATAGAAAATACCTTTCTTAATGTTGTCGAGCGAAACTAGAACAGAACATTAAGAACTTTCGGAAACATTTTAGTGCTTGGTCTTGCTGTTTCAATTGTTTAATTATATGCTATTATTATAACAAGCTTTATGCTGCTTATCAGGTTGAGAGATGGTCATCACAATATGCAGCATCAACAGGAGAAGGGAAGGCAACTTCTGACCCAGCCATGTACAAGCTTATTGATTGGTTAAGGGCGAATGTTCCGACAGAAGACTCAGATCTAAACTATGCAGGACTTGTTCATGGAGACTTTCGTCTTGATAATTTGGTATTCCATCCCGAGAAGGTATTAGAAGTCTTCCCACTTCATGAATATTTATTGCTTAGAGGATCTATGGAGGGAACATTTTTTTATGACAACCAGTTTTGGGTGACATATCTTGGTCATAATGCATCCATAGTAATTATGGACGCAAATGATGCAGGATCCTAATTAAATCCGAATCCTCCAGGATCATGCAGTCCATACATAGAATTTAGACAAGATTTGCTTGATATACCGCTTGTCGAGATAAGATAATACCATCTTATCCCTCCATACTCTATGCCAAGAATAGTGCAGCAGACTTAGATCAGTTATTTCAAATTATTTTGCAAATTGTTCCTCCCCAGTTATTTCAAAGTAGTTTGCAAATTGTTCCTCAGAGATCAGTTATTTCAAAGTAGTTTGCAAATTGTTCCTTATTAGGTCGWGTCTCACTTCTCATAATGAGGTCATTATATACAATACTGTGTTTGCAATTAAGTTAACATATATCCTGGATCTGCATCACCTTGTTTTTAGCTCTGTTCATAAATGGTCCTTCATGTATAGATTTGTAATGTCATTACTCAATTAGTCTAATATTCTTGTTTTAGATTTAAACAACGTCCTTCGACACAATTTCTCTAGGTATGCCAAACTTACACATCAGTTTATCTTACACATAGCACATAGGTTTACACACAATGTCTGCTGTTTTTACTTCTTAGAACAGTATATATGCAACTGGATATGACATGTAATGTTACCTTCCGTAGGCTTAGTAAAGAGATGTTCTCATGCAAAACTGCTTTGTTTACTTTGGTACTATTTTGGATGAATTCAAAGAATTTAAAAAGTTTGACTAAAACC
>locus=IB2/ref_taxon=Isoetes
AGAWGGAGAGATACGATCAGGATTTGCAATGACTGAACCCCAAGYTGCATCTTCAGATGCAACAAACATAGAGTGCAAAATTGACCGGTATATTAGATGAGTTGACCATGTAAAAAAGTGATAATTAACAACTACTAAATAATTGCTCTGGCAAATTCGCTGGCAGTGAGGGAGATTATTACATCATCAATGGAAGGAAGTGGTGGACAAGTGGTGCAATGGATCCAAGATGCCAAATACTTATTTTGATGGTAAGTTCCAAACACATGAAGGTAACTTCTCAAGAAGGAGCTTCAGGAGCTGGATTTAACCAGAACCTTAACATGTTTGAACTGCGCATATTGCTAAGTCATGGCCCCATTACTTAATACTACTTGATCACTTATCTGTTGCAGGAAAATTCTTATGATGTTTGCGCATTTCACAAATCTAGTTTCCTTTTTATACGTACTAGGCATCAGTCTCAAAGATAACCCTATGTGATCATGACAGGGGAAAACAGATGTAAATGCACCTGCATACAGACAACAGTCAATGATTCTAGTGGACATAAAAACACAAGGGGTCAAAATAATCCGCCCTTTAACAGTGTATGGTTATGATGATGCACCACACGGACATGCAGAAATAGTGTTTGAAAATGTTCGTGTGCCTGCACTCAACATAATCTGGGAAGAAGGACGTGGTTTTGAAATATCACAGGCAAGTGAAATATTCAAGATAATGTAGATATTGGTGGACTATGCTAATTTTTAATTGGCACATAACAGGGGAGGCTAGGACCTGGGCGGTTGCACCACTGCATGAGACTCGTTGGGTCTGCGGAG
>locus=PGI/ref_taxon=Isoetes
ATGTTACTAAGTGAGCGACTATCTTTTCTTCCCTCCTATTAATTTTTATACGTGTGCATTGTAGTTGTGCTTTGTACATCAGGATGATTGAGAACTATAATATGTTTTGTCCTATTAAGCTTTACAAGTCTAGCAATGGTCGACTCATGTTTAATGGTTGTTTGTCATCTTGTGCACTATGAAGATTTTTGGAAGGTGCATGGAGCATTGATCATCACTTTCAGCATACTTCATTTGAAAGGAACTTGCCTGTAAGTTGACAACTATTAATATTCTACTTCAAAACTTAAAAAACTGACATCTTTCTGTTGCAGGTCCTTCTTGGTCTCCTAAGTGTCTGGAATGTTTCATTCTTAGATCGTCCTGCACGGGTATAACATAAATTGACGACGTATCTTCAAAATAAATGATACTTGTCCGAAAATGGCATTGCTTAATCTTGTCTTTTGTAGGCCATTTTACCATATTGTCAAGCTCTCCAGAAATTGGCTCCACATATTCAACAGGCAAATGATTTTTTTCTCGGATCTTGGTGCGCATTATGATTTTTTCCTGACGAATCATTTTACCAGGTTAGTATGGAAAGCAATGGGAAAGGTGTTAGTATTGAAGGGATATCCCTACCTTATCAAACTGGGGAAATTGATTTTGGAGAACCTGGTACAAATGGTCAGCACAGCTTTTATCAATTGATTCATCAGGTATATAACATCTCACCAATGCTTCTTTACTTTATCCTGTTTTTGTTTAAGTCCCTTGAAAGCAATTATAGACCCCCTTAAGTACAGTATTGCTTGGGGGTAATAGTATGGGATAATGGAAATTTCAATGCAGCTCTAATGGTTGAATGATAGGTGCACTAAGGATGTCTGTAAACACCATCATTGGCTTTTTTTTTAATTTTTTTATCGAGTACCTACTATCATCAATTATTAGGGCATAGCTTCCTAACCTTATGATTGGTGTGTAA
>locus=LFY/ref_taxon=Isoetes
TGCAACTTCACAATTTTTTATCCTTCCACTGAAGATGCTCTTAAAAGAGGCATGCTTGTAATAGGAAAGTCAAGTTTTCTTACTGCAATTATTTTTATCTCAGGGAACATATTGTAATTAGTGTCAGTCTGTGTTGAATAGTCCATGTGATAAGCCATGTGTCTCAAGAAATGTGCATCATCATTGAGATAGTTATCTGTGATAGCAACATTGCTTCAAACTTTAATGTTGAGAATATACTTTTTCCTCAAACGGTATGGAAGTTGCTAACCAGACTATAAAGTTTGAGAGGAAACAGTAGTTGGCGTTTCTTTTGGCAAAAATCTCACCTGTGTGAACTTTGATTAGATTGCRTGGCACAATTTTTTGACATAAGAATGTGCCGAGCACAATAGTAAATAATGGCCATGCGCAAGCCTAGTTGCCTATTATGCTCAACTGATAGCAGACTTGTGCTTTGATGTTTCTTATGGATTTATTCACTCCCCCATAGAAACGTGCATAGCCACCATATCATTCATATGAGTTTATGTTATTGTGCTTATTTTGGAAGAACTTTATAAGAATGGTCTGCCAGTGAATATTTTGGACGGGCTTCTAATCCGTAAAGCTTTCTAAGGAGGGAACTAACTAAACAGAGGGTAATATTTAGTACTCGGGATGTCCATGTAACAGCCCTTCCATGATTTGGGACAAGTTTCATACAATTTCAGAGAGTGCTGTTGACTGGATTCTTGAAGGGTTGGCTGTCTTTTCAAATTCTTGCTGATATATCAATACTAAGTTCATGAGGCCACATTACTGGAGGAATAAGTGCTAACTAGCAGAAAACAAAGCATGTACCTTTTTGTTTAAAATCTATGGGTTTCTG
### BLAST filtered fasta against refseq_blastdb constructed by PURC
iPhone:purc_out_10lqbp_rightSize_BlastDBs Peter$ blastn -query ../ccs_10lqbp_rightsize_renamed_renamed.fasta -task blastn -num_threads 4 -db refseq_blastdb -out REFBLASTOUT.txt -evalue 0.0000001 -max_target_seqs 1 -outfmt '6 qacc sacc nident mismatch length pident bitscore'
### Separate loci based on BLAST results
iPhone:purc_out_10lqbp_rightSize_BlastDBs Peter$ grep "locus=LFY" REFBLASTOUT.txt > LFY_BLASTOUT.txt
iPhone:purc_out_10lqbp_rightSize_BlastDBs Peter$ grep "locus=pgiC" REFBLASTOUT.txt > PGI_BLASTOUT.txt
iPhone:purc_out_10lqbp_rightSize_BlastDBs Peter$ grep "locus=IBR31" REFBLASTOUT.txt > IBR31_BLASTOUT.txt
iPhone:purc_out_10lqbp_rightSize_BlastDBs Peter$ grep "locus=IBR32" REFBLASTOUT.txt > IBR32_BLASTOUT.txt
### From separate BLAST results, extract just sequences with bit score > 1250 or hit length > 800. Histograms of
### bit scores and hit lengths for each locus show large peak around 1000-2000 bit score/800-1000 hit length and
### few small outliers, except IBR3-1 which shows many very small outliers
### Python used to extract "good" matches to each locus from original sequence file
def filter_good_hits(blast_path, out_path, min_bitscore=1250, min_length=800):
    '''Write the query ID of every "good" BLAST hit in blast_path to out_path.

    blast_path is tab-separated BLAST output whose columns are
    qacc, sacc, nident, mismatch, length, pident, bitscore.  A hit is kept
    when its bit score is above min_bitscore OR its alignment length is above
    min_length.  One query ID is written per kept line (repeated hits for one
    query are not collapsed).  Lines with fewer than seven columns, such as
    blank lines, are skipped.

    Returns the number of IDs written.
    '''
    kept = 0
    with open(blast_path, "r") as infile, open(out_path, "w") as outfile:
        for line in infile:
            fields = line.rstrip("\n").split("\t")
            if len(fields) < 7:
                continue
            ### 1250 bit score/800 hit length selected based on distribution of all scores/lengths viewed in R (see Isoetes_PURC.R)
            if float(fields[6]) > min_bitscore or float(fields[4]) > min_length:
                outfile.write("%s\n" % (fields[0]))
                kept += 1
    return kept


if __name__ == "__main__":
    filter_good_hits("LFY_BLASTOUT.txt", "LFY_goodHits.txt")
iPhone:purc_out_10lqbp_rightSize_BlastDBs Peter$ grep -c "m54089" *_BLASTOUT.txt
IBR31_BLASTOUT.txt: 294206
IBR32_BLASTOUT.txt: 258023
LFY_BLASTOUT.txt: 104740
PGI_BLASTOUT.txt: 219169
Peters-MBP-2:purc_out_10lqbp_rightSize_BlastDBs Peter$ grep -c "m54089" *_goodHits.txt
IBR31_goodHits.txt: 176661
IBR32_goodHits.txt: 251438
LFY_goodHits.txt: 94006
PGI_goodHits.txt: 209957
### Percentage of BLAST hits retained as "good" hits (goodHits count / BLASTOUT count):
IBR3-1: 60.0% IBR3-2: 97.4% LFY: 89.8% PGI: 95.8%
### BLAST "good" hits against barcode_blastdb constructed by PURC
Peters-MBP-2:purc_out_10lqbp_rightSize_BlastDBs Peter$ blastn -query LFY_goodHits_ccs_10lqbp_rightsize_filtered.fasta -task blastn -num_threads 4 -db barcode_blastdb -out LFY_barcodeBlastOut.txt -evalue 0.0000001 -max_target_seqs 2 -outfmt '6 qacc sacc length pident bitscore qstart qend'
Submission script mafft_LFY.sh
#!/bin/bash -l
#SBATCH --job-name=maft # Job name
#SBATCH --mail-type=ALL # Mail events (NONE, BEGIN, END, FAIL, ALL)
#SBATCH --mail-user=<pscha005@odu.edu> # Where to send mail
#SBATCH --ntasks=1 # Run a single task (its CPU count is set by --cpus-per-task)
#SBATCH --cpus-per-task=32
#SBATCH -p himem
#SBATCH --output=2019March08_LFY_mafft_%j.out # Standard output and error log

# Runs MAFFT with --adjustdirection on the filtered LFY sequences and writes
# the result to LFY_redirected.fasta.  The job exits non-zero if the module
# cannot be loaded, the input is missing, or MAFFT fails, and the final output
# file is only created after MAFFT has finished successfully.

readonly input="LFY_goodHits_ccs_10lqbp_rightsize_filtered.fasta"
readonly output="LFY_redirected.fasta"
# Use the CPU count SLURM granted so --thread cannot drift from
# --cpus-per-task; fall back to 32 when run outside SLURM.
readonly threads="${SLURM_CPUS_PER_TASK:-32}"

pwd; hostname; date

module load mafft/7.309 || {
  printf 'error: could not load module mafft/7.309\n' >&2
  exit 1
}

if [[ ! -r "$input" ]]; then
  printf 'error: input file not readable: %s\n' "$input" >&2
  exit 1
fi

# Write to a scratch name first: a failed or killed run then never leaves a
# truncated file under the final output name.
if ! mafft --thread "$threads" --adjustdirection "$input" > "${output}.partial"; then
  printf 'error: mafft failed; incomplete output left in %s\n' "${output}.partial" >&2
  exit 1
fi
mv -- "${output}.partial" "$output"

date
First test produced extremely gappy alignments. Testing the --adjustdirectionaccurately setting and the legacy gap extension penalty --ep 0.123 (recommended on the MAFFT website: https://mafft.cbrc.jp/alignment/software/ep0.123.html).
Also testing splitting the sequence files into subsets of ~5000 sequences and running each subset through MAFFT as a separate job (a crude substitute for MPI parallelism). Script below:
#! /usr/bin/python
'''
Split a FASTA file into numbered chunk files and write one SLURM/MAFFT
submission script per chunk.

Usage: fastaSplitter.py INPUT.fasta SEQS_PER_FILE

For an input named STEM.fasta this writes, in the current directory:
  STEM_1.fasta, STEM_2.fasta, ...   chunks of at most SEQS_PER_FILE records
  mafft_STEM_1.sh, mafft_STEM_2.sh, ...   one submission script per chunk

STEM is the input's base name up to its first "."; any directory part of the
input path is ignored so that paths such as ./reads.fasta work.
'''
import os
import sys

### Submission script written for each chunk. The six placeholders are filled
### with (stem, chunk number) three times: log name, input file, output file.
MAFFT_SCRIPT_TEMPLATE = '''#!/bin/bash -l\n
#SBATCH --job-name=mafft # Job name
#SBATCH --ntasks=1 # Run on a single CPU
#SBATCH --cpus-per-task=16
#SBATCH --output=mafft_%s_%d.out # Standard output and error log
pwd; hostname; date
module load mafft/7.309
mafft --thread 16 --adjustdirection --globalpair --maxiterate 10 %s_%d.fasta > %s_%d_redirected.fasta
date
'''


def chunk_stem(fasta_path):
    '''Return the prefix used for every output name: base name up to the first ".".'''
    return os.path.basename(fasta_path).split(".")[0]


def split_fasta(fasta_path, split_length, stem=None):
    '''Copy fasta_path into STEM_1.fasta, STEM_2.fasta, ... and return the file count.

    A new chunk is started whenever a header line (starting with ">") would
    push the current chunk past split_length records. STEM_1.fasta is always
    created, even for an empty input.
    Raises ValueError if split_length is smaller than 1.
    '''
    if split_length < 1:
        raise ValueError("split_length must be at least 1, got %d" % (split_length))
    if stem is None:
        stem = chunk_stem(fasta_path)
    file_counter = 1
    seq_counter = 0
    ### print(...) with one pre-formatted string behaves the same on Python 2 and 3
    print("Splitting fasta to files with %d sequences" % (split_length))
    with open(fasta_path, "r") as openfile:
        outfile = open("%s_%d.fasta" % (stem, file_counter), "w")
        print("Writing file %d..." % (file_counter))
        try:
            for line in openfile:
                if line.startswith(">"):
                    seq_counter += 1
                    if seq_counter > split_length:
                        ### current chunk is full: this header opens the next one
                        file_counter += 1
                        outfile.close()
                        outfile = open("%s_%d.fasta" % (stem, file_counter), "w")
                        print("Writing file %d..." % (file_counter))
                        seq_counter = 1
                outfile.write(line)
        finally:
            outfile.close()
    return file_counter


def write_mafft_scripts(stem, n_files):
    '''Write mafft_STEM_i.sh for every chunk i in 1..n_files (inclusive).'''
    ### n_files + 1: chunks are numbered from 1, so the last chunk must be included
    for i in range(1, n_files + 1):
        with open("mafft_%s_%d.sh" % (stem, i), "w") as trim_script:
            trim_script.write(MAFFT_SCRIPT_TEMPLATE % (stem, i, stem, i, stem, i))


def main(argv):
    '''Entry point; argv is [program, input fasta, sequences per file]. Returns an exit code.'''
    usage = "Usage: fastaSplitter.py INPUT.fasta SEQS_PER_FILE\n"
    if len(argv) != 3:
        sys.stderr.write(usage)
        return 2
    try:
        split_length = int(argv[2])
    except ValueError:
        sys.stderr.write("SEQS_PER_FILE must be an integer, got %r\n%s" % (argv[2], usage))
        return 2
    if split_length < 1:
        sys.stderr.write("SEQS_PER_FILE must be at least 1\n%s" % (usage))
        return 2
    stem = chunk_stem(argv[1])
    n_files = split_fasta(argv[1], split_length, stem)
    write_mafft_scripts(stem, n_files)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
[pscha005@turing1 ManualDemultiplex]$ ../../../scripts/fastaSplitter.py IBR32_goodHits_ccs_10lqbp_rightsize_filtered.fasta 5000 Splitting fastq to files with 5000 sequences Writing file 1... Writing file 2... Writing file 3... Writing file 4... Writing file 5... Writing file 6... Writing file 7... Writing file 8... Writing file 9... Writing file 10... Writing file 11... Writing file 12... Writing file 13... Writing file 14... Writing file 15... Writing file 16... Writing file 17... Writing file 18... Writing file 19... Writing file 20... Writing file 21... Writing file 22... Writing file 23... Writing file 24... Writing file 25... Writing file 26... Writing file 27... Writing file 28... Writing file 29... Writing file 30... Writing file 31... Writing file 32... Writing file 33... Writing file 34... Writing file 35... Writing file 36... Writing file 37... Writing file 38... Writing file 39... Writing file 40... Writing file 41... Writing file 42... Writing file 43... Writing file 44... Writing file 45... Writing file 46... Writing file 47... Writing file 48... Writing file 49... Writing file 50... Writing file 51...
### When MAFFT redirecting/aligning complete, sequences are "de-aligned" by removing gap characters(hyphens)
>>> infile = open("LFY_redirected.fasta" , "r")
>>> outfile = open("LFY_redirected_noGaps.fasta", "w")
>>> for line in infile:
... if ">" in line:
... outfile.write( "\n%s" %(line)) #This makes the first line of the file empty and needs to be manually removed
... else:
... stripline = line.strip("\n")
... for item in stripline:
... if item != "-":
... outfile.write(item)