CWD: /Volumes/Samsung_T5/IsoetesDNA/Illumina/March2019/
sftp zimmer_5477@dnaseq2.igsp.duke.edu
zimmer_5477@dnaseq2.igsp.duke.edu's password:
Connected to zimmer_5477@dnaseq2.igsp.duke.edu.
sftp> cd zimmer_5477/ZIMMER_5477_190301B1
sftp> ls -lah
drwxr-xr-x ? 0 501 2.5K Mar 7 04:21 .
dr-x------ ? 2762 501 38B Mar 7 04:06 ..
-rw-r--r-- ? 0 501 634M Mar 7 04:15 Isoetes_anatolica_S33_L003_R1_001.fastq.gz
-rw-r--r-- ? 0 501 687M Mar 7 04:09 Isoetes_anatolica_S33_L003_R2_001.fastq.gz
-rw-r--r-- ? 0 501 2.6G Mar 7 04:12 Isoetes_appalachiana_S_S38_L003_R1_001.fastq.gz
-rw-r--r-- ? 0 501 2.8G Mar 7 04:20 Isoetes_appalachiana_S_S38_L003_R2_001.fastq.gz
-rw-r--r-- ? 0 501 2.5G Mar 7 04:11 Isoetes_boomii_S42_L003_R1_001.fastq.gz
-rw-r--r-- ? 0 501 2.8G Mar 7 04:12 Isoetes_boomii_S42_L003_R2_001.fastq.gz
-rw-r--r-- ? 0 501 1.2G Mar 7 04:16 Isoetes_echinospora_1_S30_L003_R1_001.fastq.gz
-rw-r--r-- ? 0 501 1.3G Mar 7 04:22 Isoetes_echinospora_1_S30_L003_R2_001.fastq.gz
-rw-r--r-- ? 0 501 1.4G Mar 7 04:21 Isoetes_echinospora_2_S31_L003_R1_001.fastq.gz
-rw-r--r-- ? 0 501 1.5G Mar 7 04:15 Isoetes_echinospora_2_S31_L003_R2_001.fastq.gz
-rw-r--r-- ? 0 501 814M Mar 7 04:13 Isoetes_echinospora_3_S32_L003_R1_001.fastq.gz
-rw-r--r-- ? 0 501 889M Mar 7 04:15 Isoetes_echinospora_3_S32_L003_R2_001.fastq.gz
-rw-r--r-- ? 0 501 1.4G Mar 7 04:07 Isoetes_engelmannii_N_S44_L003_R1_001.fastq.gz
-rw-r--r-- ? 0 501 1.5G Mar 7 04:19 Isoetes_engelmannii_N_S44_L003_R2_001.fastq.gz
-rw-r--r-- ? 0 501 1.2G Mar 7 04:13 Isoetes_georgiana_S41_L003_R1_001.fastq.gz
-rw-r--r-- ? 0 501 1.4G Mar 7 04:17 Isoetes_georgiana_S41_L003_R2_001.fastq.gz
-rw-r--r-- ? 0 501 1.0G Mar 7 04:11 Isoetes_laurentiana_S45_L003_R1_001.fastq.gz
-rw-r--r-- ? 0 501 1.1G Mar 7 04:14 Isoetes_laurentiana_S45_L003_R2_001.fastq.gz
-rw-r--r-- ? 0 501 1.7G Mar 7 04:18 Isoetes_microvela_S40_L003_R1_001.fastq.gz
-rw-r--r-- ? 0 501 1.8G Mar 7 04:07 Isoetes_microvela_S40_L003_R2_001.fastq.gz
-rw-r--r-- ? 0 501 754M Mar 7 04:17 Isoetes_pallida_S36_L003_R1_001.fastq.gz
-rw-r--r-- ? 0 501 826M Mar 7 04:17 Isoetes_pallida_S36_L003_R2_001.fastq.gz
-rw-r--r-- ? 0 501 550M Mar 7 04:18 Isoetes_prototypus_S27_L003_R1_001.fastq.gz
-rw-r--r-- ? 0 501 593M Mar 7 04:06 Isoetes_prototypus_S27_L003_R2_001.fastq.gz
-rw-r--r-- ? 0 501 1.8G Mar 7 04:11 Isoetes_septentrionalis_S43_L003_R1_001.fastq.gz
-rw-r--r-- ? 0 501 1.8G Mar 7 04:16 Isoetes_septentrionalis_S43_L003_R2_001.fastq.gz
-rw-r--r-- ? 0 501 648M Mar 7 04:19 Isoetes_setacea_S35_L003_R1_001.fastq.gz
-rw-r--r-- ? 0 501 701M Mar 7 04:21 Isoetes_setacea_S35_L003_R2_001.fastq.gz
-rw-r--r-- ? 0 501 1008M Mar 7 04:14 Isoetes_snowii_S28_L003_R1_001.fastq.gz
-rw-r--r-- ? 0 501 1.1G Mar 7 04:19 Isoetes_snowii_S28_L003_R2_001.fastq.gz
-rw-r--r-- ? 0 501 1.5G Mar 7 04:13 Isoetes_tennesseensis_S37_L003_R1_001.fastq.gz
-rw-r--r-- ? 0 501 1.6G Mar 7 04:18 Isoetes_tennesseensis_S37_L003_R2_001.fastq.gz
-rw-r--r-- ? 0 501 6.2G Mar 7 04:10 Isoetes_texana_S26_L003_R1_001.fastq.gz
-rw-r--r-- ? 0 501 6.3G Mar 7 04:08 Isoetes_texana_S26_L003_R2_001.fastq.gz
-rw-r--r-- ? 0 501 499M Mar 7 04:15 Isoetes_toximontana_S34_L003_R1_001.fastq.gz
-rw-r--r-- ? 0 501 541M Mar 7 04:21 Isoetes_toximontana_S34_L003_R2_001.fastq.gz
-rw-r--r-- ? 0 501 1.3G Mar 7 04:16 Isoetes_tuckermanii_S39_L003_R1_001.fastq.gz
-rw-r--r-- ? 0 501 1.4G Mar 7 04:18 Isoetes_tuckermanii_S39_L003_R2_001.fastq.gz
-rw-r--r-- ? 0 501 4.5G Mar 7 04:14 Isoetes_viridimontana_S29_L003_R1_001.fastq.gz
-rw-r--r-- ? 0 501 4.9G Mar 7 04:21 Isoetes_viridimontana_S29_L003_R2_001.fastq.gz
-rw-r--r-- ? 0 501 1.1K Mar 7 04:18 README.rtf
-rw-r--r-- ? 0 501 3.9K Mar 7 04:12 ZIMMER_5477_190301B1.checksum
sftp> get *
Fetching /zimmer_5477/ZIMMER_5477_190301B1/Isoetes_anatolica_S33_L003_R1_001.fastq.gz to Isoetes_anatolica_S33_L003_R1_001.fastq.gz
/zimmer_5477/ZIMMER_5477_190301B1/Isoetes_anatolica_S33_L003_R1_001.fastq.gz 100% 634MB 8.0MB/s 01:19
Fetching /zimmer_5477/ZIMMER_5477_190301B1/Isoetes_anatolica_S33_L003_R2_001.fastq.gz to Isoetes_anatolica_S33_L003_R2_001.fastq.gz
/zimmer_5477/ZIMMER_5477_190301B1/Isoetes_anatolica_S33_L003_R2_001.fastq.gz 100% 687MB 8.1MB/s 01:25
Fetching /zimmer_5477/ZIMMER_5477_190301B1/Isoetes_appalachiana_S_S38_L003_R1_001.fastq.gz to Isoetes_appalachiana_S_S38_L003_R1_001.fastq.gz
/zimmer_5477/ZIMMER_5477_190301B1/Isoetes_appalachiana_S_S38_L003_R1_001.fastq.gz 100% 2706MB 8.0MB/s 05:39
Fetching /zimmer_5477/ZIMMER_5477_190301B1/Isoetes_appalachiana_S_S38_L003_R2_001.fastq.gz to Isoetes_appalachiana_S_S38_L003_R2_001.fastq.gz
/zimmer_5477/ZIMMER_5477_190301B1/Isoetes_appalachiana_S_S38_L003_R2_001.fastq.gz 100% 2906MB 8.0MB/s 06:04
Fetching /zimmer_5477/ZIMMER_5477_190301B1/Isoetes_boomii_S42_L003_R1_001.fastq.gz to Isoetes_boomii_S42_L003_R1_001.fastq.gz
/zimmer_5477/ZIMMER_5477_190301B1/Isoetes_boomii_S42_L003_R1_001.fastq.gz 100% 2605MB 7.9MB/s 05:30
Fetching /zimmer_5477/ZIMMER_5477_190301B1/Isoetes_boomii_S42_L003_R2_001.fastq.gz to Isoetes_boomii_S42_L003_R2_001.fastq.gz
/zimmer_5477/ZIMMER_5477_190301B1/Isoetes_boomii_S42_L003_R2_001.fastq.gz 100% 2858MB 8.1MB/s 05:54
Fetching /zimmer_5477/ZIMMER_5477_190301B1/Isoetes_echinospora_1_S30_L003_R1_001.fastq.gz to Isoetes_echinospora_1_S30_L003_R1_001.fastq.gz
/zimmer_5477/ZIMMER_5477_190301B1/Isoetes_echinospora_1_S30_L003_R1_001.fastq.gz 100% 1194MB 8.4MB/s 02:22
Fetching /zimmer_5477/ZIMMER_5477_190301B1/Isoetes_echinospora_1_S30_L003_R2_001.fastq.gz to Isoetes_echinospora_1_S30_L003_R2_001.fastq.gz
/zimmer_5477/ZIMMER_5477_190301B1/Isoetes_echinospora_1_S30_L003_R2_001.fastq.gz 100% 1292MB 8.4MB/s 02:33
Fetching /zimmer_5477/ZIMMER_5477_190301B1/Isoetes_echinospora_2_S31_L003_R1_001.fastq.gz to Isoetes_echinospora_2_S31_L003_R1_001.fastq.gz
/zimmer_5477/ZIMMER_5477_190301B1/Isoetes_echinospora_2_S31_L003_R1_001.fastq.gz 100% 1427MB 8.3MB/s 02:51
Fetching /zimmer_5477/ZIMMER_5477_190301B1/Isoetes_echinospora_2_S31_L003_R2_001.fastq.gz to Isoetes_echinospora_2_S31_L003_R2_001.fastq.gz
/zimmer_5477/ZIMMER_5477_190301B1/Isoetes_echinospora_2_S31_L003_R2_001.fastq.gz 100% 1551MB 8.4MB/s 03:04
Fetching /zimmer_5477/ZIMMER_5477_190301B1/Isoetes_echinospora_3_S32_L003_R1_001.fastq.gz to Isoetes_echinospora_3_S32_L003_R1_001.fastq.gz
/zimmer_5477/ZIMMER_5477_190301B1/Isoetes_echinospora_3_S32_L003_R1_001.fastq.gz 100% 814MB 8.3MB/s 01:38
Fetching /zimmer_5477/ZIMMER_5477_190301B1/Isoetes_echinospora_3_S32_L003_R2_001.fastq.gz to Isoetes_echinospora_3_S32_L003_R2_001.fastq.gz
/zimmer_5477/ZIMMER_5477_190301B1/Isoetes_echinospora_3_S32_L003_R2_001.fastq.gz 100% 889MB 8.3MB/s 01:46
Fetching /zimmer_5477/ZIMMER_5477_190301B1/Isoetes_engelmannii_N_S44_L003_R1_001.fastq.gz to Isoetes_engelmannii_N_S44_L003_R1_001.fastq.gz
/zimmer_5477/ZIMMER_5477_190301B1/Isoetes_engelmannii_N_S44_L003_R1_001.fastq.gz 100% 1434MB 8.3MB/s 02:52
Fetching /zimmer_5477/ZIMMER_5477_190301B1/Isoetes_engelmannii_N_S44_L003_R2_001.fastq.gz to Isoetes_engelmannii_N_S44_L003_R2_001.fastq.gz
/zimmer_5477/ZIMMER_5477_190301B1/Isoetes_engelmannii_N_S44_L003_R2_001.fastq.gz 100% 1545MB 8.3MB/s 03:05
Fetching /zimmer_5477/ZIMMER_5477_190301B1/Isoetes_georgiana_S41_L003_R1_001.fastq.gz to Isoetes_georgiana_S41_L003_R1_001.fastq.gz
/zimmer_5477/ZIMMER_5477_190301B1/Isoetes_georgiana_S41_L003_R1_001.fastq.gz 100% 1271MB 8.3MB/s 02:32
Fetching /zimmer_5477/ZIMMER_5477_190301B1/Isoetes_georgiana_S41_L003_R2_001.fastq.gz to Isoetes_georgiana_S41_L003_R2_001.fastq.gz
/zimmer_5477/ZIMMER_5477_190301B1/Isoetes_georgiana_S41_L003_R2_001.fastq.gz 100% 1392MB 8.3MB/s 02:47
Fetching /zimmer_5477/ZIMMER_5477_190301B1/Isoetes_laurentiana_S45_L003_R1_001.fastq.gz to Isoetes_laurentiana_S45_L003_R1_001.fastq.gz
/zimmer_5477/ZIMMER_5477_190301B1/Isoetes_laurentiana_S45_L003_R1_001.fastq.gz 100% 1031MB 8.3MB/s 02:04
Fetching /zimmer_5477/ZIMMER_5477_190301B1/Isoetes_laurentiana_S45_L003_R2_001.fastq.gz to Isoetes_laurentiana_S45_L003_R2_001.fastq.gz
/zimmer_5477/ZIMMER_5477_190301B1/Isoetes_laurentiana_S45_L003_R2_001.fastq.gz 100% 1122MB 8.3MB/s 02:14
Fetching /zimmer_5477/ZIMMER_5477_190301B1/Isoetes_microvela_S40_L003_R1_001.fastq.gz to Isoetes_microvela_S40_L003_R1_001.fastq.gz
/zimmer_5477/ZIMMER_5477_190301B1/Isoetes_microvela_S40_L003_R1_001.fastq.gz 100% 1690MB 8.3MB/s 03:23
Fetching /zimmer_5477/ZIMMER_5477_190301B1/Isoetes_microvela_S40_L003_R2_001.fastq.gz to Isoetes_microvela_S40_L003_R2_001.fastq.gz
/zimmer_5477/ZIMMER_5477_190301B1/Isoetes_microvela_S40_L003_R2_001.fastq.gz 100% 1849MB 8.3MB/s 03:42
Fetching /zimmer_5477/ZIMMER_5477_190301B1/Isoetes_pallida_S36_L003_R1_001.fastq.gz to Isoetes_pallida_S36_L003_R1_001.fastq.gz
/zimmer_5477/ZIMMER_5477_190301B1/Isoetes_pallida_S36_L003_R1_001.fastq.gz 100% 754MB 8.3MB/s 01:31
Fetching /zimmer_5477/ZIMMER_5477_190301B1/Isoetes_pallida_S36_L003_R2_001.fastq.gz to Isoetes_pallida_S36_L003_R2_001.fastq.gz
/zimmer_5477/ZIMMER_5477_190301B1/Isoetes_pallida_S36_L003_R2_001.fastq.gz 100% 826MB 8.3MB/s 01:39
Fetching /zimmer_5477/ZIMMER_5477_190301B1/Isoetes_prototypus_S27_L003_R1_001.fastq.gz to Isoetes_prototypus_S27_L003_R1_001.fastq.gz
/zimmer_5477/ZIMMER_5477_190301B1/Isoetes_prototypus_S27_L003_R1_001.fastq.gz 100% 550MB 8.3MB/s 01:06
Fetching /zimmer_5477/ZIMMER_5477_190301B1/Isoetes_prototypus_S27_L003_R2_001.fastq.gz to Isoetes_prototypus_S27_L003_R2_001.fastq.gz
/zimmer_5477/ZIMMER_5477_190301B1/Isoetes_prototypus_S27_L003_R2_001.fastq.gz 100% 592MB 8.2MB/s 01:12
Fetching /zimmer_5477/ZIMMER_5477_190301B1/Isoetes_septentrionalis_S43_L003_R1_001.fastq.gz to Isoetes_septentrionalis_S43_L003_R1_001.fastq.gz
/zimmer_5477/ZIMMER_5477_190301B1/Isoetes_septentrionalis_S43_L003_R1_001.fastq.gz 100% 1837MB 7.8MB/s 03:55
Fetching /zimmer_5477/ZIMMER_5477_190301B1/Isoetes_septentrionalis_S43_L003_R2_001.fastq.gz to Isoetes_septentrionalis_S43_L003_R2_001.fastq.gz
/zimmer_5477/ZIMMER_5477_190301B1/Isoetes_septentrionalis_S43_L003_R2_001.fastq.gz 100% 1877MB 7.9MB/s 03:57
Fetching /zimmer_5477/ZIMMER_5477_190301B1/Isoetes_setacea_S35_L003_R1_001.fastq.gz to Isoetes_setacea_S35_L003_R1_001.fastq.gz
/zimmer_5477/ZIMMER_5477_190301B1/Isoetes_setacea_S35_L003_R1_001.fastq.gz 100% 648MB 7.9MB/s 01:21
Fetching /zimmer_5477/ZIMMER_5477_190301B1/Isoetes_setacea_S35_L003_R2_001.fastq.gz to Isoetes_setacea_S35_L003_R2_001.fastq.gz
/zimmer_5477/ZIMMER_5477_190301B1/Isoetes_setacea_S35_L003_R2_001.fastq.gz 100% 700MB 7.9MB/s 01:28
Fetching /zimmer_5477/ZIMMER_5477_190301B1/Isoetes_snowii_S28_L003_R1_001.fastq.gz to Isoetes_snowii_S28_L003_R1_001.fastq.gz
/zimmer_5477/ZIMMER_5477_190301B1/Isoetes_snowii_S28_L003_R1_001.fastq.gz 100% 1008MB 8.0MB/s 02:06
Fetching /zimmer_5477/ZIMMER_5477_190301B1/Isoetes_snowii_S28_L003_R2_001.fastq.gz to Isoetes_snowii_S28_L003_R2_001.fastq.gz
/zimmer_5477/ZIMMER_5477_190301B1/Isoetes_snowii_S28_L003_R2_001.fastq.gz 100% 1110MB 8.0MB/s 02:19
Fetching /zimmer_5477/ZIMMER_5477_190301B1/Isoetes_tennesseensis_S37_L003_R1_001.fastq.gz to Isoetes_tennesseensis_S37_L003_R1_001.fastq.gz
/zimmer_5477/ZIMMER_5477_190301B1/Isoetes_tennesseensis_S37_L003_R1_001.fastq.gz 100% 1550MB 8.0MB/s 03:14
Fetching /zimmer_5477/ZIMMER_5477_190301B1/Isoetes_tennesseensis_S37_L003_R2_001.fastq.gz to Isoetes_tennesseensis_S37_L003_R2_001.fastq.gz
/zimmer_5477/ZIMMER_5477_190301B1/Isoetes_tennesseensis_S37_L003_R2_001.fastq.gz 100% 1681MB 8.0MB/s 03:29
Fetching /zimmer_5477/ZIMMER_5477_190301B1/Isoetes_texana_S26_L003_R1_001.fastq.gz to Isoetes_texana_S26_L003_R1_001.fastq.gz
/zimmer_5477/ZIMMER_5477_190301B1/Isoetes_texana_S26_L003_R1_001.fastq.gz 100% 6310MB 7.9MB/s 13:14
Fetching /zimmer_5477/ZIMMER_5477_190301B1/Isoetes_texana_S26_L003_R2_001.fastq.gz to Isoetes_texana_S26_L003_R2_001.fastq.gz
/zimmer_5477/ZIMMER_5477_190301B1/Isoetes_texana_S26_L003_R2_001.fastq.gz 100% 6432MB 8.1MB/s 13:18
Fetching /zimmer_5477/ZIMMER_5477_190301B1/Isoetes_toximontana_S34_L003_R1_001.fastq.gz to Isoetes_toximontana_S34_L003_R1_001.fastq.gz
/zimmer_5477/ZIMMER_5477_190301B1/Isoetes_toximontana_S34_L003_R1_001.fastq.gz 100% 499MB 7.9MB/s 01:03
Fetching /zimmer_5477/ZIMMER_5477_190301B1/Isoetes_toximontana_S34_L003_R2_001.fastq.gz to Isoetes_toximontana_S34_L003_R2_001.fastq.gz
/zimmer_5477/ZIMMER_5477_190301B1/Isoetes_toximontana_S34_L003_R2_001.fastq.gz 100% 541MB 8.0MB/s 01:07
Fetching /zimmer_5477/ZIMMER_5477_190301B1/Isoetes_tuckermanii_S39_L003_R1_001.fastq.gz to Isoetes_tuckermanii_S39_L003_R1_001.fastq.gz
/zimmer_5477/ZIMMER_5477_190301B1/Isoetes_tuckermanii_S39_L003_R1_001.fastq.gz 100% 1301MB 8.0MB/s 02:43
Fetching /zimmer_5477/ZIMMER_5477_190301B1/Isoetes_tuckermanii_S39_L003_R2_001.fastq.gz to Isoetes_tuckermanii_S39_L003_R2_001.fastq.gz
/zimmer_5477/ZIMMER_5477_190301B1/Isoetes_tuckermanii_S39_L003_R2_001.fastq.gz 100% 1418MB 8.0MB/s 02:57
Fetching /zimmer_5477/ZIMMER_5477_190301B1/Isoetes_viridimontana_S29_L003_R1_001.fastq.gz to Isoetes_viridimontana_S29_L003_R1_001.fastq.gz
/zimmer_5477/ZIMMER_5477_190301B1/Isoetes_viridimontana_S29_L003_R1_001.fastq.gz 100% 4596MB 8.0MB/s 09:34
Fetching /zimmer_5477/ZIMMER_5477_190301B1/Isoetes_viridimontana_S29_L003_R2_001.fastq.gz to Isoetes_viridimontana_S29_L003_R2_001.fastq.gz
/zimmer_5477/ZIMMER_5477_190301B1/Isoetes_viridimontana_S29_L003_R2_001.fastq.gz 100% 5001MB 7.0MB/s 11:55
Fetching /zimmer_5477/ZIMMER_5477_190301B1/README.rtf to README.rtf
/zimmer_5477/ZIMMER_5477_190301B1/README.rtf 100% 1126 15.0KB/s 00:00
Fetching /zimmer_5477/ZIMMER_5477_190301B1/ZIMMER_5477_190301B1.checksum to ZIMMER_5477_190301B1.checksum
/zimmer_5477/ZIMMER_5477_190301B1/ZIMMER_5477_190301B1.checksum 100% 3978 51.5KB/s 00:00
CWD: /scratch-lustre/pscha005/Illumina/
Copied to cluster from Duke server as above.
#! /usr/bin/python
import re
import sys
import os
import subprocess
# ---------------------------------------------------------------------------
# Command-line parsing and sample grouping.
#
# Recognised arguments:
#   -hpc            write SLURM job scripts instead of running each tool locally
#   *.fastq*        read files; grouped into samples by file name (see below)
#   *.fasta*        optional reference; if several are given the last one wins
#
# Module-level names produced here and read by the pipeline functions:
#   sampleDict      {'Sample1': ['Input_File_R1.fastq.gz', 'Input_File_R2.fastq.gz'], 'Sample2': ...}
#   hpc             1 when -hpc was given, otherwise 0
#   fileList        every fastq argument, in command-line order
#   referenceFile / referenceName
#                   only defined when a reference was supplied; bowtie() checks
#                   for their presence with `'referenceName' in globals()`, so
#                   they must stay undefined (not None) when there is none.
# ---------------------------------------------------------------------------
sampleDict = {}
hpc = 0
fileList = []

if "-hpc" in sys.argv:
    hpc = 1

# sys.argv[0] is the script's own name and is never an input file.
for item in sys.argv[1:]:
    if ".fastq" in item:
        fileList.append(item)
    if ".fasta" in item:
        referenceFile = item
        # Drop any directory part and the final extension so the name can be
        # used as a plain index prefix below ./reference/.
        referenceName = ".".join(os.path.basename(referenceFile).split(".")[:-1])

# The previous check compared len(referenceFile) (the length of the file name)
# with 1, so the reference was never reported; test for its existence instead.
if "referenceFile" in globals():
    print("Using reference: %s" % (referenceFile))
else:
    print("*" * 10)
    print("WARNING: No reference genome provided. SPAdes will be run on all data.")
    print("*" * 10)

if not fileList:
    print("!" * 10)
    print("ERROR: No data (fastq files) supplied")
    print("!" * 10)
    sys.exit(1)

# The sample name is the file name minus its last two "_"-separated fields,
# e.g. Isoetes_boomii_S42_L003_R1_001.fastq.gz -> Isoetes_boomii_S42_L003.
# Only the base name is used so that inputs given with a directory prefix
# still yield a sample name that is usable as a directory / script name.
for fileName in fileList:
    baseName = os.path.basename(fileName.strip("\n"))
    sampleName = "_".join(baseName.split("_")[:-2])
    if not sampleName:
        print("!" * 10)
        print("ERROR: Cannot derive a sample name from %s" % (fileName))
        print("!" * 10)
        sys.exit(1)
    sampleDict.setdefault(sampleName, []).append(fileName)

if hpc == 1:
    print("*" * 10)
    print("Running in HPC mode...")
# Trimmomatic
def trimmomatic():
    """Quality-trim every sample in ``sampleDict`` with Trimmomatic 0.33.

    Reads the module-level ``sampleDict`` and ``hpc`` and (re)creates the
    module-level ``trimOutputDict``, which maps each sample name to a LIST of
    trimmed read files located in ``./<sample>/``:

    * one input file  -> ``['<sample>_TRIM.fq.gz']`` (single-end mode)
    * two input files -> ``['<sample>_TRIM_forward_paired.fq.gz',
                            '<sample>_TRIM_reverse_paired.fq.gz']``

    The single-end entry used to be a bare string, which bowtie() could never
    match with its ``len(...) == 1`` test; it is now a one-element list.

    With ``hpc == 0`` Trimmomatic is run immediately through ``java -jar
    trimmomatic-0.33.jar`` (jar and ./adapters/ expected in the current
    directory). With ``hpc == 1`` nothing is executed; a SLURM script
    ``trimmomatic_<sample>.sh`` is written for each sample instead.

    Exits the interpreter if a sample has more than two input files or if
    ``java`` cannot be launched.
    """
    global trimOutputDict

    print("*" * 10)
    print("Running trimmomatic...")
    print("*" * 10)

    trimOutputDict = {}
    trimSteps = ['LEADING:3', 'TRAILING:3', 'SLIDINGWINDOW:4:15', 'MINLEN:36']
    hpcJar = '/scratch-lustre/pscha005/trimmomatic/trimmomatic-0.33.jar'
    hpcAdapters = '/scratch-lustre/pscha005/trimmomatic/adapters'

    for sample in sorted(sampleDict.keys()):
        inputs = list(sampleDict[sample])

        if len(inputs) > 2:
            print("!" * 10)
            print("ERROR: Too many files specified for %s" % (sample))
            print("!" * 10)
            sys.exit(1)

        # Per-sample output directory; tolerate re-runs where it already exists.
        if not os.path.isdir(sample):
            os.makedirs(sample)

        if len(inputs) == 2:
            mode = 'PE'
            adapterFile = 'TruSeq3-PE.fa'
            outputs = ['./%s/%s_TRIM_forward_paired.fq.gz' % (sample, sample),
                       './%s/%s_TRIM_forward_unpaired.fq.gz' % (sample, sample),
                       './%s/%s_TRIM_reverse_paired.fq.gz' % (sample, sample),
                       './%s/%s_TRIM_reverse_unpaired.fq.gz' % (sample, sample)]
            # Only the paired survivors are passed on to the next stage.
            trimOutputDict[sample] = ['%s_TRIM_forward_paired.fq.gz' % (sample),
                                      '%s_TRIM_reverse_paired.fq.gz' % (sample)]
        else:
            mode = 'SE'
            adapterFile = 'TruSeq3-SE.fa'
            outputs = ['./%s/%s_TRIM.fq.gz' % (sample, sample)]
            trimOutputDict[sample] = ['%s_TRIM.fq.gz' % (sample)]
            if hpc == 0:
                print("*" * 10)
                print("WARNING: %s does not have 2 input files. Trimmomatic will be run in SE mode" % (sample))
                print("*" * 10)

        if hpc == 1:
            # HPC mode: write the job script only; submission is done by hand.
            commandLine = ' '.join(
                ['java', '-jar', hpcJar, mode, '-threads', '16']
                + inputs + outputs
                + ['ILLUMINACLIP:%s/%s:2:30:10' % (hpcAdapters, adapterFile)]
                + trimSteps)
            scriptLines = [
                '#!/bin/bash -l',
                '',
                '#SBATCH --job-name=trim # Job name',
                '#SBATCH --ntasks=1 # Run on a single CPU',
                '#SBATCH --cpus-per-task=16',
                '#SBATCH --output=trimmomatic_%s.out # Standard output and error log' % (sample),
                'pwd; hostname; date',
                'enable_lmod',
                'module load java/11.0',
                commandLine,
                'date',
            ]
            with open("trimmomatic_%s.sh" % (sample), "w") as trimScript:
                trimScript.write("\n".join(scriptLines) + "\n")
        else:
            command = (['java', '-jar', 'trimmomatic-0.33.jar', mode, '-phred33']
                       + inputs + outputs
                       + ['ILLUMINACLIP:./adapters/%s:2:30:10' % (adapterFile)]
                       + trimSteps)
            try:
                subprocess.call(command)
            except OSError:
                # subprocess raises OSError only when `java` itself cannot be
                # started; a missing jar shows up as java's own error output.
                sys.exit("ERROR: could not run java, which is needed to launch trimmomatic-0.33.jar from the current directory")
# Bowtie2
def bowtie():
    """Split each sample's trimmed reads by alignment to the reference.

    Does nothing except print a warning when no reference was supplied (the
    module-level ``referenceName`` is undefined); ``bowtieOutputDict`` is NOT
    created in that case.

    Otherwise a Bowtie2 index is built from ``referenceFile`` under
    ``./reference/`` and, for every sample in ``trimOutputDict``, bowtie2 is
    either run directly (``hpc == 0``) or a SLURM script
    ``bowtie_<sample>.sh`` is written (``hpc == 1``). The module-level
    ``bowtieOutputDict`` then maps each sample to the files bowtie2 writes
    into ``./<sample>/``:

    * single-end: ``[<sample>_align.fq.gz, <sample>_unalign.fq.gz]``
    * paired-end: ``[<sample>_R1_align.fq.gz, <sample>_R2_align.fq.gz,
                     <sample>_R1_unalign.fq.gz, <sample>_R2_unalign.fq.gz]``

    The paired-end names previously lacked the ``.gz`` suffix although the
    files are written through ``--al-conc-gz`` / ``--un-conc-gz`` with a
    ``..._R%_....fq.gz`` pattern, so the recorded names did not match the
    paths passed to bowtie2.

    Exits the interpreter if ``bowtie2-build`` cannot be launched.
    """
    global bowtieOutputDict

    if 'referenceName' not in globals():
        print("*" * 10)
        print("WARNING: No reference file found, skipping bowtie filtering...")
        print("*" * 10)
        return

    print("*" * 10)
    print("Running bowtie...")
    print("*" * 10)

    reference = "reference"
    if not os.path.isdir(reference):
        os.makedirs(reference)
    referenceOutput = "./reference/%s" % (referenceName)
    try:
        subprocess.call(['bowtie2-build', '-q', referenceFile, referenceOutput])
    except OSError:
        sys.exit("ERROR: bowtie2 not found in PATH")

    bowtieOutputDict = {}
    for sample in sorted(trimOutputDict.keys()):
        trimmed = trimOutputDict[sample]
        # A single-end sample may be recorded as a bare file name; treat it as
        # a one-element list so it is not silently skipped (or indexed by
        # character).
        if isinstance(trimmed, str):
            trimmed = [trimmed]

        if len(trimmed) == 1:
            readArgs = ['-U', './%s/%s' % (sample, trimmed[0]),
                        '--al-gz', './%s/%s_align.fq.gz' % (sample, sample),
                        '--un-gz', './%s/%s_unalign.fq.gz' % (sample, sample)]
            bowtieOutputDict[sample] = ["%s_align.fq.gz" % (sample),
                                        "%s_unalign.fq.gz" % (sample)]
        elif len(trimmed) == 2:
            # bowtie2 replaces the literal "%" in the pattern with the mate
            # number (1 or 2).
            readArgs = ['-1', './%s/%s' % (sample, trimmed[0]),
                        '-2', './%s/%s' % (sample, trimmed[1]),
                        '--al-conc-gz', './%s/%s_R%%_align.fq.gz' % (sample, sample),
                        '--un-conc-gz', './%s/%s_R%%_unalign.fq.gz' % (sample, sample)]
            bowtieOutputDict[sample] = ["%s_R1_align.fq.gz" % (sample),
                                        "%s_R2_align.fq.gz" % (sample),
                                        "%s_R1_unalign.fq.gz" % (sample),
                                        "%s_R2_unalign.fq.gz" % (sample)]
        else:
            # Neither single- nor paired-end: nothing to align for this sample.
            continue

        command = ['bowtie2', '--quiet', '--very-sensitive-local',
                   '-x', referenceOutput] + readArgs

        if hpc == 0:
            subprocess.call(command)
        elif hpc == 1:
            scriptLines = [
                '#!/bin/bash',
                '',
                '#SBATCH --job-name=bwtie # Job name',
                '#SBATCH --ntasks=1 # Run on a single CPU',
                'pwd; hostname; date',
                'module load bowtie2/2.2.4',
                ' '.join(command),
                'date',
            ]
            with open("bowtie_%s.sh" % (sample), "w") as bowtieScript:
                bowtieScript.write("\n".join(scriptLines) + "\n")
# SPAdes
def spades():
    """Assemble each sample with SPAdes (k = 21,33,55,77, ``--careful``).

    Input selection:

    * If ``bowtieOutputDict`` exists (a reference was supplied and bowtie()
      ran), the reads that ALIGNED to the reference are assembled: entry 0 of
      a two-file (single-end) record, entries 0 and 1 of a four-file
      (paired-end) record.
    * Otherwise the trimmed reads in ``trimOutputDict`` are assembled
      directly. Previously this case raised NameError because
      ``bowtieOutputDict`` was never defined, contradicting the start-up
      warning that SPAdes would be run on all data.

    With ``hpc == 0`` ``spades.py`` is invoked immediately; with ``hpc == 1``
    a SLURM script ``spades_<sample>.sh`` is written instead. Output goes to
    ``./<sample>/spades_<sample>``.

    The single-end HPC template used to be formatted with seven arguments for
    five placeholders, which raised TypeError; script text is now assembled
    from the same argument list used for the direct call.
    """
    # Normalise to {sample: [file]} (single-end) or {sample: [file1, file2]}.
    readSets = {}
    if 'bowtieOutputDict' in globals():
        for sample, files in bowtieOutputDict.items():
            if len(files) == 2:
                readSets[sample] = [files[0]]
            elif len(files) == 4:
                readSets[sample] = [files[0], files[1]]
    else:
        for sample, files in globals().get('trimOutputDict', {}).items():
            if isinstance(files, str):
                files = [files]
            if len(files) in (1, 2):
                readSets[sample] = list(files)

    for sample in sorted(readSets.keys()):
        reads = ['./%s/%s' % (sample, name) for name in readSets[sample]]
        if len(reads) == 1:
            readArgs = ['-s', reads[0]]
        else:
            readArgs = ['-1', reads[0], '-2', reads[1]]
        outputArgs = ['-o', './%s/spades_%s' % (sample, sample)]

        if hpc == 0:
            subprocess.call(['spades.py', '-k', '21,33,55,77', '--careful']
                            + readArgs + outputArgs)
        elif hpc == 1:
            commandLine = ' '.join(
                ['spades.py', '-t', '16', '-k', '21,33,55,77', '--careful']
                + readArgs + outputArgs)
            scriptLines = [
                '#!/bin/bash -l',
                '#SBATCH --job-name=spds # Job name',
                '#SBATCH --mail-type=ALL # Mail events (NONE, BEGIN, END, FAIL, ALL)',
                '#SBATCH --mail-user=<pscha005@odu.edu> # Where to send mail',
                '#SBATCH --ntasks=1 # Run on a single CPU',
                '#SBATCH --cpus-per-task=16',
                '#SBATCH --output=spades_%s.out # Standard output and error log' % (sample),
                'pwd; hostname; date',
                'enable_lmod',
                'module load spades/3.13',
                commandLine,
                'date',
            ]
            with open("spades_%s.sh" % (sample), "w") as spadesScript:
                spadesScript.write("\n".join(scriptLines) + "\n")
# Run the three pipeline stages in order. The order matters: trimmomatic()
# fills trimOutputDict, which bowtie() reads; bowtie() fills bowtieOutputDict
# (only when a reference was supplied), which spades() reads.
# In HPC mode each stage writes per-sample job scripts instead of running its tool.
trimmomatic()
bowtie()
spades()
pwd
/scratch-lustre/pscha005/Illumina
bash
# Submit every per-sample Trimmomatic job script in the current directory.
for i in trimmomatic*.sh; do
  [[ -e "$i" ]] || continue  # glob matched nothing: do not submit the literal pattern
  sbatch "$i"
done
module load fastqc/0.11.5
module load java/1.8
# Run FastQC on the trimmed reads in every sample directory.
# NOTE(review): "*paired.fq.gz" also matches the "*_unpaired.fq.gz" files —
# confirm that QC of the unpaired reads is intended.
for i in ./*/*paired.fq.gz; do
  [[ -e "$i" ]] || continue  # glob matched nothing: skip the literal pattern
  fastqc "$i"
done
pwd
/scratch-lustre/pscha005/Illumina
bash
# Submit every per-sample SPAdes job script in the current directory.
for i in spades*.sh; do
  [[ -e "$i" ]] || continue  # glob matched nothing: do not submit the literal pattern
  sbatch "$i"
done