1.计算reads PE长度脚本
01.Cal_PE_Depth.sh
Read_count=`gzip -dc Reads1.fq.gz |wc -l |awk '{print $1/4}'`echo "read pair count : $Read_count"average_depth=`expr $Read_count \* 200 / 6000000 `echo average depth: $average_depth
2.计算reads覆盖度
02.coverage.R
depth<-(1599999*100*2/6e6)print(depth)coverage<-ppois(40,lambda=depth,lower=F)print(coverage)
3. 计算N50
03.Cal_N50.pl
#!/usr/bin/perluse strict;my $file = shift;open (IN,"$file") or die ("can't open the file!\n");open (OUT,">scaf_sort_length.txt") or die ("can't create the file!\n");my $total_len = 0;my $ref_len = 6e6;my $flag = 1;my @count_len;my %scaf_hash;$/=">";;while( ){ chomp; my ($name,@seq) = split(/\n/,$_); my $scafseq = join("",@seq); $total_len += length($scafseq); my $ID = (split(/\s+/,$name))[0]; $scaf_hash{ $ID}=length($scafseq);}my $count = 0;foreach my $key (sort { $scaf_hash{ $b}<=>$scaf_hash{ $a}} keys %scaf_hash){ print OUT "$scaf_hash{$key}\n"; push(@count_len,$scaf_hash{ $key});}foreach my $len (@count_len){ $count += $len; if ($count /$total_len >=0.5 && $flag){ print "N50: $len\n"; $flag = 0; } if ($count/$total_len>=0.9){ print "N90: $len\n"; last; }}close IN;close OUT;
4. 长度累积曲线分布图
04.scaf_acclen_graph.R
data<-read.table("scaf_sort_length.txt",header=F)len<-data[,1]head(len)sum<-0acclen<-numeric(length(len))for (i in 1:nrow(data)){ sum <-sum + len[i]; acclen[i]<-sum;}head(acclen)pdf("Length.pdf")plot(x=(1:length(acclen)),y=acclen,xlab="accseq ID",ylab="acclen",type="l",col="blue",main="scaSeq accumulate length grap")dev.off()