计算单机版Blat的
单机版Blat的匹配结果中没有percent identity(网络版的Blat输出结果中有)。为了筛选输出结果,常常需要计算每个匹配的percent identity;UCSC的Blat—FAQ中虽然给出了相应的解决办法(http://genome.ucsc.edu/FAQ/FAQblat#blat4),但其代码是用C写的,不利于使用
Perl的生物信息学工作人员调用。此处给出相应的perl代码,如下所示(非原创,版权归原作者所有):
while (<>) {
chomp $_;
my @v = split( /\t/, $_ );
get_pid(@v);
}sub get_pid {
my @line = @_;
my $pid = ( 100.0 - ( &pslCalcMilliBad(@line) * 0.1 ) );
print "The percentage: $pid\n"; #return $pid;
}sub pslCalcMilliBad {
my @cols = @_; # sizeNul depens of dna/Prot
my $sizeMul = 1; #if ($option{p}) {
# $sizeMul = 3;
#} else {
# $sizeMul = 1;
#} # cols[0] matches
# cols[1] misMatches
# cols[2] repMaches
# cols[4] qNumInsert
# cols[6] tNumInsert
# cols[11] qStart
# cols[12] qEnd
# cols[15] tStart
# cols[16] tEnd my $qAliSize = $sizeMul * ( $cols[12] - $cols[11] );
my $tAliSize = $cols[16] - $cols[15]; # I want the minimum of qAliSize and tAliSize
my $aliSize;
$qAliSize < $tAliSize ? $aliSize = $qAliSize : $aliSize = $tAliSize; # return 0 is AliSize == 0
return 0 if ( $aliSize <= 0 ); # size diff
my $sizeDiff = $qAliSize - $tAliSize;
if ( $sizeDiff < 0 ) {
$sizeDiff = 0; #if ($option{m}) {
#$sizeDiff = 0;
#} else {
#$sizeDiff = -($sizeDiff);
#}
} # insert Factor
my $insertFactor = $cols[4]; #$insertFactor += $cols[6] unless ($option{m});
my $milliBad = (
1000 * (
$cols[1] * $sizeMul +
$insertFactor +
&round( 3 * log( 1 + $sizeDiff ) )
)
) / ( $sizeMul * ( $cols[0] + $cols[2] + $cols[1] ) );
return $milliBad;}sub round {
my $number = shift;
return int( $number + .5 );
}