加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
克隆/下载
MTR.pm 3.97 KB
一键复制 编辑 原始数据 按行查看 历史
=head1 CONTACT
Slave Petrovski <slavep@unimelb.edu.au>
Michael Silk <silkm@student.unimelb.edu.au>
=cut
=head1 NAME
MTR (Missense Tolerance Ratio)
=head1 SYNOPSIS
mv MTR.pm ~/.vep/Plugins
curl -O ftp://mtr-viewer.mdhs.unimelb.edu.au/pub/mtrflatfile_1.0.txt.gz
curl -O ftp://mtr-viewer.mdhs.unimelb.edu.au/pub/mtrflatfile_1.0.txt.gz.tbi
perl variant_effect_predictor.pl -i variations.vcf --plugin MTR,mtrflatfile_1.0.txt.gz
=head1 DESCRIPTION
A VEP plugin that retrieves Missense Tolerance Ratio (MTR) scores for
variants from a tabix-indexed flat file.
MTR scores quantify the amount of purifying selection acting
specifically on missense variants in a given window of protein-coding
sequence. It is estimated across a sliding window of 31 codons and uses
observed standing variation data from the WES component of the Exome
Aggregation Consortium Database (ExAC), version 2.0
(http://gnomad.broadinstitute.org).
Please cite the MTR publication alongside the VEP if you use this resource:
http://genome.cshlp.org/content/27/10/1715
The Bio::DB::HTS perl library or tabix utility must be installed in your path
to use this plugin. MTR flat files can be downloaded from:
ftp://mtr-viewer.mdhs.unimelb.edu.au/pub
NB: Data are available for GRCh37 only
=cut
package MTR;
use strict;
use warnings;
use Bio::EnsEMBL::Utils::Sequence qw(reverse_comp);
use Bio::EnsEMBL::Variation::Utils::BaseVepTabixPlugin;
use base qw(Bio::EnsEMBL::Variation::Utils::BaseVepTabixPlugin);
sub new {
my $class = shift;
my $self = $class->SUPER::new(@_);
# test tabix
die "ERROR: tabix does not seem to be in your path\n" unless `which tabix 2>&1` =~ /tabix$/;
# get MTR file
my $file = $self->params->[0];
$self->add_file($file);
# remote files?
if($file =~ /tp\:\/\//) {
my $remote_test = `tabix -f $file 1:1-1 2>&1`;
if($remote_test && $remote_test !~ /get_local_version/) {
die "$remote_test\nERROR: Could not find file or index file for remote annotation file $file\n";
}
}
# check files exist
else {
die "ERROR: MTR file $file not found\n" unless -e $file;
die "ERROR: Tabix index file $file\.tbi not found - perhaps you need to create it first?\n" unless -e $file.'.tbi';
}
# get headers and store on self
open HEAD, "tabix -fh $file 1:1-1 2>&1 | ";
while(<HEAD>) {
next unless /^\#/;
chomp;
$_ =~ s/^\#//;
$self->{headers} = [split];
}
close HEAD;
return $self;
}
sub feature_types {
return ['Transcript'];
}
sub variation_feature_types {
return ['VariationFeature'];
}
sub get_header_info {
my $self = shift;
return {
MTR => 'MTR score',
FDR => 'MTR false discovery rate adjusted binomial exact test.',
MTR_centile => 'MTR gene-specific percentile'
}
}
sub run {
my ($self, $tva) = @_;
my $vf = $tva->variation_feature;
# get allele, reverse comp if needed
my $allele = $tva->variation_feature_seq;
reverse_comp(\$allele) if $vf->{strand} < 0;
return {} unless $allele =~ /^[ACGT]$/;
my $tr_id = $tva->transcript->stable_id;
# data is written by pos, allele, transcript ID (feature)
# grep lines read in matched on position so that they also are matched on allele and transcript ID
my ($res) = grep {
$_->{Genomic_position} eq $vf->{start} &&
$_->{Genomic_position} eq $vf->{end} &&
$_->{alt} eq $allele &&
$_->{Feature} eq $tr_id
} @{$self->get_data($vf->{chr}, $vf->{start}, $vf->{end})};
# return only the keys defined by get_header_info()
return $res ? { map {$_ => $res->{$_}} grep {defined($res->{$_}) && $res->{$_} ne '.'} keys %{$self->get_header_info} } : {};
}
sub parse_data {
my ($self, $line) = @_;
$line =~ s/\r$//g;
my @split = split /\t/, $line;
# parse data into hash of col names and values
my %data = map {$self->{headers}->[$_] => $split[$_]} (0..(scalar @{$self->{headers}} - 1));
return \%data;
}
sub get_start {
return $_[1]->{'Genomic_position'};
}
sub get_end {
return $_[1]->{'Genomic_position'};
}
1;
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化